From 3b5a85196f7d4a318327d3339ec95cd8e4c36b70 Mon Sep 17 00:00:00 2001 From: kavaivaleri Date: Mon, 17 Nov 2025 17:30:20 +0100 Subject: [PATCH 1/9] Podcast URL updates --- ...ab-testing-and-product-experimentation.md} | 213 +++++----- ...in-healthcare-and-digital-therapeutics.md} | 201 +++++---- ...-ml-product-design-and-experimentation.md} | 256 ++++++----- ...gorithms-data-structures-for-engineers.md} | 265 ++++++------ ....md => analytics-engineer-skills-tools.md} | 229 +++++----- ...-to-data-science-with-kaggle-portfolio.md} | 204 +++++---- ...ig-data-analytics-and-postdoc-research.md} | 262 ++++++------ ...=> big-data-engineer-vs-data-scientist.md} | 299 +++++++------ ...y-for-data-scientists-and-ml-engineers.md} | 234 +++++------ ...-and-scale-ai-data-products-with-mlops.md} | 194 ++++----- ...ngineering-systems-for-fraud-detection.md} | 278 ++++++------ ...building-ds-team.md => build-data-team.md} | 279 ++++++------ ...plainable-and-actionable-ai-ml-systems.md} | 274 ++++++------ ...a-science-practice-industrial-ai-mlops.md} | 213 +++++----- ...s.md => building-and-scaling-data-team.md} | 277 ++++++------ ...ducts-product-owner-vs-product-manager.md} | 209 +++++---- ...mocratizing-high-performance-computing.md} | 262 ++++++------ ...ommunities-diversity-and-career-growth.md} | 255 ++++++----- ...l-startup.md => building-mlops-startup.md} | 300 +++++++------ ...e-data-product-for-identity-resolution.md} | 310 +++++++------- ...ol.md => building-open-source-nlp-tool.md} | 276 ++++++------ ...-production-ml-platform-and-mlops-team.md} | 285 ++++++------- ...-and-reliable-machine-learning-systems.md} | 229 +++++----- ... 
causal-inference-for-machine-learning.md} | 273 ++++++------ ...a-officer-data-strategy-and-org-design.md} | 257 ++++++------ ...governance.md => cloud-data-governance.md} | 250 ++++++----- _podcast/{s01e02-processes.md => crisp-dm.md} | 26 +- ...e03-data-centric-ai.md => data-centric.md} | 278 ++++++------ ...usiness-pricing-and-client-acquisition.md} | 242 +++++------ ...ata-engineering-career-path-and-skills.md} | 271 ++++++------ ...g-leadership-and-modern-data-platforms.md} | 249 ++++++----- ...ta-engineering-tools-modern-data-stack.md} | 270 ++++++------ ...data-governance-data-access-management.md} | 280 ++++++------- ...ew-behavioral-and-portfolio-prep-guide.md} | 225 +++++----- ...lism-python-visualization-storytelling.md} | 261 ++++++------ ...-growth-event-tracking-and-reverse-etl.md} | 252 ++++++----- ...chitecture-decentralized-data-products.md} | 242 +++++------ ...vacy-engineering-gdpr-machine-learning.md} | 214 +++++----- ...-professionals-business-skills-in-saas.md} | 233 +++++------ ...ty-data-observability-data-reliability.md} | 233 +++++------ ...analytics-for-nonprofits-tech-for-good.md} | 263 ++++++------ ...d => data-science-career-abc-framework.md} | 282 ++++++------- ...ata-science-failures-and-mlops-lessons.md} | 275 ++++++------ ...public-policy-ethical-ai-social-impact.md} | 237 +++++------ ...=> data-science-interview-and-cv-guide.md} | 275 ++++++------ ...nce-job-red-flags-and-mismatched-roles.md} | 281 ++++++------- ...> data-science-leadership-hiring-mlops.md} | 285 +++++++------ ...-management-and-agile-machine-learning.md} | 206 +++++---- ...science-manager-vs-expert-hiring-guide.md} | 266 ++++++------ ...-science-team-structure-and-org-design.md} | 252 ++++++----- ...die-hacker-bootstrapping-side-projects.md} | 282 ++++++------- ...gy-and-dataops-for-ai-powered-products.md} | 228 +++++----- .../{s01e01-roles.md => data-team-roles.md} | 27 +- ...data-translator-role-and-data-strategy.md} | 245 ++++++----- 
...d-gitops-best-practices-for-data-teams.md} | 275 ++++++------ ...automation-and-reliable-data-pipelines.md} | 275 ++++++------ ...principles-and-scalable-data-platforms.md} | 248 ++++++----- ...ksclub-building-scaling-data-community.md} | 233 +++++------ ...-fine-tuning-retrieval-open-source-api.md} | 271 ++++++------ ...veloper-personal-brand-learn-in-public.md} | 271 ++++++------ ... devrel-data-science-open-source-tools.md} | 263 ++++++------ ...=> devrel-open-source-machine-learning.md} | 248 ++++++----- ...g-model-monitoring-and-data-governance.md} | 227 +++++----- ...e-data-engineering-pricing-and-clients.md} | 277 ++++++------ ....md => freelancing-in-machine-learning.md} | 287 ++++++------- ...-engineer-interviews-and-career-growth.md} | 255 ++++++----- ...-engineering-to-leading-data-architect.md} | 224 +++++----- ...tics-engineering-sql-dbt-career-switch.md} | 267 ++++++------ ...> from-math-graduate-to-data-analytics.md} | 285 ++++++------- ...s-to-computer-vision-career-transition.md} | 254 ++++++----- ...-software-engineer-to-machine-learning.md} | 276 ++++++------ ...science-to-data-engineering-leadership.md} | 227 +++++----- ...ineering-to-leading-data-science-teams.md} | 280 ++++++------- ...-engineering-to-freelance-data-science.md} | 287 ++++++------- ...ata-analytics-and-data-engineering-job.md} | 215 +++++----- ...ata-engineering-job-prep-and-interview.md} | 259 ++++++------ ...scientist.md => get-data-scientist-job.md} | 270 ++++++------ ...unior-data-job-and-transferable-skills.md} | 242 +++++------ ...-manage-data-science-teams-in-b2b-saas.md} | 229 +++++----- ...=> hiring-data-scientists-and-analysts.md} | 252 ++++++----- ...ng-for-data-engineering-jobs-in-europe.md} | 272 ++++++------ ...nce-jobs-interview-questions-skills.md.md} | 213 +++++----- ...k.md => how-to-break-into-data-science.md} | 227 +++++----- ...how-to-grow-your-ml-engineering-career.md} | 229 +++++----- ...md => how-to-stand-out-in-data-science.md} | 248 ++++++----- 
...o-switch-to-ml-tech-without-experience.md} | 233 +++++------ ...n-into-ml-and-data-engineering-from-qa.md} | 228 +++++----- ...g-face-contributions-and-nlp-portfolio.md} | 253 ++++++----- ...an-centered-mlops-and-model-monitoring.md} | 269 ++++++------ ...small-data-production-machine-learning.md} | 286 ++++++------- ...vesting-in-open-source-developer-tools.md} | 261 ++++++------ ...ata-product-adoption-modern-data-stack.md} | 243 ++++++----- ....md => launch-and-build-retail-startup.md} | 275 ++++++------ ...ne-learning-self-taught-bioinformatics.md} | 264 ++++++------ ...e-learning-data-science-interview-prep.md} | 229 +++++----- ...machine-learning-decision-optimization.md} | 38 +- ...-engineering-production-best-practices.md} | 249 ++++++----- ...or-asteroid-mining-and-water-detection.md} | 226 +++++----- ...ing-attribution-marketing-mix-modeling.md} | 261 ++++++------ ...chine-learning-system-design-interview.md} | 265 ++++++------ ...ney-with-machine-learning-roles-skills.md} | 219 +++++----- ...-engineering-kpis-and-metrics-strategy.md} | 245 ++++++----- ...ct-manager-and-mlops-platform-strategy.md} | 280 ++++++------- ...g-design-broken.md => ml-system-design.md} | 194 ++++----- ...> mlops-community-building-and-meetups.md} | 237 +++++------ ...ure-stores-feature-stores-feast-tecton.md} | 46 +- ....md => mlops-kubeflow-model-monitoring.md} | 304 +++++++------- ...ps-model-monitoring-data-observability.md} | 250 ++++++----- ...lines-orchestration-ingestion-modeling.md} | 210 +++++----- ...et-creation-annotation-tools-workflows.md} | 211 +++++----- ...> nlp-team-hiring-and-production-mlops.md} | 250 ++++++----- ...rce.md => open-source-ml-contributions.md} | 59 ++- ...urned-into-career-and-startup-creation.md} | 230 +++++----- ... 
personal-brand-for-data-professionals.md} | 37 +- ...to-data-science-lead-career-transition.md} | 273 ++++++------ ...cal-llm-use-cases-and-product-patterns.md} | 214 +++++----- ...md => pragmatic-and-standardized-mlops.md} | 295 +++++++------ ...oduct-designer-to-data-product-manager.md} | 226 +++++----- ...uction-ml-mlops-and-data-team-building.md} | 223 +++++----- ...uction-ml-pipelines-with-aws-and-kafka.md} | 283 ++++++------- ...d => project-manager-to-data-scientist.md} | 252 ++++++----- ...=> public-speaking-for-data-scientists.md} | 249 ++++++----- ...eering-work-and-building-iot-platforms.md} | 285 ++++++------- ...earch-to-production-ml-systems-roadmap.md} | 267 ++++++------ ...ponsible-explainable-ai-bias-detection.md} | 254 ++++++----- ...gineering-teams-self-service-platforms.md} | 232 +++++----- ...nterprise-ai-mlops-data-first-strategy.md} | 212 +++++----- ...tware-engineering-for-machine-learning.md} | 240 +++++------ ...-in-startups.md => solo-data-scientist.md} | 256 ++++++----- ...-for-developers-and-data-professionals.md} | 233 +++++------ ...ching-mentoring-data-analytics-fintech.md} | 266 ++++++------ ...-science-coding-practices-for-academia.md} | 284 ++++++------- ...ing-how-to-find-and-become-a-mentor.md.md} | 34 +- ... 
technical-writing-for-data-scientists.md} | 35 +- ...01-datatalks-club-anniversary-interview.md | 203 +++++---- ...02-bridging-data-science-and-healthcare.md | 289 ++++++------- ...-collaborative-data-science-in-business.md | 254 ++++++----- ...om-marketing-to-product-owner-in-search.md | 191 ++++----- ...mazon-to-machine-learning-ai-consultant.md | 266 ++++++------ ...n-rules-for-success-in-machine-learning.md | 231 +++++----- ...de-machine-learning-made-understandable.md | 241 +++++------ .../s16e08-ai-for-digital-health.md | 264 ++++++------ .../s16e09-become-data-freelancer.md | 255 ++++++----- ...ey-from-freelancing-to-starting-company.md | 292 ++++++------- ...-and-innovations-in-search-technologies.md | 193 ++++----- ...alysis-with-python-and-machine-learning.md | 258 ++++++------ ...-modeling-and-probabilistic-programming.md | 241 +++++------ ...machine-learning-engineering-in-finance.md | 233 +++++------ ...rating-job-hunt-for-perfect-job-in-tech.md | 255 ++++++----- ...t-through-volunteering-open-source-work.md | 232 +++++----- ...7e08-building-machine-learning-products.md | 249 ++++++----- ...7e09-building-production-search-systems.md | 319 +++++++------- ...8e01-inclusive-data-leadership-coaching.md | 253 ++++++----- ...s-and-llms-across-academia-and-industry.md | 219 +++++----- ...r-ecology-biodiversity-and-conservation.md | 38 +- ...g-in-open-source-probabl-ai-and-sklearn.md | 289 ++++++------- ...munity-building-and-teaching-in-ai-tech.md | 262 ++++++------ ...-building-domestic-risk-assessment-tool.md | 38 +- ...ervability-and-cure-for-data-team-blues.md | 222 +++++----- ...01-using-data-to-create-liveable-cities.md | 282 ++++++------- ...ed-ai-for-disordered-speech-recognition.md | 265 ++++++------ ...9e03-datatalks-club-anniversary-podcast.md | 282 ++++++------- .../{ => to-update}/s19e04-mlops-as-team.md | 241 +++++------ ...05-large-hadron-collider-and-mentorship.md | 259 ++++++------ ...y-trust-return-on-investment-and-future.md | 219 
+++++----- ...arning-and-featuring-women-in-ml-and-ai.md | 235 +++++------ ...tions-and-promotions-in-and-out-of-tech.md | 274 ++++++------ .../s19e09-linguistics-and-fairness.md | 310 +++++++------- .../s20e01-trends-in-ai-infrastructure.md | 218 +++++----- ...mpetitive-machine-learning-and-teaching.md | 255 ++++++----- .../s20e03-trends-in-data-engineering.md | 260 ++++++------ ...0e04-mlops-in-corporations-and-startups.md | 263 ++++++------ .../s20e05-data-intensive-ai.md | 233 +++++------ ...ement-to-digital-warehousing-and-finops.md | 231 +++++----- .../s20e07-build-strong-career-in-data.md | 222 +++++----- ...8-from-hackathons-to-developer-advocacy.md | 266 ++++++------ ...ing-your-freelance-career-to-next-level.md | 187 ++++----- ...rithms-to-production-grade-data-systems.md | 217 +++++----- ...ategy-from-pipelines-to-business-impact.md | 266 ++++++------ ...-how-public-learning-turned-into-career.md | 194 ++++----- .../s21e05-from-astronomy-to-applied-ml.md | 277 ++++++------ .../s21e07-lessons-from-two-decades-of-ai.md | 273 ++++++------ ...ne-learning-career-in-data-and-teaching.md | 263 ++++++------ ...-tesla-building-data-products-that-work.md | 209 +++++---- ...ai-products-in-era-of-gen-ai-and-agents.md | 288 ++++++------- ...-from-applied-ai-tesla-waymo-and-beyond.md | 264 ++++++------ ...iotechnology-to-bioinformatics-software.md | 233 +++++------ ...-and-evaluate-ai-systems-in-age-of-llms.md | 244 +++++------ ...achine-learning-concepts-to-explain-ml.md} | 230 +++++----- ...25-08-16-free-machine-learning-courses.md} | 0 .../generate_central_narrative_podcasts.py | 329 +++++++++++++++ scripts/podcasts2.txt | 189 +++++++++ scripts/process_podcast_intros.py | 396 ++++++++++++++++++ 193 files changed, 22718 insertions(+), 23558 deletions(-) rename _podcast/{s07e06-ab-testing.md => ab-testing-and-product-experimentation.md} (97%) rename _podcast/{s08e04-machine-learning-and-personalization-in-healthcare.md => ai-in-healthcare-and-digital-therapeutics.md} 
(97%) rename _podcast/{s08e03-innovation-and-design-for-machine-learning.md => ai-ml-product-design-and-experimentation.md} (98%) rename _podcast/{s05e01-mastering-algorithms-and-data-structures.md => algorithms-data-structures-for-engineers.md} (96%) rename _podcast/{s03e11-analytics-engineer.md => analytics-engineer-skills-tools.md} (97%) rename _podcast/{s03e02-from-analytics-to-data-science.md => analytics-to-data-science-with-kaggle-portfolio.md} (97%) rename _podcast/{s06e05-post-doctoral-research.md => big-data-analytics-and-postdoc-research.md} (97%) rename _podcast/{s04e03-big-data-engineer-vs-data-scientist.md => big-data-engineer-vs-data-scientist.md} (98%) rename _podcast/{s13e03-biohacking-for-data-scientists-and-ml-engineers.md => biohacking-productivity-for-data-scientists-and-ml-engineers.md} (98%) rename _podcast/{s07e03-product-management-essentials.md => build-and-scale-ai-data-products-with-mlops.md} (97%) rename _podcast/{s15e09-data-engineering-for-fraud-prevention.md => build-and-scale-data-engineering-systems-for-fraud-detection.md} (97%) rename _podcast/{s01e03-building-ds-team.md => build-data-team.md} (97%) rename _podcast/{s14e09-interpretable-ai-and-ml.md => build-explainable-and-actionable-ai-ml-systems.md} (97%) rename _podcast/{s11e05-building-data-science-practice.md => building-and-scaling-data-science-practice-industrial-ai-mlops.md} (97%) rename _podcast/{s05e06-building-and-leading-data-teams.md => building-and-scaling-data-team.md} (97%) rename _podcast/{s11e06-product-owners-in-data-science.md => building-data-products-product-owner-vs-product-manager.md} (97%) rename _podcast/{s10e08-leading-data-research.md => building-data-science-programs-and-democratizing-high-performance-computing.md} (97%) rename _podcast/{s13e01-accelerating-adoption-of-ai-through-diversity.md => building-ml-communities-diversity-and-career-growth.md} (97%) rename _podcast/{s04e04-ml-startup.md => building-mlops-startup.md} (98%) rename 
_podcast/{s11e04-large-scale-entity-resolution.md => building-open-source-data-product-for-identity-resolution.md} (97%) rename _podcast/{s13e09-building-open-source-nlp-tool.md => building-open-source-nlp-tool.md} (97%) rename _podcast/{s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md => building-production-ml-platform-and-mlops-team.md} (97%) rename _podcast/{s14e01-building-scalable-and-reliable-machine-learning-systems.md => building-scalable-and-reliable-machine-learning-systems.md} (97%) rename _podcast/{s15e06-democratizing-causality.md => causal-inference-for-machine-learning.md} (97%) rename _podcast/{s04e09-chief-data-officer.md => chief-data-officer-data-strategy-and-org-design.md} (97%) rename _podcast/{s03e10-data-governance.md => cloud-data-governance.md} (97%) rename _podcast/{s01e02-processes.md => crisp-dm.md} (97%) rename _podcast/{s12e03-data-centric-ai.md => data-centric.md} (98%) rename _podcast/{s13e04-starting-consultancy-in-data-space.md => data-consulting-business-pricing-and-client-acquisition.md} (97%) rename _podcast/{s08e08-teaching-data-engineers.md => data-engineering-career-path-and-skills.md} (98%) rename _podcast/{s07e07-becoming-a-data-engineering-manager.md => data-engineering-leadership-and-modern-data-platforms.md} (97%) rename _podcast/{s05e02-data-engineering-acronyms.md => data-engineering-tools-modern-data-stack.md} (98%) rename _podcast/{s14e04-data-access-management.md => data-governance-data-access-management.md} (97%) rename _podcast/{s06e02-non-technical-interviews.md => data-interview-behavioral-and-portfolio-prep-guide.md} (99%) rename _podcast/{s11e08-technical-writing-and-data-journalism.md => data-journalism-python-visualization-storytelling.md} (96%) rename _podcast/{s03e08-data-led-professional.md => data-led-growth-event-tracking-and-reverse-etl.md} (97%) rename _podcast/{s10e06-data-mesh-101.md => data-mesh-architecture-decentralized-data-products.md} (97%) rename 
_podcast/{s14e02-practical-data-privacy.md => data-privacy-engineering-gdpr-machine-learning.md} (97%) rename _podcast/{s12e02-business-skills-for-data-professionals.md => data-professionals-business-skills-in-saas.md} (97%) rename _podcast/{s03e03-data-observability.md => data-quality-data-observability-data-reliability.md} (97%) rename _podcast/{s13e02-analytics-for-better-world.md => data-science-and-analytics-for-nonprofits-tech-for-good.md} (97%) rename _podcast/{s02e07-abc-data-science.md => data-science-career-abc-framework.md} (98%) rename _podcast/{s03e09-what-data-scientists-dont-mention.md => data-science-failures-and-mlops-lessons.md} (97%) rename _podcast/{s10e01-data-science-for-social-impact.md => data-science-for-public-policy-ethical-ai-social-impact.md} (97%) rename _podcast/{s03e04-interviewing-300-data-scientists.md => data-science-interview-and-cv-guide.md} (96%) rename _podcast/{s10e02-decoding-data-science-job-descriptions.md => data-science-job-red-flags-and-mismatched-roles.md} (97%) rename _podcast/{s06e09-data-science-manager.md => data-science-leadership-hiring-mlops.md} (97%) rename _podcast/{s13e06-secret-sauce-of-data-science-management.md => data-science-management-and-agile-machine-learning.md} (97%) rename _podcast/{s06e03-manager-vs-expert.md => data-science-manager-vs-expert-hiring-guide.md} (97%) rename _podcast/{s09e07-designing-data-science-organization.md => data-science-team-structure-and-org-design.md} (97%) rename _podcast/{s12e05-indie-hacking.md => data-scientist-and-indie-hacker-bootstrapping-side-projects.md} (97%) rename _podcast/{s14e03-data-strategy-key-principles-and-best-practices.md => data-strategy-and-dataops-for-ai-powered-products.md} (97%) rename _podcast/{s01e01-roles.md => data-team-roles.md} (98%) rename _podcast/{s03e04-effective-communication-with-business.md => data-translator-role-and-data-strategy.md} (97%) rename _podcast/{s11e03-from-data-science-to-dataops.md => 
dataops-and-gitops-best-practices-for-data-teams.md} (97%) rename _podcast/{s08e05-storytime-for-dataops.md => dataops-automation-and-reliable-data-pipelines.md} (98%) rename _podcast/{s02e11-dataops.md => dataops-principles-and-scalable-data-platforms.md} (97%) rename _podcast/{s07e01-datatalksclub-behind-the-scenes.md => datatalksclub-building-scaling-data-community.md} (97%) rename _podcast/{s15e03-llms-for-everyone.md => deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md} (97%) rename _podcast/{s03e07-market-yourself.md => developer-personal-brand-learn-in-public.md} (98%) rename _podcast/{s02e02-developer-advocacy.md => devrel-data-science-open-source-tools.md} (97%) rename _podcast/{s14e06-data-developer-relations.md => devrel-open-source-machine-learning.md} (97%) rename _podcast/{s05e09-business-acumen.md => feature-engineering-model-monitoring-and-data-governance.md} (97%) rename _podcast/{s09e04-freelancing-and-consulting-with-data-engineering.md => freelance-data-engineering-pricing-and-clients.md} (97%) rename _podcast/{s04e08-freelancing.md => freelancing-in-machine-learning.md} (97%) rename _podcast/{s12e09-staff-ai-engineer.md => from-academia-to-staff-ai-engineer-interviews-and-career-growth.md} (97%) rename _podcast/{s15e08-from-data-manager-to-data-architect.md => from-iot-data-engineering-to-leading-data-architect.md} (96%) rename _podcast/{s11e07-from-digital-marketing-to-analytics-engineering.md => from-marketing-to-analytics-engineering-sql-dbt-career-switch.md} (96%) rename _podcast/{s07e09-from-math-teacher-to-analytics-engineer.md => from-math-graduate-to-data-analytics.md} (97%) rename _podcast/{s03e06-from-physics-to-machine-learning.md => from-physics-to-computer-vision-career-transition.md} (97%) rename _podcast/{s04e01-from-swe-to-ml.md => from-software-engineer-to-machine-learning.md} (98%) rename _podcast/{s07e08-from-data-science-to-data-engineering.md => 
from-software-engineering-data-science-to-data-engineering-leadership.md} (99%) rename _podcast/{s12e01-from-software-engineer-to-data-science-manager.md => from-software-engineering-to-leading-data-science-teams.md} (97%) rename _podcast/{s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md => from-startup-engineering-to-freelance-data-science.md} (97%) rename _podcast/{s08e09-from-academia-to-data-analytics-and-engineering.md => get-data-analytics-and-data-engineering-job.md} (98%) rename _podcast/{s09e03-getting-data-engineering-job-(summary-and-q&a).md => get-data-engineering-job-prep-and-interview.md} (96%) rename _podcast/{s01e04-standing-out-as-a-data-scientist.md => get-data-scientist-job.md} (97%) rename _podcast/{s07e04-career-coaching.md => get-junior-data-job-and-transferable-skills.md} (97%) rename _podcast/{s11e02-data-science-career-development.md => hire-and-manage-data-science-teams-in-b2b-saas.md} (97%) rename _podcast/{s07e02-recruiting-data-professionals.md => hiring-data-scientists-and-analysts.md} (98%) rename _podcast/{s08e06-recruiting-data-engineers.md => hiring-for-data-engineering-jobs-in-europe.md} (96%) rename _podcast/{s09e09-hiring-data-science-talent.md => hiring-for-data-science-jobs-interview-questions-skills.md.md} (97%) rename _podcast/{s09e05-data-scientists-at-work.md => how-to-break-into-data-science.md} (98%) rename _podcast/{s12e07-navigating-career-changes-in-machine-learning.md => how-to-grow-your-ml-engineering-career.md} (97%) rename _podcast/{s08e02-hacking-your-data-career.md => how-to-stand-out-in-data-science.md} (98%) rename _podcast/{s08e07-from-roasting-coffee-to-backend-development.md => how-to-switch-to-ml-tech-without-experience.md} (98%) rename _podcast/{s11e01-from-testing-phones-to-managing-nlp-projects.md => how-to-transition-into-ml-and-data-engineering-from-qa.md} (98%) rename _podcast/{s09e06-developer-advocacy-engineer-for-open-source.md => hugging-face-contributions-and-nlp-portfolio.md} 
(97%) rename _podcast/{s04e06-humans-in-the-loop.md => human-centered-mlops-and-model-monitoring.md} (97%) rename _podcast/{s13e08-navigating-industrial-data-challenges.md => industrial-data-small-data-production-machine-learning.md} (97%) rename _podcast/{s15e02-investing-in-open-source-data-tools.md => investing-in-open-source-developer-tools.md} (97%) rename _podcast/{s05e08-the-last-mile-in-data.md => last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md} (97%) rename _podcast/{s04e07-launching-a-startup.md => launch-and-build-retail-startup.md} (97%) rename _podcast/{s13e07-mastering-self-learning-in-machine-learning.md => learn-machine-learning-self-taught-bioinformatics.md} (97%) rename _podcast/{s12e06-preparing-for-data-science-interview.md => machine-learning-data-science-interview-prep.md} (97%) rename _podcast/{s02e06-decision-optimization.md => machine-learning-decision-optimization.md} (75%) rename _podcast/{s04e05-running-from-complexity.md => machine-learning-engineering-production-best-practices.md} (98%) rename _podcast/{s09e02-using-data-for-asteroid-mining.md => machine-learning-for-asteroid-mining-and-water-detection.md} (98%) rename _podcast/{s09e01-machine-learning-in-marketing.md => machine-learning-in-marketing-attribution-marketing-mix-modeling.md} (97%) rename _podcast/{s07e05-machine-learning-system-design-interview.md => machine-learning-system-design-interview.md} (97%) rename _podcast/{s02e09-roles-skills-monetizing-ml.md => make-money-with-machine-learning-roles-skills.md} (98%) rename _podcast/{s05e03-metrics-and-kpis.md => ml-engineering-kpis-and-metrics-strategy.md} (98%) rename _podcast/{s06e07-product-management-for-machine-learning.md => ml-product-manager-and-mlops-platform-strategy.md} (97%) rename _podcast/{s15e01-why-machine-learning-design-broken.md => ml-system-design.md} (97%) rename _podcast/{s02e12-communities.md => mlops-community-building-and-meetups.md} (97%) rename 
_podcast/{s02e05-feature-stores.md => mlops-feature-stores-feature-stores-feast-tecton.md} (93%) rename _podcast/{s02e04-mlops.md => mlops-kubeflow-model-monitoring.md} (97%) rename _podcast/{s10e03-mlops-architect.md => mlops-model-monitoring-data-observability.md} (98%) rename _podcast/{s14e07-from-mlops-to-dataops.md => modern-data-pipelines-orchestration-ingestion-modeling.md} (97%) rename _podcast/{s10e07-dataset-creation-and-curation.md => nlp-dataset-creation-annotation-tools-workflows.md} (97%) rename _podcast/{s06e08-nlp-teams.md => nlp-team-hiring-and-production-mlops.md} (97%) rename _podcast/{s02e03-open-source.md => open-source-ml-contributions.md} (92%) rename _podcast/{s09e08-from-open-source-maintainer-to-founder.md => open-source-turned-into-career-and-startup-creation.md} (97%) rename _podcast/{s02e08-personal-branding.md => personal-brand-for-data-professionals.md} (76%) rename _podcast/{s06e06-from-academia-to-industry.md => postdoc-to-data-science-lead-career-transition.md} (98%) rename _podcast/{s15e04-good-bad-and-ugly-of-gpt.md => practical-llm-use-cases-and-product-patterns.md} (97%) rename _podcast/{s15e07-pragmatic-and-standardized-mlops.md => pragmatic-and-standardized-mlops.md} (97%) rename _podcast/{s06e04-becoming-a-data-product-manager.md => product-designer-to-data-product-manager.md} (97%) rename _podcast/{s05e07-ml-vs-analytics.md => production-ml-mlops-and-data-team-building.md} (97%) rename _podcast/{s04e02-build-your-own-data-pipeline.md => production-ml-pipelines-with-aws-and-kafka.md} (97%) rename _podcast/{s03e01-from-pm-to-ds.md => project-manager-to-data-scientist.md} (97%) rename _podcast/{s02e10-public-speaking.md => public-speaking-for-data-scientists.md} (98%) rename _podcast/{s15e05-mastering-data-engineering-as-remote-worker.md => remote-data-engineering-work-and-building-iot-platforms.md} (96%) rename _podcast/{s05e05-researchers-vs-engineers.md => research-to-production-ml-systems-roadmap.md} (98%) rename 
_podcast/{s10e09-responsible-and-explainable-ai.md => responsible-explainable-ai-bias-detection.md} (97%) rename _podcast/{s10e05-growing-data-engineering-team-in-scale-up.md => scale-data-engineering-teams-self-service-platforms.md} (97%) rename _podcast/{s10e04-lessons-learned-about-data-&-ai-at-enterprises.md => scale-enterprise-ai-mlops-data-first-strategy.md} (97%) rename _podcast/{s13e05-se4ml-software-engineering-for-machine-learning.md => software-engineering-for-machine-learning.md} (97%) rename _podcast/{s05e04-introducing-data-science-in-startups.md => solo-data-scientist.md} (97%) rename _podcast/{s06e01-solopreneur.md => solopreneurship-for-developers-and-data-professionals.md} (98%) rename _podcast/{s11e09-teaching-and-mentoring-in-data-analytics.md => teaching-mentoring-data-analytics-fintech.md} (97%) rename _podcast/{s12e04-doing-software-engineering-in-academia.md => teaching-reproducible-research-and-open-science-coding-practices-for-academia.md} (97%) rename _podcast/{s01e05-mentoring.md => tech-mentoring-how-to-find-and-become-a-mentor.md.md} (93%) rename _podcast/{s02e01-writing.md => technical-writing-for-data-scientists.md} (91%) rename _podcast/{ => to-update}/s16e01-datatalks-club-anniversary-interview.md (97%) rename _podcast/{ => to-update}/s16e02-bridging-data-science-and-healthcare.md (96%) rename _podcast/{ => to-update}/s16e03-collaborative-data-science-in-business.md (97%) rename _podcast/{ => to-update}/s16e04-from-marketing-to-product-owner-in-search.md (95%) rename _podcast/{ => to-update}/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md (96%) rename _podcast/{ => to-update}/s16e06-unwritten-rules-for-success-in-machine-learning.md (96%) rename _podcast/{ => to-update}/s16e07-cracking-code-machine-learning-made-understandable.md (96%) rename _podcast/{ => to-update}/s16e08-ai-for-digital-health.md (95%) rename _podcast/{ => to-update}/s16e09-become-data-freelancer.md (97%) rename _podcast/{ => 
to-update}/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md (97%) rename _podcast/{ => to-update}/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md (97%) rename _podcast/{ => to-update}/s17e03-stock-market-analysis-with-python-and-machine-learning.md (95%) rename _podcast/{ => to-update}/s17e04-bayesian-modeling-and-probabilistic-programming.md (96%) rename _podcast/{ => to-update}/s17e05-machine-learning-engineering-in-finance.md (96%) rename _podcast/{ => to-update}/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md (97%) rename _podcast/{ => to-update}/s17e07-make-impact-through-volunteering-open-source-work.md (97%) rename _podcast/{ => to-update}/s17e08-building-machine-learning-products.md (96%) rename _podcast/{ => to-update}/s17e09-building-production-search-systems.md (96%) rename _podcast/{ => to-update}/s18e01-inclusive-data-leadership-coaching.md (96%) rename _podcast/{ => to-update}/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md (96%) rename _podcast/{ => to-update}/s18e03-ai-for-ecology-biodiversity-and-conservation.md (68%) rename _podcast/{ => to-update}/s18e04-working-in-open-source-probabl-ai-and-sklearn.md (94%) rename _podcast/{ => to-update}/s18e05-community-building-and-teaching-in-ai-tech.md (92%) rename _podcast/{ => to-update}/s18e07-building-domestic-risk-assessment-tool.md (68%) rename _podcast/{ => to-update}/s18e09-dataops-observability-and-cure-for-data-team-blues.md (91%) rename _podcast/{ => to-update}/s19e01-using-data-to-create-liveable-cities.md (95%) rename _podcast/{ => to-update}/s19e02-human-centered-ai-for-disordered-speech-recognition.md (94%) rename _podcast/{ => to-update}/s19e03-datatalks-club-anniversary-podcast.md (96%) rename _podcast/{ => to-update}/s19e04-mlops-as-team.md (94%) rename _podcast/{ => to-update}/s19e05-large-hadron-collider-and-mentorship.md (95%) rename _podcast/{ => 
to-update}/s19e06-ai-in-industry-trust-return-on-investment-and-future.md (94%) rename _podcast/{ => to-update}/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md (95%) rename _podcast/{ => to-update}/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md (95%) rename _podcast/{ => to-update}/s19e09-linguistics-and-fairness.md (94%) rename _podcast/{ => to-update}/s20e01-trends-in-ai-infrastructure.md (93%) rename _podcast/{ => to-update}/s20e02-competitive-machine-learning-and-teaching.md (89%) rename _podcast/{ => to-update}/s20e03-trends-in-data-engineering.md (91%) rename _podcast/{ => to-update}/s20e04-mlops-in-corporations-and-startups.md (96%) rename _podcast/{ => to-update}/s20e05-data-intensive-ai.md (95%) rename _podcast/{ => to-update}/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md (96%) rename _podcast/{ => to-update}/s20e07-build-strong-career-in-data.md (96%) rename _podcast/{ => to-update}/s20e08-from-hackathons-to-developer-advocacy.md (96%) rename _podcast/{ => to-update}/s20e09-taking-your-freelance-career-to-next-level.md (93%) rename _podcast/{ => to-update}/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md (94%) rename _podcast/{ => to-update}/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md (96%) rename _podcast/{ => to-update}/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md (96%) rename _podcast/{ => to-update}/s21e05-from-astronomy-to-applied-ml.md (96%) rename _podcast/{ => to-update}/s21e07-lessons-from-two-decades-of-ai.md (94%) rename _podcast/{ => to-update}/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md (96%) rename _podcast/{ => to-update}/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md (96%) rename _podcast/{ => to-update}/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md (92%) rename _podcast/{ => 
to-update}/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md (95%) rename _podcast/{ => to-update}/s22e03-from-biotechnology-to-bioinformatics-software.md (95%) rename _podcast/{ => to-update}/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md (95%) rename _podcast/{s08e01-visualising-machine-learning.md => visualizing-machine-learning-concepts-to-explain-ml.md} (96%) rename _posts/{2025-08-16-ultimate-list-of-20-free-online-courses-on-machine-learning.md => 2025-08-16-free-machine-learning-courses.md} (100%) create mode 100755 scripts/generate_central_narrative_podcasts.py create mode 100644 scripts/podcasts2.txt create mode 100755 scripts/process_podcast_intros.py diff --git a/_podcast/s07e06-ab-testing.md b/_podcast/ab-testing-and-product-experimentation.md similarity index 97% rename from _podcast/s07e06-ab-testing.md rename to _podcast/ab-testing-and-product-experimentation.md index f1e24b3c..a4b72513 100644 --- a/_podcast/s07e06-ab-testing.md +++ b/_podcast/ab-testing-and-product-experimentation.md @@ -1,41 +1,115 @@ --- +title: 'Product Analytics & A/B Testing: Causality, Metrics, Power Analysis, A/A Tests' +short: A/B Testing +season: 7 episode: 6 guests: - jakobgraff -short: A/B Testing -title: 'Product Analytics & A/B Testing: Causality, Metrics, Power Analysis, A/A Tests' -description: 'Master product analytics, A/B testing & power analysis: design stable - metrics, validate randomization with A/A tests, plan sample size to de-risk features.' -intro: How do you design product experiments that truly establish causality and avoid - costly false conclusions? In this episode, Jakob Graff — Director of Data Science - and Data Analytics at diconium, with prior analytics leadership at Inkitt, Babbel, - King and a background in econometrics — walks through practical product analytics - and A/B testing strategies focused on causality and reliable metrics.

We - cover why randomized experiments mirror clinical trials, how experimentation de-risks - features and builds organizational learning, and a concrete case study on subscription - vs. points revenue metric design. Jakob explains experimentation platform trade-offs - (third-party vs. in-house), traffic splitters, assignment tracking, and why A/A - tests validate system trust. You’ll hear best practices for first tests (two-group - simplicity), metric selection considering noise and seasonality, and how to plan - duration with power analysis and sample-size calculations. The discussion also compares - z/t/nonparametric tests, p-value intuition from A/A comparisons, frequentist vs - Bayesian perspectives, and multi-armed test considerations.

Listen to learn - practical steps for designing randomized experiments, selecting stable metrics, - planning sample sizes, and interpreting results so your product analytics and A/B - testing produce actionable, causal insights. -topics: -- data science -- practices +image: images/podcast/s07e06-ab-testing.jpg ids: anchor: AB-Testing---Jakob-Graff-e1eq73v youtube: 0Gqx1LtqRZU -image: images/podcast/s07e06-ab-testing.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/AB-Testing---Jakob-Graff-e1eq73v apple: https://podcasts.apple.com/us/podcast/a-b-testing-jakob-graff/id1541710331?i=1000552243668 spotify: https://open.spotify.com/episode/3LhBOO1UANCGbOwkntZt4j youtube: https://www.youtube.com/watch?v=0Gqx1LtqRZU -season: 7 + +description: 'Master product analytics, A/B testing & power analysis: design stable metrics, validate randomization with A/A tests, plan sample size to de-risk features.' +intro: How do you design product experiments that truly establish causality and avoid costly false conclusions? In this episode, Jakob Graff — Director of Data Science and Data Analytics at diconium, with prior analytics leadership at Inkitt, Babbel, King and a background in econometrics — walks through practical product analytics and A/B testing strategies focused on causality and reliable metrics.

We cover why randomized experiments mirror clinical trials, how experimentation de-risks features and builds organizational learning, and a concrete case study on subscription vs. points revenue metric design. Jakob explains experimentation platform trade-offs (third-party vs. in-house), traffic splitters, assignment tracking, and why A/A tests validate system trust. You’ll hear best practices for first tests (two-group simplicity), metric selection considering noise and seasonality, and how to plan duration with power analysis and sample-size calculations. The discussion also compares z/t/nonparametric tests, p-value intuition from A/A comparisons, frequentist vs Bayesian perspectives, and multi-armed test considerations.

Listen to learn practical steps for designing randomized experiments, selecting stable metrics, planning sample sizes, and interpreting results so your product analytics and A/B testing produce actionable, causal insights +topics: +- data science +- practices +dateadded: 2022-02-27 + +duration: PT01H03M37S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=0 + endOffset: 63 +- name: Guest Background & Career Transition to Data Science + startOffset: 63 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=63 + endOffset: 311 +- name: 'Econometrics to Product Analytics: Causality Emphasis' + startOffset: 311 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=311 + endOffset: 493 +- name: 'A/B Testing Explained: Clinical Trials Analogy & Randomization' + startOffset: 493 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=493 + endOffset: 708 +- name: 'Experimentation Purpose: Establishing Causality & Controlling Noise' + startOffset: 708 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=708 + endOffset: 867 +- name: 'Case Study: Subscription vs Points — Revenue Metric Design' + startOffset: 867 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=867 + endOffset: 1086 +- name: De-risking Features & Building Organizational Learning with Experiments + startOffset: 1086 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1086 + endOffset: 1434 +- name: 'Experimentation Platform Choices: Third-Party vs In-House' + startOffset: 1434 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1434 + endOffset: 1484 +- name: Traffic Splitter Implementation, Assignment Tracking & Monitoring + startOffset: 1484 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1484 + endOffset: 1672 +- name: 'A/A Testing: Validating Randomization and System Trust' + startOffset: 1672 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1672 + endOffset: 1805 +- name: 'First Test Best Practices: Two-Group Design & 
Simplicity' + startOffset: 1805 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1805 + endOffset: 2003 +- name: 'Metric Selection: Noise, Stability, Seasonality & Business Cycles' + startOffset: 2003 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2003 + endOffset: 2264 +- name: 'Test Duration & Power Analysis: Sample Size Planning' + startOffset: 2264 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2264 + endOffset: 2423 +- name: 'Statistical Tests Overview: Z-test, T-test, and Nonparametric Options' + startOffset: 2423 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2423 + endOffset: 2679 +- name: 'Data Distribution Checks: Histograms, Tails, and Visualization' + startOffset: 2679 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2679 + endOffset: 2864 +- name: 'P-value Intuition: Explaining Significance via A/A Comparison' + startOffset: 2864 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2864 + endOffset: 3115 +- name: 'Frequentist vs Bayesian Testing: Credible Intervals, Priors & Costs' + startOffset: 3115 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3115 + endOffset: 3548 +- name: 'Multi-armed Tests (A/B/C/D): Duration, Power, and Multiple Comparisons' + startOffset: 3548 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3548 + endOffset: 3772 +- name: Practical Experimentation Tips & Analogies (Pizza Dough) + startOffset: 3772 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3772 + endOffset: 3839 +- name: Hiring, Resources & Contact Information + startOffset: 3839 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3839 + endOffset: 3880 +- name: Episode Wrap-up and Key Takeaways + startOffset: 3880 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3880 + endOffset: 3817 + transcript: - header: Podcast Introduction - header: Guest Background & Career Transition to Data Science @@ -1009,91 +1083,4 @@ transcript: sec: 3880 time: '1:04:40' who: Alexey -dateadded: '2022-02-27' -duration: PT01H03M37S 
-quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=0 - endOffset: 63 -- name: Guest Background & Career Transition to Data Science - startOffset: 63 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=63 - endOffset: 311 -- name: 'Econometrics to Product Analytics: Causality Emphasis' - startOffset: 311 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=311 - endOffset: 493 -- name: 'A/B Testing Explained: Clinical Trials Analogy & Randomization' - startOffset: 493 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=493 - endOffset: 708 -- name: 'Experimentation Purpose: Establishing Causality & Controlling Noise' - startOffset: 708 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=708 - endOffset: 867 -- name: 'Case Study: Subscription vs Points — Revenue Metric Design' - startOffset: 867 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=867 - endOffset: 1086 -- name: De-risking Features & Building Organizational Learning with Experiments - startOffset: 1086 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1086 - endOffset: 1434 -- name: 'Experimentation Platform Choices: Third-Party vs In-House' - startOffset: 1434 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1434 - endOffset: 1484 -- name: Traffic Splitter Implementation, Assignment Tracking & Monitoring - startOffset: 1484 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1484 - endOffset: 1672 -- name: 'A/A Testing: Validating Randomization and System Trust' - startOffset: 1672 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1672 - endOffset: 1805 -- name: 'First Test Best Practices: Two-Group Design & Simplicity' - startOffset: 1805 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1805 - endOffset: 2003 -- name: 'Metric Selection: Noise, Stability, Seasonality & Business Cycles' - startOffset: 2003 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2003 - endOffset: 2264 -- name: 'Test Duration & Power 
Analysis: Sample Size Planning' - startOffset: 2264 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2264 - endOffset: 2423 -- name: 'Statistical Tests Overview: Z-test, T-test, and Nonparametric Options' - startOffset: 2423 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2423 - endOffset: 2679 -- name: 'Data Distribution Checks: Histograms, Tails, and Visualization' - startOffset: 2679 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2679 - endOffset: 2864 -- name: 'P-value Intuition: Explaining Significance via A/A Comparison' - startOffset: 2864 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2864 - endOffset: 3115 -- name: 'Frequentist vs Bayesian Testing: Credible Intervals, Priors & Costs' - startOffset: 3115 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3115 - endOffset: 3548 -- name: 'Multi-armed Tests (A/B/C/D): Duration, Power, and Multiple Comparisons' - startOffset: 3548 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3548 - endOffset: 3772 -- name: Practical Experimentation Tips & Analogies (Pizza Dough) - startOffset: 3772 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3772 - endOffset: 3839 -- name: Hiring, Resources & Contact Information - startOffset: 3839 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3839 - endOffset: 3880 -- name: Episode Wrap-up and Key Takeaways - startOffset: 3880 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3880 - endOffset: 3817 --- diff --git a/_podcast/s08e04-machine-learning-and-personalization-in-healthcare.md b/_podcast/ai-in-healthcare-and-digital-therapeutics.md similarity index 97% rename from _podcast/s08e04-machine-learning-and-personalization-in-healthcare.md rename to _podcast/ai-in-healthcare-and-digital-therapeutics.md index f0697a00..e9a8ff79 100644 --- a/_podcast/s08e04-machine-learning-and-personalization-in-healthcare.md +++ b/_podcast/ai-in-healthcare-and-digital-therapeutics.md @@ -1,41 +1,112 @@ --- +title: 'AI in Healthcare & Digital Therapeutics: 
Building Data Teams, Personalization, A/B Testing & Ethics' +short: Machine Learning and Personalization in Healthcare +season: 8 episode: 4 guests: - stefangudmundsson -intro: How can AI power effective digital therapeutics while balancing personalization, - rapid experimentation, and patient safety? In this episode, Stefan Gudmundsson — - Director of Data, Analytics, and AI with a track record building ML and data teams - at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches - for AI in healthcare and digital therapeutics.

We cover how machine learning - is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s - gamified digital therapeutics and quality‑of‑life goals; behavioral design that - minimizes in‑app time; and engagement strategies like charity incentives versus - leaderboards. Stefan explains building the analytics foundation—data pipelines, - dashboards, and experimentation capabilities—and why A/B testing and agenda‑driven - recommender systems are core to personalization. He also tackles data privacy and - ethics (GDPR/HIPAA, de‑identification), remote monitoring with wearables, clinical - trials versus app experiments, managing medical risk, and hiring and scaling data, - ML, and engineering teams.

Listen to get concrete frameworks for building - data teams, running safe, measurable experiments, designing personalized interventions, - and embedding ethical safeguards into AI-driven digital therapeutics. +image: images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg ids: anchor: Machine-Learning-and-Personalization-in-Healthcare---Stefan-Gudmundsson-e1h5gdg youtube: IDzhmmKeNG4 -image: images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Machine-Learning-and-Personalization-in-Healthcare---Stefan-Gudmundsson-e1h5gdg apple: https://podcasts.apple.com/us/podcast/machine-learning-and-personalization-in-healthcare/id1541710331?i=1000557726819 spotify: https://open.spotify.com/episode/3s78PtlbUmecuMOXwO8aD5?si=991e1811a5204305 youtube: https://www.youtube.com/watch?v=IDzhmmKeNG4 -season: 8 -short: Machine Learning and Personalization in Healthcare -title: 'AI in Healthcare & Digital Therapeutics: Building Data Teams, Personalization, - A/B Testing & Ethics' -description: 'Learn to build data teams and ethical AI in healthcare: actionable personalization, - A/B testing for digital therapeutics, GDPR-safe experiments.' + +description: 'Learn to build data teams and ethical AI in healthcare: actionable personalization, A/B testing for digital therapeutics, GDPR-safe experiments.' +intro: How can AI power effective digital therapeutics while balancing personalization, rapid experimentation, and patient safety? In this episode, Stefan Gudmundsson — Director of Data, Analytics, and AI with a track record building ML and data teams at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches for AI in healthcare and digital therapeutics.

We cover how machine learning is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s gamified digital therapeutics and quality‑of‑life goals; behavioral design that minimizes in‑app time; and engagement strategies like charity incentives versus leaderboards. Stefan explains building the analytics foundation—data pipelines, dashboards, and experimentation capabilities—and why A/B testing and agenda‑driven recommender systems are core to personalization. He also tackles data privacy and ethics (GDPR/HIPAA, de‑identification), remote monitoring with wearables, clinical trials versus app experiments, managing medical risk, and hiring and scaling data, ML, and engineering teams.

Listen to get concrete frameworks for building data teams, running safe, measurable experiments, designing personalized interventions, and embedding ethical safeguards into AI-driven digital therapeutics topics: - machine learning - healthcare +dateadded: 2022-04-16 + +duration: PT00H57M48S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=0 + endOffset: 38 +- name: 'Career Snapshot: Developer to AI & Data Leader' + startOffset: 38 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=38 + endOffset: 128 +- name: Building AI Teams at King and H&M + startOffset: 128 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=128 + endOffset: 367 +- name: 'Machine Learning in Healthcare: Diagnosis, Drug Discovery & AlphaFold' + startOffset: 367 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=367 + endOffset: 602 +- name: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality‑of‑Life + Goals' + startOffset: 602 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=602 + endOffset: 904 +- name: 'Behavioral Design & Habit Formation: Low In‑App Time Strategy' + startOffset: 904 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=904 + endOffset: 1167 +- name: 'Building Data Culture: Metrics, Buy‑in, and Responsible Experimentation' + startOffset: 1167 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1167 + endOffset: 1543 +- name: 'Engagement & Rewards: Charity Incentives vs. 
Leaderboards' + startOffset: 1543 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1543 + endOffset: 1622 +- name: 'Analytics Foundation: Data Pipelines, Dashboards & Experimentation Capabilities' + startOffset: 1622 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1622 + endOffset: 1773 +- name: 'Remote Monitoring & Wearables: Activity and Heart‑Rate Variability' + startOffset: 1773 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1773 + endOffset: 1901 +- name: 'Data Privacy & Ethics: GDPR/HIPAA, De‑identification, and Empathy' + startOffset: 1901 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1901 + endOffset: 2139 +- name: 'Personalization Strategy: Agenda‑Driven Recommender Systems' + startOffset: 2139 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2139 + endOffset: 2397 +- name: 'A/B Testing as Personalization Foundation: Segmentation & Iteration' + startOffset: 2397 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2397 + endOffset: 2580 +- name: 'Experimentation Platform: Variant Availability and Measurement' + startOffset: 2580 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2580 + endOffset: 2729 +- name: 'Clinical Trials vs. 
App Experiments: Scale, Cost, and Bias' + startOffset: 2729 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2729 + endOffset: 2965 +- name: 'Data‑Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' + startOffset: 2965 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2965 + endOffset: 3115 +- name: 'Managing Medical Risk: Safeguards for Safe Experimentation' + startOffset: 3115 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3115 + endOffset: 3201 +- name: 'Hiring & Scaling: Growing the Data, ML and Engineering Team' + startOffset: 3201 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3201 + endOffset: 3353 +- name: 'AI for Mental Health: Monitoring Signals and Supportive Interventions' + startOffset: 3353 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3353 + endOffset: 3449 +- name: 'Resources & Contact: LinkedIn and Open Roles at Sidekick Health' + startOffset: 3449 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3449 + endOffset: 3468 + transcript: - header: Podcast Introduction - line: Hello, everyone. 
This week we'll talk about machine learning in healthcare, @@ -1161,90 +1232,6 @@ transcript: sec: 3469 time: '57:49' who: Stefan -dateadded: '2022-04-16' -duration: PT00H57M48S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=0 - endOffset: 38 -- name: 'Career Snapshot: Developer to AI & Data Leader' - startOffset: 38 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=38 - endOffset: 128 -- name: Building AI Teams at King and H&M - startOffset: 128 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=128 - endOffset: 367 -- name: 'Machine Learning in Healthcare: Diagnosis, Drug Discovery & AlphaFold' - startOffset: 367 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=367 - endOffset: 602 -- name: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality‑of‑Life - Goals' - startOffset: 602 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=602 - endOffset: 904 -- name: 'Behavioral Design & Habit Formation: Low In‑App Time Strategy' - startOffset: 904 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=904 - endOffset: 1167 -- name: 'Building Data Culture: Metrics, Buy‑in, and Responsible Experimentation' - startOffset: 1167 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1167 - endOffset: 1543 -- name: 'Engagement & Rewards: Charity Incentives vs. 
Leaderboards' - startOffset: 1543 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1543 - endOffset: 1622 -- name: 'Analytics Foundation: Data Pipelines, Dashboards & Experimentation Capabilities' - startOffset: 1622 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1622 - endOffset: 1773 -- name: 'Remote Monitoring & Wearables: Activity and Heart‑Rate Variability' - startOffset: 1773 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1773 - endOffset: 1901 -- name: 'Data Privacy & Ethics: GDPR/HIPAA, De‑identification, and Empathy' - startOffset: 1901 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1901 - endOffset: 2139 -- name: 'Personalization Strategy: Agenda‑Driven Recommender Systems' - startOffset: 2139 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2139 - endOffset: 2397 -- name: 'A/B Testing as Personalization Foundation: Segmentation & Iteration' - startOffset: 2397 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2397 - endOffset: 2580 -- name: 'Experimentation Platform: Variant Availability and Measurement' - startOffset: 2580 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2580 - endOffset: 2729 -- name: 'Clinical Trials vs. 
App Experiments: Scale, Cost, and Bias' - startOffset: 2729 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2729 - endOffset: 2965 -- name: 'Data‑Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' - startOffset: 2965 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2965 - endOffset: 3115 -- name: 'Managing Medical Risk: Safeguards for Safe Experimentation' - startOffset: 3115 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3115 - endOffset: 3201 -- name: 'Hiring & Scaling: Growing the Data, ML and Engineering Team' - startOffset: 3201 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3201 - endOffset: 3353 -- name: 'AI for Mental Health: Monitoring Signals and Supportive Interventions' - startOffset: 3353 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3353 - endOffset: 3449 -- name: 'Resources & Contact: LinkedIn and Open Roles at Sidekick Health' - startOffset: 3449 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3449 - endOffset: 3468 --- Links: diff --git a/_podcast/s08e03-innovation-and-design-for-machine-learning.md b/_podcast/ai-ml-product-design-and-experimentation.md similarity index 98% rename from _podcast/s08e03-innovation-and-design-for-machine-learning.md rename to _podcast/ai-ml-product-design-and-experimentation.md index d1ba5a75..e5145a56 100644 --- a/_podcast/s08e03-innovation-and-design-for-machine-learning.md +++ b/_podcast/ai-ml-product-design-and-experimentation.md @@ -1,41 +1,142 @@ --- +title: 'AI Product Design: Algorithm-Ready UX, Rapid Experiments & Data-Driven Roadmaps' +short: Innovation and Design for Machine Learning +season: 8 episode: 3 guests: - liesbethdingemans -intro: How do you design products that are “algorithm-ready” while running rapid experiments - and building data-driven roadmaps? 
In this episode, Liesbeth Dingemans—strategy and - AI leader, founder of Dingemans Consulting, former VP of Revenue at Source.ag and - Head of AI Strategy at Prosus—walks through pragmatic approaches to AI product design - that bridge vision and execution.

We cover algorithm-friendly UX and signal - collection, a concrete interaction-design case study comparing TikTok and Instagram - signals, and the Double Diamond framework for moving from problem framing to solution - exploration. Liesbeth explains scoping and prioritization, parallel experiments - and proofs of concept, one-week design sprints, appropriate timeframes for research-to-scale, - and the role of designers, data scientists, engineers and product managers in shaping - AI roadmaps.

Listeners will learn how to avoid rework by involving data - science early, use scoping documents to challenge assumptions, create measurable - experiments (the Task Force/“Jet Ski” model), and build data-driven pitches for - long-term bets versus quarterly OKRs. Tune in for concrete frameworks and practices - to make AI product design, rapid experiments, and data-driven roadmaps work in your - organization. +image: images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg ids: anchor: Innovation-and-Design-for-Machine-Learning---Liesbeth-Dingemans-e1gq0en youtube: tcqBfZw41FM -image: images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Innovation-and-Design-for-Machine-Learning---Liesbeth-Dingemans-e1gq0en apple: https://podcasts.apple.com/us/podcast/innovation-and-design-for-machine-learning-liesbeth/id1541710331?i=1000556693861 spotify: https://open.spotify.com/episode/4vhTQJ6Aj9z5VHm9UsHspv youtube: https://www.youtube.com/watch?v=tcqBfZw41FM -season: 8 -short: Innovation and Design for Machine Learning -title: 'AI Product Design: Algorithm-Ready UX, Rapid Experiments & Data-Driven Roadmaps' -description: 'Master AI product design: build algorithm-ready UX, run rapid experiments - and craft data-driven roadmaps to prioritize innovation and ship measurable results.' + +description: 'Master AI product design: build algorithm-ready UX, run rapid experiments and craft data-driven roadmaps to prioritize innovation and ship measurable results.' +intro: How do you design products that are “algorithm-ready” while running rapid experiments and building data-driven roadmaps? In this episode, Liesbeth Dingemans—strategy and AI leader, founder of Dingemans Consulting, former VP of Revenue at Source.ag and Head of AI Strategy at Prosus—walks through pragmatic approaches to AI product design that bridge vision and execution.

We cover algorithm-friendly UX and signal collection, a concrete interaction-design case study comparing TikTok and Instagram signals, and the Double Diamond framework for moving from problem framing to solution exploration. Liesbeth explains scoping and prioritization, parallel experiments and proofs of concept, one-week design sprints, appropriate timeframes for research-to-scale, and the role of designers, data scientists, engineers and product managers in shaping AI roadmaps.

Listeners will learn how to avoid rework by involving data science early, use scoping documents to challenge assumptions, create measurable experiments (the Task Force/“Jet Ski” model), and build data-driven pitches for long-term bets versus quarterly OKRs. Tune in for concrete frameworks and practices to make AI product design, rapid experiments, and data-driven roadmaps work in your organization topics: - machine learning - design thinking +- strategy +- ai - practices +dateadded: 2022-04-10 + +duration: PT00H59M14S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=0 + endOffset: 78 +- name: 'Guest Background: Strategy, Product and AI Trajectory' + startOffset: 78 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=78 + endOffset: 221 +- name: 'Interdisciplinary Perspective: Physics Meets Humanities' + startOffset: 221 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=221 + endOffset: 307 +- name: Design as a User-Centered Product Process + startOffset: 307 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=307 + endOffset: 403 +- name: Algorithm-Friendly Product Design & Signal Collection + startOffset: 403 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=403 + endOffset: 604 +- name: 'Interaction Design Case Study: TikTok vs Instagram Signals' + startOffset: 604 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=604 + endOffset: 732 +- name: 'Double Diamond Framework: Problem Framing to Solutions' + startOffset: 732 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=732 + endOffset: 872 +- name: 'Problem Discovery: Scoping and Prioritizing User Problems' + startOffset: 872 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=872 + endOffset: 962 +- name: 'Solution Exploration: Parallel Experiments & Proofs of Concept' + startOffset: 962 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=962 + endOffset: 1101 +- name: Timeframes for Research, Prototyping and 
Scaling + startOffset: 1101 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1101 + endOffset: 1217 +- name: Design Thinking Overview & Google PAIR Resources + startOffset: 1217 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1217 + endOffset: 1396 +- name: 'Design Sprint Structure: One-Week Prototyping Approach' + startOffset: 1396 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1396 + endOffset: 1500 +- name: 'Cross-Functional Participation: Designers, Data Scientists, PMs' + startOffset: 1500 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1500 + endOffset: 1633 +- name: 'Engineering Involvement: Building Algorithm-Ready Interfaces' + startOffset: 1633 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1633 + endOffset: 1698 +- name: 'Data Scientists in Problem Definition: Avoiding Rework' + startOffset: 1698 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1698 + endOffset: 1864 +- name: 'Scoping Documents: Challenging Assumptions with "Why"' + startOffset: 1864 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1864 + endOffset: 2005 +- name: Organizational Miscommunication & Backtracking Problems + startOffset: 2005 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2005 + endOffset: 2235 +- name: Product Managers’ Role in AI Roadmaps and Prioritization + startOffset: 2235 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2235 + endOffset: 2373 +- name: 'Innovation vs Quarterly OKRs: Making Space for Long-Term Bets' + startOffset: 2373 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2373 + endOffset: 2599 +- name: 'Radical Innovation Example: Second-Hand Car Trust Solutions' + startOffset: 2599 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2599 + endOffset: 2790 +- name: 'Building Evidence: Data-Driven Pitches for Big Ideas' + startOffset: 2790 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2790 + endOffset: 2956 +- name: 'Task Force Model (Jet Ski): Rapid Experimentation Teams' + startOffset: 2956 + url: 
https://www.youtube.com/watch?v=tcqBfZw41FM&t=2956 + endOffset: 3165 +- name: 'Innovation Workflow: From Discovery to Investment Case' + startOffset: 3165 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3165 + endOffset: 3251 +- name: 'Experimentation Culture: Prioritization Through Measurability' + startOffset: 3251 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3251 + endOffset: 3396 +- name: 'Measurement Mindset: Data-Guided Product Decisions (Citrix)' + startOffset: 3396 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3396 + endOffset: 3500 +- name: 'Skill Building: Learnable Design & Innovation Practices' + startOffset: 3500 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3500 + endOffset: 3605 +- name: Closing Notes, Resources and Contact Links + startOffset: 3605 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3605 + endOffset: 3554 + transcript: - header: Episode Introduction & Guest Overview - header: 'Guest Background: Strategy, Product and AI Trajectory' @@ -1119,117 +1220,6 @@ transcript: sec: 3632 time: '1:00:32' who: Liesbeth -dateadded: '2022-04-10' -duration: PT00H59M14S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=0 - endOffset: 78 -- name: 'Guest Background: Strategy, Product and AI Trajectory' - startOffset: 78 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=78 - endOffset: 221 -- name: 'Interdisciplinary Perspective: Physics Meets Humanities' - startOffset: 221 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=221 - endOffset: 307 -- name: Design as a User-Centered Product Process - startOffset: 307 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=307 - endOffset: 403 -- name: Algorithm-Friendly Product Design & Signal Collection - startOffset: 403 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=403 - endOffset: 604 -- name: 'Interaction Design Case Study: TikTok vs Instagram Signals' - startOffset: 604 - url: 
https://www.youtube.com/watch?v=tcqBfZw41FM&t=604 - endOffset: 732 -- name: 'Double Diamond Framework: Problem Framing to Solutions' - startOffset: 732 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=732 - endOffset: 872 -- name: 'Problem Discovery: Scoping and Prioritizing User Problems' - startOffset: 872 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=872 - endOffset: 962 -- name: 'Solution Exploration: Parallel Experiments & Proofs of Concept' - startOffset: 962 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=962 - endOffset: 1101 -- name: Timeframes for Research, Prototyping and Scaling - startOffset: 1101 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1101 - endOffset: 1217 -- name: Design Thinking Overview & Google PAIR Resources - startOffset: 1217 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1217 - endOffset: 1396 -- name: 'Design Sprint Structure: One-Week Prototyping Approach' - startOffset: 1396 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1396 - endOffset: 1500 -- name: 'Cross-Functional Participation: Designers, Data Scientists, PMs' - startOffset: 1500 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1500 - endOffset: 1633 -- name: 'Engineering Involvement: Building Algorithm-Ready Interfaces' - startOffset: 1633 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1633 - endOffset: 1698 -- name: 'Data Scientists in Problem Definition: Avoiding Rework' - startOffset: 1698 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1698 - endOffset: 1864 -- name: 'Scoping Documents: Challenging Assumptions with "Why"' - startOffset: 1864 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1864 - endOffset: 2005 -- name: Organizational Miscommunication & Backtracking Problems - startOffset: 2005 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2005 - endOffset: 2235 -- name: Product Managers’ Role in AI Roadmaps and Prioritization - startOffset: 2235 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2235 - 
endOffset: 2373 -- name: 'Innovation vs Quarterly OKRs: Making Space for Long-Term Bets' - startOffset: 2373 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2373 - endOffset: 2599 -- name: 'Radical Innovation Example: Second-Hand Car Trust Solutions' - startOffset: 2599 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2599 - endOffset: 2790 -- name: 'Building Evidence: Data-Driven Pitches for Big Ideas' - startOffset: 2790 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2790 - endOffset: 2956 -- name: 'Task Force Model (Jet Ski): Rapid Experimentation Teams' - startOffset: 2956 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2956 - endOffset: 3165 -- name: 'Innovation Workflow: From Discovery to Investment Case' - startOffset: 3165 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3165 - endOffset: 3251 -- name: 'Experimentation Culture: Prioritization Through Measurability' - startOffset: 3251 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3251 - endOffset: 3396 -- name: 'Measurement Mindset: Data-Guided Product Decisions (Citrix)' - startOffset: 3396 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3396 - endOffset: 3500 -- name: 'Skill Building: Learnable Design & Innovation Practices' - startOffset: 3500 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3500 - endOffset: 3605 -- name: Closing Notes, Resources and Contact Links - startOffset: 3605 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3605 - endOffset: 3554 --- Links: diff --git a/_podcast/s05e01-mastering-algorithms-and-data-structures.md b/_podcast/algorithms-data-structures-for-engineers.md similarity index 96% rename from _podcast/s05e01-mastering-algorithms-and-data-structures.md rename to _podcast/algorithms-data-structures-for-engineers.md index 094b9bc7..af959996 100644 --- a/_podcast/s05e01-mastering-algorithms-and-data-structures.md +++ b/_podcast/algorithms-data-structures-for-engineers.md @@ -1,12 +1,11 @@ --- -title: 'Practical Algorithms for 
Engineers: Bloom Filters, Approximate Nearest-Neighbor - & Performance' +title: 'Practical Algorithms for Engineers: Bloom Filters, Approximate Nearest-Neighbor & Performance' short: Mastering Algorithms and Data Structures +season: 5 +episode: 1 guests: - marcellolarocca image: images/podcast/s05e01-mastering-algorithms-and-data-structures.jpg -season: 5 -episode: 1 ids: youtube: RiQa-9LguW8 anchor: Mastering-Algorithms-and-Data-Structures---Marcello-La-Rocca-e16s7lf @@ -15,6 +14,131 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Mastering-Algorithms-and-Data-Structures---Marcello-La-Rocca-e16s7lf spotify: https://open.spotify.com/episode/5IM2Des1sjVIwrvB3dGoJN apple: https://podcasts.apple.com/us/podcast/mastering-algorithms-and-data-structures-marcello-la/id1541710331?i=1000534241523 + +description: Learn Bloom filters, approximate nearest-neighbor and performance tuning to gain memory-efficient containment, fast vector search and practical profiling tips +intro: How do engineers choose and implement the right algorithm for memory, latency, and scale? In this episode, Marcello La Rocca — senior software engineer at Tundra.com and author of Algorithms and Data Structures in Action, with experience at Twitter, Microsoft and Apple — walks through practical algorithmic solutions engineers can actually use in production. We focus on Bloom filters for memory‑efficient containment checks (and real-world uses like crawlers, routing tables, and adtech device-ID targeting), and on approximate nearest‑neighbour (ANN) strategies when KD‑trees break down for high‑dimensional data — covering R‑trees, SS‑trees, vector similarity, embeddings and Faiss. Along the way Marcello discusses core data structures, profiling and performance pitfalls, abstraction vs implementation trade‑offs, cross‑language serialization, and language performance choices (Python vs C++ and Cython). 
If you want actionable guidance — including when to trust libraries versus inspect internals, practical code in Java/JavaScript/Python, and study resources to get hands‑on — this episode gives concrete patterns, trade‑offs, and examples you can apply to improve search, recommendation, and large‑scale systems performance +topics: +- algorithms +- data structures +- software engineering +dateadded: 2021-09-05 + +duration: PT01H01M51S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=0 + endOffset: 111 +- name: 'Guest Intro: Marcello La Rocca and book announcement' + startOffset: 111 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=111 + endOffset: 191 +- name: 'Career Path: web development to Twitter, Microsoft, Apple, Tundra' + startOffset: 191 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=191 + endOffset: 319 +- name: 'Learning Philosophy: focus on applications over formal proofs' + startOffset: 319 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=319 + endOffset: 450 +- name: 'Anecdote: mathematical proof vs practical innovation' + startOffset: 450 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=450 + endOffset: 563 +- name: 'Recommended Resources: MIT course, Tim Roughgarden, Grokking Algorithms' + startOffset: 563 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=563 + endOffset: 634 +- name: 'Core Data Structures: arrays, lists, sets, dictionaries, stacks, queues' + startOffset: 634 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=634 + endOffset: 737 +- name: 'Abstraction vs Implementation: APIs, performance trade-offs' + startOffset: 737 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=737 + endOffset: 957 +- name: 'Practicing Algorithms Outside Work: competitions and side projects' + startOffset: 957 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=957 + endOffset: 1154 +- name: 'Using Libraries & Profiling: spotting algorithmic wins in production' + 
startOffset: 1154 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1154 + endOffset: 1214 +- name: 'Performance Pitfalls: containment checks and wrong list usage' + startOffset: 1214 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1214 + endOffset: 1332 +- name: 'Data-Science Use Cases: Bloom filters and nearest-neighbour search' + startOffset: 1332 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1332 + endOffset: 1419 +- name: 'Book Overview: bridging theory and practical use cases' + startOffset: 1419 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1419 + endOffset: 1504 +- name: 'Book Structure: basics, nearest-neighbour & MapReduce, graphs & optimization' + startOffset: 1504 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1504 + endOffset: 1591 +- name: 'Prerequisites & Format: appendices, pseudocode, who the book is for' + startOffset: 1591 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1591 + endOffset: 1717 +- name: 'Code Repository: implementations in Java, JavaScript, Python (and more)' + startOffset: 1717 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1717 + endOffset: 1809 +- name: 'Bloom Filter Explained: memory-efficient containment with false positives' + startOffset: 1809 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1809 + endOffset: 2083 +- name: 'Bloom Filter Applications: crawlers, routing tables, marketing/adtech' + startOffset: 2083 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2083 + endOffset: 2159 +- name: 'Adtech Example: device IDs and returning-user targeting with Bloom filters' + startOffset: 2159 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2159 + endOffset: 2350 +- name: 'Nearest-Neighbour Need: KD-tree limits and high-dimensional data challenges' + startOffset: 2350 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2350 + endOffset: 2564 +- name: 'Approximate Nearest-Neighbour: R-trees, SS-trees for geolocation & logistics' + startOffset: 2564 + url: 
https://www.youtube.com/watch?v=RiQa-9LguW8&t=2564 + endOffset: 2686 +- name: 'Vector Similarity: embeddings, recommender systems, Faiss usage' + startOffset: 2686 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2686 + endOffset: 2867 +- name: 'Frameworks vs Internals: when to trust libraries and when to inspect them' + startOffset: 2867 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2867 + endOffset: 2992 +- name: 'Cross-language Compatibility: serializing Bloom filters and hash seeds' + startOffset: 2992 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2992 + endOffset: 3175 +- name: 'Tech Interviews: algorithm emphasis, balanced assessment approaches' + startOffset: 3175 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3175 + endOffset: 3533 +- name: 'Hands-on Learning: LeetCode, contests, open-source projects' + startOffset: 3533 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3533 + endOffset: 3639 +- name: 'Language Trade-offs: Python vs C++ and using Cython for performance' + startOffset: 3639 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3639 + endOffset: 3781 +- name: 'Closing: contact info and book links' + startOffset: 3781 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3781 + endOffset: 3711 + transcript: - header: Podcast Introduction - header: 'Guest Intro: Marcello La Rocca and book announcement' @@ -780,139 +904,6 @@ transcript: sec: 3822 time: '1:03:42' who: Alexey -description: Learn Bloom filters, approximate nearest-neighbor and performance tuning - to gain memory-efficient containment, fast vector search and practical profiling - tips. -intro: How do engineers choose and implement the right algorithm for memory, latency, - and scale? In this episode, Marcello La Rocca — senior software engineer at Tundra.com - and author of Algorithms and Data Structures in Action, with experience at Twitter, - Microsoft and Apple — walks through practical algorithmic solutions engineers can - actually use in production. 
We focus on Bloom filters for memory‑efficient containment - checks (and real-world uses like crawlers, routing tables, and adtech device-ID - targeting), and on approximate nearest‑neighbour (ANN) strategies when KD‑trees - break down for high‑dimensional data — covering R‑trees, SS‑trees, vector similarity, - embeddings and Faiss. Along the way Marcello discusses core data structures, profiling - and performance pitfalls, abstraction vs implementation trade‑offs, cross‑language - serialization, and language performance choices (Python vs C++ and Cython). If you - want actionable guidance — including when to trust libraries versus inspect internals, - practical code in Java/JavaScript/Python, and study resources to get hands‑on — - this episode gives concrete patterns, trade‑offs, and examples you can apply to - improve search, recommendation, and large‑scale systems performance. -dateadded: '2021-09-05' -duration: PT01H01M51S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=0 - endOffset: 111 -- name: 'Guest Intro: Marcello La Rocca and book announcement' - startOffset: 111 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=111 - endOffset: 191 -- name: 'Career Path: web development to Twitter, Microsoft, Apple, Tundra' - startOffset: 191 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=191 - endOffset: 319 -- name: 'Learning Philosophy: focus on applications over formal proofs' - startOffset: 319 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=319 - endOffset: 450 -- name: 'Anecdote: mathematical proof vs practical innovation' - startOffset: 450 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=450 - endOffset: 563 -- name: 'Recommended Resources: MIT course, Tim Roughgarden, Grokking Algorithms' - startOffset: 563 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=563 - endOffset: 634 -- name: 'Core Data Structures: arrays, lists, sets, dictionaries, stacks, queues' - 
startOffset: 634 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=634 - endOffset: 737 -- name: 'Abstraction vs Implementation: APIs, performance trade-offs' - startOffset: 737 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=737 - endOffset: 957 -- name: 'Practicing Algorithms Outside Work: competitions and side projects' - startOffset: 957 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=957 - endOffset: 1154 -- name: 'Using Libraries & Profiling: spotting algorithmic wins in production' - startOffset: 1154 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1154 - endOffset: 1214 -- name: 'Performance Pitfalls: containment checks and wrong list usage' - startOffset: 1214 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1214 - endOffset: 1332 -- name: 'Data-Science Use Cases: Bloom filters and nearest-neighbour search' - startOffset: 1332 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1332 - endOffset: 1419 -- name: 'Book Overview: bridging theory and practical use cases' - startOffset: 1419 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1419 - endOffset: 1504 -- name: 'Book Structure: basics, nearest-neighbour & MapReduce, graphs & optimization' - startOffset: 1504 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1504 - endOffset: 1591 -- name: 'Prerequisites & Format: appendices, pseudocode, who the book is for' - startOffset: 1591 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1591 - endOffset: 1717 -- name: 'Code Repository: implementations in Java, JavaScript, Python (and more)' - startOffset: 1717 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1717 - endOffset: 1809 -- name: 'Bloom Filter Explained: memory-efficient containment with false positives' - startOffset: 1809 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1809 - endOffset: 2083 -- name: 'Bloom Filter Applications: crawlers, routing tables, marketing/adtech' - startOffset: 2083 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2083 - endOffset: 
2159 -- name: 'Adtech Example: device IDs and returning-user targeting with Bloom filters' - startOffset: 2159 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2159 - endOffset: 2350 -- name: 'Nearest-Neighbour Need: KD-tree limits and high-dimensional data challenges' - startOffset: 2350 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2350 - endOffset: 2564 -- name: 'Approximate Nearest-Neighbour: R-trees, SS-trees for geolocation & logistics' - startOffset: 2564 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2564 - endOffset: 2686 -- name: 'Vector Similarity: embeddings, recommender systems, Faiss usage' - startOffset: 2686 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2686 - endOffset: 2867 -- name: 'Frameworks vs Internals: when to trust libraries and when to inspect them' - startOffset: 2867 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2867 - endOffset: 2992 -- name: 'Cross-language Compatibility: serializing Bloom filters and hash seeds' - startOffset: 2992 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2992 - endOffset: 3175 -- name: 'Tech Interviews: algorithm emphasis, balanced assessment approaches' - startOffset: 3175 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3175 - endOffset: 3533 -- name: 'Hands-on Learning: LeetCode, contests, open-source projects' - startOffset: 3533 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3533 - endOffset: 3639 -- name: 'Language Trade-offs: Python vs C++ and using Cython for performance' - startOffset: 3639 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3639 - endOffset: 3781 -- name: 'Closing: contact info and book links' - startOffset: 3781 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3781 - endOffset: 3711 --- diff --git a/_podcast/s03e11-analytics-engineer.md b/_podcast/analytics-engineer-skills-tools.md similarity index 97% rename from _podcast/s03e11-analytics-engineer.md rename to _podcast/analytics-engineer-skills-tools.md index 0b016688..f0795f23 
100644 --- a/_podcast/s03e11-analytics-engineer.md +++ b/_podcast/analytics-engineer-skills-tools.md @@ -1,11 +1,11 @@ --- title: 'Master Analytics Engineering: Skills, Toolstack, Career Roadmap' short: 'Analytics Engineer: New Role in a Data Team' +season: 3 +episode: 11 guests: - victoriaperezmola image: images/podcast/s03e11-analytics-engineer.jpg -season: 3 -episode: 11 ids: youtube: C5UcxBwdCEg anchor: Analytics-Engineer-New-Role-in-a-Data-Team---Victoria-Perez-Mola-e131e3n @@ -14,8 +14,115 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Analytics-Engineer-New-Role-in-a-Data-Team---Victoria-Perez-Mola-e131e3n spotify: https://open.spotify.com/episode/4rLQ5ulsYR9LqXxbFe2MlN apple: https://podcasts.apple.com/us/podcast/analytics-engineer-new-role-in-data-team-victoria-perez/id1541710331?i=1000526036141 + +description: 'Master analytics engineering with dbt and data modeling: learn pipelines, testing, Snowflake basics and a clear career roadmap to advance your data career.' +intro: How do you become an effective analytics engineer and what skills, tools, and career steps matter most? In this episode, Victoria Perez Mola—born in Argentina, trained as a Systems Engineer and now an Analytics Engineer at Tier in Berlin—walks us through her move from ERP and finance reporting into analytics engineering. We cover daily responsibilities like data modeling, pipelines, data quality and Looker; the DBT workflow (SQL transformations, version control, tests, DAG); and a practical analytics toolstack including DBT, Snowflake, Adlib ETL and Looker. Victoria contrasts analytics engineer, data analyst and data engineer roles, explains role origins, and outlines typical job expectations such as pipeline ownership, auditing and dashboarding. She digs into core skills—SQL, dimensional modeling, Snowflake—strategies for handling bad data and schema changes with DBT macros and tests, and team structures from platform teams to embedded roles. 
Listen for a clear career roadmap, concrete learning resources (DBT tutorials and an 'Analytics readings' Notion list), and indicators of role fit if you enjoy modeling, data quality and engineering best practices +topics: +- analytics engineering +dateadded: 2021-06-19 + +duration: PT00H49M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=0 + endOffset: 108 +- name: 'Guest Introduction: Victoria Perez Mola overview' + startOffset: 108 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=108 + endOffset: 165 +- name: 'Career Journey: Systems engineering, ERP & finance reporting' + startOffset: 165 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=165 + endOffset: 245 +- name: 'Daily Responsibilities: Data modeling, pipelines, data quality, Looker' + startOffset: 245 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=245 + endOffset: 409 +- name: 'DBT Overview: SQL transformations, version control, tests, DAG' + startOffset: 409 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=409 + endOffset: 604 +- name: 'Analytics Toolstack: DBT, Snowflake, Adlib ETL, Looker' + startOffset: 604 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=604 + endOffset: 708 +- name: 'Transition Story: From BI/ERP work to analytics engineering' + startOffset: 708 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=708 + endOffset: 874 +- name: 'Role Comparison: Analytics Engineer vs Data Analyst vs Data Engineer' + startOffset: 874 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=874 + endOffset: 1014 +- name: 'Role Origins & Purpose: Spotify, reducing analysts'' cleaning workload' + startOffset: 1014 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1014 + endOffset: 1252 +- name: 'Job Expectations: Example posting traits (pipelines, auditing, dashboards)' + startOffset: 1252 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1252 + endOffset: 1570 +- name: 'Core Skills: SQL, dimensional modeling, 
Snowflake and tooling variance' + startOffset: 1570 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1570 + endOffset: 1806 +- name: 'DBT Ecosystem: DBT''s role in the analytics engineer movement' + startOffset: 1806 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1806 + endOffset: 1869 +- name: 'Organizational Variability: Team setups and role definitions across companies' + startOffset: 1869 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1869 + endOffset: 1982 +- name: 'Cross-functional Collaboration: Working with analysts, data scientists, backend' + startOffset: 1982 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1982 + endOffset: 2204 +- name: 'Managing Bad Data & Schema Changes: DBT cleaning, macros, limitations' + startOffset: 2204 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2204 + endOffset: 2333 +- name: 'Data Testing Strategy: DBT tests, upstream checks, warnings vs errors' + startOffset: 2333 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2333 + endOffset: 2442 +- name: 'BI Roles vs Analytics Engineering: Overlaps with BI developer and analyst' + startOffset: 2442 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2442 + endOffset: 2525 +- name: 'Pathway to Analytics Engineering: Software practices, Kimball, DBT learning' + startOffset: 2525 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2525 + endOffset: 2619 +- name: 'Learning Resources: DBT tutorials and ''Analytics readings'' Notion list' + startOffset: 2619 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2619 + endOffset: 2692 +- name: 'Role Fit Signals: Enjoy modeling, quality, and best practices' + startOffset: 2692 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2692 + endOffset: 2788 +- name: 'Job Frustrations: Enforcing guidelines, ad-hoc firefights, limited raw control' + startOffset: 2788 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2788 + endOffset: 2916 +- name: 'Team Scale & Placement: Platform teams vs embedded analytics 
engineers' + startOffset: 2916 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2916 + endOffset: 3046 +- name: 'Data Documentation & Profiling: DBT docs strengths and profiling tools (Datafold, + Monte Carlo)' + startOffset: 3046 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3046 + endOffset: 3090 +- name: Episode Wrap-Up & Links + startOffset: 3090 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3090 + endOffset: 2949 + transcript: -- header: Podcast Introduction - header: 'Guest Introduction: Victoria Perez Mola overview' - line: This week, we'll talk about a new role in the data team. This role is the analytics engineer. We have a special guest today, Victoria. Victoria works as @@ -861,122 +968,6 @@ transcript: detailed. It has the code and it has dependencies. It's very easy to go from there and see what else you are going to affect if you touch something. who: Victoria -description: 'Master analytics engineering with dbt and data modeling: learn pipelines, - testing, Snowflake basics and a clear career roadmap to advance your data career.' -intro: How do you become an effective analytics engineer and what skills, tools, and - career steps matter most? In this episode, Victoria Perez Mola—born in Argentina, - trained as a Systems Engineer and now an Analytics Engineer at Tier in Berlin—walks - us through her move from ERP and finance reporting into analytics engineering. We - cover daily responsibilities like data modeling, pipelines, data quality and Looker; - the DBT workflow (SQL transformations, version control, tests, DAG); and a practical - analytics toolstack including DBT, Snowflake, Adlib ETL and Looker. Victoria contrasts - analytics engineer, data analyst and data engineer roles, explains role origins, - and outlines typical job expectations such as pipeline ownership, auditing and dashboarding. 
- She digs into core skills—SQL, dimensional modeling, Snowflake—strategies for handling - bad data and schema changes with DBT macros and tests, and team structures from - platform teams to embedded roles. Listen for a clear career roadmap, concrete learning - resources (DBT tutorials and an 'Analytics readings' Notion list), and indicators - of role fit if you enjoy modeling, data quality and engineering best practices. -dateadded: '2021-06-19' -duration: PT00H49M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=0 - endOffset: 108 -- name: 'Guest Introduction: Victoria Perez Mola overview' - startOffset: 108 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=108 - endOffset: 165 -- name: 'Career Journey: Systems engineering, ERP & finance reporting' - startOffset: 165 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=165 - endOffset: 245 -- name: 'Daily Responsibilities: Data modeling, pipelines, data quality, Looker' - startOffset: 245 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=245 - endOffset: 409 -- name: 'DBT Overview: SQL transformations, version control, tests, DAG' - startOffset: 409 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=409 - endOffset: 604 -- name: 'Analytics Toolstack: DBT, Snowflake, Adlib ETL, Looker' - startOffset: 604 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=604 - endOffset: 708 -- name: 'Transition Story: From BI/ERP work to analytics engineering' - startOffset: 708 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=708 - endOffset: 874 -- name: 'Role Comparison: Analytics Engineer vs Data Analyst vs Data Engineer' - startOffset: 874 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=874 - endOffset: 1014 -- name: 'Role Origins & Purpose: Spotify, reducing analysts'' cleaning workload' - startOffset: 1014 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1014 - endOffset: 1252 -- name: 'Job Expectations: Example posting traits 
(pipelines, auditing, dashboards)' - startOffset: 1252 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1252 - endOffset: 1570 -- name: 'Core Skills: SQL, dimensional modeling, Snowflake and tooling variance' - startOffset: 1570 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1570 - endOffset: 1806 -- name: 'DBT Ecosystem: DBT''s role in the analytics engineer movement' - startOffset: 1806 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1806 - endOffset: 1869 -- name: 'Organizational Variability: Team setups and role definitions across companies' - startOffset: 1869 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1869 - endOffset: 1982 -- name: 'Cross-functional Collaboration: Working with analysts, data scientists, backend' - startOffset: 1982 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1982 - endOffset: 2204 -- name: 'Managing Bad Data & Schema Changes: DBT cleaning, macros, limitations' - startOffset: 2204 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2204 - endOffset: 2333 -- name: 'Data Testing Strategy: DBT tests, upstream checks, warnings vs errors' - startOffset: 2333 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2333 - endOffset: 2442 -- name: 'BI Roles vs Analytics Engineering: Overlaps with BI developer and analyst' - startOffset: 2442 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2442 - endOffset: 2525 -- name: 'Pathway to Analytics Engineering: Software practices, Kimball, DBT learning' - startOffset: 2525 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2525 - endOffset: 2619 -- name: 'Learning Resources: DBT tutorials and ''Analytics readings'' Notion list' - startOffset: 2619 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2619 - endOffset: 2692 -- name: 'Role Fit Signals: Enjoy modeling, quality, and best practices' - startOffset: 2692 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2692 - endOffset: 2788 -- name: 'Job Frustrations: Enforcing guidelines, ad-hoc firefights, limited raw 
control' - startOffset: 2788 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2788 - endOffset: 2916 -- name: 'Team Scale & Placement: Platform teams vs embedded analytics engineers' - startOffset: 2916 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2916 - endOffset: 3046 -- name: 'Data Documentation & Profiling: DBT docs strengths and profiling tools (Datafold, - Monte Carlo)' - startOffset: 3046 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3046 - endOffset: 3090 -- name: Episode Wrap-Up & Links - startOffset: 3090 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3090 - endOffset: 2949 --- diff --git a/_podcast/s03e02-from-analytics-to-data-science.md b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md similarity index 97% rename from _podcast/s03e02-from-analytics-to-data-science.md rename to _podcast/analytics-to-data-science-with-kaggle-portfolio.md index 409881e7..4e8c1188 100644 --- a/_podcast/s03e02-from-analytics-to-data-science.md +++ b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md @@ -1,12 +1,11 @@ --- -title: 'Career Transition from Analytics to Data Science: Build a Kaggle Notebook - Portfolio, Learn Python & Get Hired' +title: 'Career Transition from Analytics to Data Science: Build a Kaggle Notebook Portfolio, Learn Python & Get Hired' short: Shifting Career from Analytics to Data Science +season: 3 +episode: 2 guests: - andradaolteanu image: images/podcast/s03e02-from-analytics-to-data-science.jpg -season: 3 -episode: 2 ids: youtube: ixmTewD5Waw anchor: Shifting-Career-from-Analytics-to-Data-Science---Andrada-Olteanu-ev19ma @@ -15,6 +14,101 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Shifting-Career-from-Analytics-to-Data-Science---Andrada-Olteanu-ev19ma spotify: https://open.spotify.com/episode/1GVuHJzqbcf2BvaLBTgsAL apple: https://podcasts.apple.com/us/podcast/shifting-career-from-analytics-to-data-science-andrada/id1541710331?i=1000517426368 + +description: Build a Kaggle portfolio, 
learn Python to pivot from analytics to data science—hands-on notebooks, interview prep and hiring strategies to get hired +intro: 'How do you move from analytics into a hireable data science role by building a Kaggle notebook portfolio and learning Python fast? In this episode, Andrada Olteanu — Data Scientist at Endava, Kaggle Notebooks Master, and Z by HP & NVIDIA Data Science Ambassador — walks through her path from a statistics degree and data analyst role at Avon to a master’s in DS and a practical, project-driven transition.

We cover concrete steps: recommended courses like Jose Portilla’s “Python for Data Science & Machine Learning,” using Kaggle as your primary practice environment, and specific notebook work such as the Iowa House Prices project with hyperparameter tuning. Andrada explains how to translate academic dissertations into public notebooks, decompose and reimplement kernels to grow coding skills, and leverage mentorship (including connecting with Gabi Preda on Kaggle) during the job search. Listeners will also learn how to present work on Kaggle and GitHub, navigate interview expectations (algorithmic coding tests vs practical ML), and use LinkedIn/Twitter for networking.

If you’re building a Kaggle notebook portfolio, learning Python, and aiming for data science roles, this episode gives a practical, step-by-step roadmap.' +topics: +- career transition +- analytics +- data science +dateadded: 2021-04-16 + +duration: PT01H02M21S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=0 + endOffset: 97 +- name: 'Episode Overview: Transitioning from Analytics to Data Science' + startOffset: 97 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=97 + endOffset: 130 +- name: 'Career Path: Statistics Degree → Avon Data Analyst → Master’s → Data Scientist' + startOffset: 130 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=130 + endOffset: 321 +- name: 'Recommended Course: Python for Data Science & Machine Learning (Jose Portilla, + Udemy)' + startOffset: 321 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=321 + endOffset: 498 +- name: 'Kaggle Introduction: First Encounters and Community Motivation' + startOffset: 498 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=498 + endOffset: 583 +- name: 'Kaggle Notebooks: Iowa House Prices, Hyperparameter Tuning & Model Improvement' + startOffset: 583 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=583 + endOffset: 866 +- name: 'Project-Based Learning: Kaggle as Primary Practice Environment' + startOffset: 866 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=866 + endOffset: 942 +- name: 'Translating Academic Work: Dissertation and Masters Projects to Public Notebooks' + startOffset: 942 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=942 + endOffset: 1089 +- name: 'Mentorship & Hiring: Connecting with Gabi Preda via Kaggle' + startOffset: 1089 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1089 + endOffset: 1405 +- name: 'Job Search Process: Timeline and Application Strategy' + startOffset: 1405 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1405 + endOffset: 1567 +- name: 'Interview Challenges: 
Algorithmic Coding Tests vs Practical ML Skills' + startOffset: 1567 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1567 + endOffset: 1934 +- name: 'Showcasing Work: Kaggle Notebooks, GitHub and Portfolio Impact' + startOffset: 1934 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1934 + endOffset: 2201 +- name: 'Transferable Analyst Skills: Data Validation, Domain Knowledge & EDA' + startOffset: 2201 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2201 + endOffset: 2509 +- name: 'Coding Growth Plan: Learn by Doing Competitions and Reproducing Notebooks' + startOffset: 2509 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2509 + endOffset: 2716 +- name: 'Learning Technique: Decompose Notebooks, Reimplement and Debug' + startOffset: 2716 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2716 + endOffset: 2967 +- name: 'Master’s Degree Value: Structured Curriculum vs Independent Study' + startOffset: 2967 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2967 + endOffset: 3174 +- name: 'Self-Paced Pivot: Udemy, Kaggle and YouTube Path to Data Science in ~1 Year' + startOffset: 3174 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3174 + endOffset: 3421 +- name: 'Kaggle Ecosystem: Notebooks, Datasets and Community Discussions' + startOffset: 3421 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3421 + endOffset: 3660 +- name: 'Networking Strategy: Use LinkedIn & Twitter to Showcase Projects and Build + Community' + startOffset: 3660 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3660 + endOffset: 3781 +- name: Episode Wrap-Up and Final Advice + startOffset: 3781 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3781 + endOffset: 3741 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Transitioning from Analytics to Data Science' @@ -871,108 +965,6 @@ transcript: sec: 3838 time: '1:03:58' who: Alexey -description: Build a Kaggle portfolio, learn Python to pivot from analytics to data - science—hands-on 
notebooks, interview prep and hiring strategies to get hired. -intro: 'How do you move from analytics into a hireable data science role by building - a Kaggle notebook portfolio and learning Python fast? In this episode, Andrada Olteanu - — Data Scientist at Endava, Kaggle Notebooks Master, and Z by HP & NVIDIA Data Science - Ambassador — walks through her path from a statistics degree and data analyst role - at Avon to a master’s in DS and a practical, project-driven transition.

- We cover concrete steps: recommended courses like Jose Portilla’s “Python for Data - Science & Machine Learning,” using Kaggle as your primary practice environment, - and specific notebook work such as the Iowa House Prices project with hyperparameter - tuning. Andrada explains how to translate academic dissertations into public notebooks, - decompose and reimplement kernels to grow coding skills, and leverage mentorship - (including connecting with Gabi Preda on Kaggle) during the job search. Listeners - will also learn how to present work on Kaggle and GitHub, navigate interview expectations - (algorithmic coding tests vs practical ML), and use LinkedIn/Twitter for networking. -

If you’re building a Kaggle notebook portfolio, learning Python, and aiming - for data science roles, this episode gives a practical, step-by-step roadmap.' -dateadded: '2021-04-16' -duration: PT01H02M21S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=0 - endOffset: 97 -- name: 'Episode Overview: Transitioning from Analytics to Data Science' - startOffset: 97 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=97 - endOffset: 130 -- name: 'Career Path: Statistics Degree → Avon Data Analyst → Master’s → Data Scientist' - startOffset: 130 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=130 - endOffset: 321 -- name: 'Recommended Course: Python for Data Science & Machine Learning (Jose Portilla, - Udemy)' - startOffset: 321 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=321 - endOffset: 498 -- name: 'Kaggle Introduction: First Encounters and Community Motivation' - startOffset: 498 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=498 - endOffset: 583 -- name: 'Kaggle Notebooks: Iowa House Prices, Hyperparameter Tuning & Model Improvement' - startOffset: 583 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=583 - endOffset: 866 -- name: 'Project-Based Learning: Kaggle as Primary Practice Environment' - startOffset: 866 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=866 - endOffset: 942 -- name: 'Translating Academic Work: Dissertation and Masters Projects to Public Notebooks' - startOffset: 942 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=942 - endOffset: 1089 -- name: 'Mentorship & Hiring: Connecting with Gabi Preda via Kaggle' - startOffset: 1089 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1089 - endOffset: 1405 -- name: 'Job Search Process: Timeline and Application Strategy' - startOffset: 1405 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1405 - endOffset: 1567 -- name: 'Interview Challenges: Algorithmic Coding Tests vs Practical ML Skills' - 
startOffset: 1567 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1567 - endOffset: 1934 -- name: 'Showcasing Work: Kaggle Notebooks, GitHub and Portfolio Impact' - startOffset: 1934 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1934 - endOffset: 2201 -- name: 'Transferable Analyst Skills: Data Validation, Domain Knowledge & EDA' - startOffset: 2201 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2201 - endOffset: 2509 -- name: 'Coding Growth Plan: Learn by Doing Competitions and Reproducing Notebooks' - startOffset: 2509 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2509 - endOffset: 2716 -- name: 'Learning Technique: Decompose Notebooks, Reimplement and Debug' - startOffset: 2716 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2716 - endOffset: 2967 -- name: 'Master’s Degree Value: Structured Curriculum vs Independent Study' - startOffset: 2967 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2967 - endOffset: 3174 -- name: 'Self-Paced Pivot: Udemy, Kaggle and YouTube Path to Data Science in ~1 Year' - startOffset: 3174 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3174 - endOffset: 3421 -- name: 'Kaggle Ecosystem: Notebooks, Datasets and Community Discussions' - startOffset: 3421 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3421 - endOffset: 3660 -- name: 'Networking Strategy: Use LinkedIn & Twitter to Showcase Projects and Build - Community' - startOffset: 3660 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3660 - endOffset: 3781 -- name: Episode Wrap-Up and Final Advice - startOffset: 3781 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3781 - endOffset: 3741 --- Links: diff --git a/_podcast/s06e05-post-doctoral-research.md b/_podcast/big-data-analytics-and-postdoc-research.md similarity index 97% rename from _podcast/s06e05-post-doctoral-research.md rename to _podcast/big-data-analytics-and-postdoc-research.md index 9a051905..4f9d43ac 100644 --- a/_podcast/s06e05-post-doctoral-research.md +++ 
b/_podcast/big-data-analytics-and-postdoc-research.md @@ -1,12 +1,11 @@ --- -title: 'Master Spatial Big Data Analytics: Nebula Stream Systems, Postdoc Mentoring - & PhD Tips' +title: 'Master Spatial Big Data Analytics: Nebula Stream Systems, Postdoc Mentoring & PhD Tips' short: 'Advancing Big Data Analytics: Post-Doctoral Research' +season: 6 +episode: 5 guests: - elenitziritazacharatou image: images/podcast/s06e05-post-doctoral-research.jpg -season: 6 -episode: 5 ids: youtube: 7jgmIQGMhGE anchor: Advancing-Big-Data-Analytics-Post-Doctoral-Research---Eleni-Tzirita-Zacharatou-e1b6f41 @@ -15,6 +14,130 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Advancing-Big-Data-Analytics-Post-Doctoral-Research---Eleni-Tzirita-Zacharatou-e1b6f41 spotify: https://open.spotify.com/episode/6rgBSTPRvgNcJ7ouFyZmbH apple: https://podcasts.apple.com/us/podcast/advancing-big-data-analytics-post-doctoral-research/id1541710331?i=1000543884294 + +description: 'Discover Spatial Big Data, Nebula Stream & postdoc mentoring: PhD tips, publishing, time-management and stream-processing tactics to boost your research.' +intro: How do you master spatial big data analytics while navigating the demands of postdoc research, systems building, and preparing for a PhD? In this episode, Eleni Tzirita‑Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with a PhD from EPFL and award‑winning work in data management — breaks down practical approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) and robust stream processing for IoT. We cover systems‑driven research like the Nebula Stream and Agora infrastructure, spotting research trends via conferences and reviewing, and aligning academic work with industry needs. 
Eleni also outlines the postdoc role (mentoring, teaching, reviewing, dissemination), time management strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD or master’s thesis. Listeners will gain concrete guidance on research priorities beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, data cleaning evaluation challenges, and steps to increase diversity in CS. Tune in for actionable postdoc mentoring and PhD tips grounded in spatial big data and stream processing research +topics: +- academia +- big data analytics +- tools +- data engineering +dateadded: 2021-12-05 + +duration: PT01H01M37S + +quotableClips: +- name: 'Guest Introduction: Eleni Tzirita‑Zacharatou, postdoctoral researcher at + DIMA, TU Berlin' + startOffset: 73 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=73 + endOffset: 169 +- name: 'Academic Journey: Athens undergrad → EPFL PhD → Berlin postdoc' + startOffset: 169 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=169 + endOffset: 230 +- name: 'Spatial Big Data Analytics: Definitions, examples (GPS, trajectories, satellite + imagery)' + startOffset: 230 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=230 + endOffset: 356 +- name: 'Postdoc Role Overview: Research, mentoring, teaching, reviewing, dissemination' + startOffset: 356 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=356 + endOffset: 449 +- name: 'Time Management for Research: Focused days vs. 
multitasking' + startOffset: 449 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=449 + endOffset: 526 +- name: 'Publishing Realities: When research yields publishable outcomes' + startOffset: 526 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=526 + endOffset: 603 +- name: 'Top Data Management Venues: VLDB, SIGMOD, ICDE' + startOffset: 603 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=603 + endOffset: 693 +- name: 'Postdoc vs PhD: Increased responsibility, mentoring, and leadership' + startOffset: 693 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=693 + endOffset: 1018 +- name: 'Mentoring Strategy: Advising BSc/MSc topics and evaluating PhD proposals' + startOffset: 1018 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1018 + endOffset: 1388 +- name: 'DIMA Research Programs: Nebula Stream (IoT/stream processing) and Agora infrastructure' + startOffset: 1388 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1388 + endOffset: 1455 +- name: 'System‑Driven Research: From Apache Flink legacy to new Nebula stream systems' + startOffset: 1455 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1455 + endOffset: 1495 +- name: 'Spotting Research Trends: Conferences, reviewing, and community roadmaps' + startOffset: 1495 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1495 + endOffset: 1710 +- name: 'Industry Engagement: Interfacing academic research with industry needs' + startOffset: 1710 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1710 + endOffset: 1827 +- name: 'Peer Reviewing: Invitations, visibility, and networking in academic service' + startOffset: 1827 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1827 + endOffset: 2018 +- name: 'Beyond Performance Metrics: Usability, energy, adoption as research priorities' + startOffset: 2018 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2018 + endOffset: 2181 +- name: 'Data Cleaning Research: Automation challenges and evaluation difficulties' + startOffset: 2181 + url: 
https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2181 + endOffset: 2320 +- name: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross‑domain + work' + startOffset: 2320 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2320 + endOffset: 2470 +- name: 'Facilitating Cross‑Group Collaboration: Physical spaces and informal interactions' + startOffset: 2470 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2470 + endOffset: 2657 +- name: 'Preparing for PhD Applications: Field choice, research quality, and AI/ML + trends' + startOffset: 2657 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2657 + endOffset: 2826 +- name: 'Master''s Thesis Selection: Advertised topics, mentor fit, internships and + skill alignment' + startOffset: 2826 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2826 + endOffset: 3127 +- name: 'Deciding on a PhD: Trial research in Master’s or internships before committing' + startOffset: 3127 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3127 + endOffset: 3299 +- name: 'PhD Expectations: Publication requirements and top‑conference pressure' + startOffset: 3299 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3299 + endOffset: 3319 +- name: 'Increasing Female Participation in CS: Early outreach, role models, institutional + support' + startOffset: 3319 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3319 + endOffset: 3651 +- name: 'Personal Reflections: Stereotypes, belonging, and career persistence' + startOffset: 3651 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3651 + endOffset: 3714 +- name: 'Contact and Follow‑Up: DIMA page and email for questions' + startOffset: 3714 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3714 + endOffset: 3722 +- name: Episode Conclusion and Thanks + startOffset: 3722 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3722 + endOffset: 3697 + transcript: - header: 'Guest Introduction: Eleni Tzirita‑Zacharatou, postdoctoral researcher at DIMA, TU Berlin' @@ 
-1022,137 +1145,6 @@ transcript: sec: 3770 time: '1:02:50' who: Eleni -description: 'Discover Spatial Big Data, Nebula Stream & postdoc mentoring: PhD tips, - publishing, time-management and stream-processing tactics to boost your research.' -intro: How do you master spatial big data analytics while navigating the demands of - postdoc research, systems building, and preparing for a PhD? In this episode, Eleni - Tzirita‑Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with - a PhD from EPFL and award‑winning work in data management — breaks down practical - approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) - and robust stream processing for IoT. We cover systems‑driven research like the - Nebula Stream and Agora infrastructure, spotting research trends via conferences - and reviewing, and aligning academic work with industry needs. Eleni also outlines - the postdoc role (mentoring, teaching, reviewing, dissemination), time management - strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring - tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD - or master’s thesis. Listeners will gain concrete guidance on research priorities - beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, - data cleaning evaluation challenges, and steps to increase diversity in CS. Tune - in for actionable postdoc mentoring and PhD tips grounded in spatial big data and - stream processing research. 
-dateadded: '2021-12-05' -duration: PT01H01M37S -quotableClips: -- name: 'Guest Introduction: Eleni Tzirita‑Zacharatou, postdoctoral researcher at - DIMA, TU Berlin' - startOffset: 73 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=73 - endOffset: 169 -- name: 'Academic Journey: Athens undergrad → EPFL PhD → Berlin postdoc' - startOffset: 169 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=169 - endOffset: 230 -- name: 'Spatial Big Data Analytics: Definitions, examples (GPS, trajectories, satellite - imagery)' - startOffset: 230 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=230 - endOffset: 356 -- name: 'Postdoc Role Overview: Research, mentoring, teaching, reviewing, dissemination' - startOffset: 356 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=356 - endOffset: 449 -- name: 'Time Management for Research: Focused days vs. multitasking' - startOffset: 449 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=449 - endOffset: 526 -- name: 'Publishing Realities: When research yields publishable outcomes' - startOffset: 526 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=526 - endOffset: 603 -- name: 'Top Data Management Venues: VLDB, SIGMOD, ICDE' - startOffset: 603 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=603 - endOffset: 693 -- name: 'Postdoc vs PhD: Increased responsibility, mentoring, and leadership' - startOffset: 693 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=693 - endOffset: 1018 -- name: 'Mentoring Strategy: Advising BSc/MSc topics and evaluating PhD proposals' - startOffset: 1018 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1018 - endOffset: 1388 -- name: 'DIMA Research Programs: Nebula Stream (IoT/stream processing) and Agora infrastructure' - startOffset: 1388 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1388 - endOffset: 1455 -- name: 'System‑Driven Research: From Apache Flink legacy to new Nebula stream systems' - startOffset: 1455 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1455 
- endOffset: 1495 -- name: 'Spotting Research Trends: Conferences, reviewing, and community roadmaps' - startOffset: 1495 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1495 - endOffset: 1710 -- name: 'Industry Engagement: Interfacing academic research with industry needs' - startOffset: 1710 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1710 - endOffset: 1827 -- name: 'Peer Reviewing: Invitations, visibility, and networking in academic service' - startOffset: 1827 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1827 - endOffset: 2018 -- name: 'Beyond Performance Metrics: Usability, energy, adoption as research priorities' - startOffset: 2018 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2018 - endOffset: 2181 -- name: 'Data Cleaning Research: Automation challenges and evaluation difficulties' - startOffset: 2181 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2181 - endOffset: 2320 -- name: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross‑domain - work' - startOffset: 2320 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2320 - endOffset: 2470 -- name: 'Facilitating Cross‑Group Collaboration: Physical spaces and informal interactions' - startOffset: 2470 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2470 - endOffset: 2657 -- name: 'Preparing for PhD Applications: Field choice, research quality, and AI/ML - trends' - startOffset: 2657 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2657 - endOffset: 2826 -- name: 'Master''s Thesis Selection: Advertised topics, mentor fit, internships and - skill alignment' - startOffset: 2826 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2826 - endOffset: 3127 -- name: 'Deciding on a PhD: Trial research in Master’s or internships before committing' - startOffset: 3127 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3127 - endOffset: 3299 -- name: 'PhD Expectations: Publication requirements and top‑conference pressure' - startOffset: 3299 - url: 
https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3299 - endOffset: 3319 -- name: 'Increasing Female Participation in CS: Early outreach, role models, institutional - support' - startOffset: 3319 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3319 - endOffset: 3651 -- name: 'Personal Reflections: Stereotypes, belonging, and career persistence' - startOffset: 3651 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3651 - endOffset: 3714 -- name: 'Contact and Follow‑Up: DIMA page and email for questions' - startOffset: 3714 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3714 - endOffset: 3722 -- name: Episode Conclusion and Thanks - startOffset: 3722 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3722 - endOffset: 3697 --- Links: diff --git a/_podcast/s04e03-big-data-engineer-vs-data-scientist.md b/_podcast/big-data-engineer-vs-data-scientist.md similarity index 98% rename from _podcast/s04e03-big-data-engineer-vs-data-scientist.md rename to _podcast/big-data-engineer-vs-data-scientist.md index 74a9dec4..cead0573 100644 --- a/_podcast/s04e03-big-data-engineer-vs-data-scientist.md +++ b/_podcast/big-data-engineer-vs-data-scientist.md @@ -1,11 +1,11 @@ --- title: 'Big Data Engineer vs Data Scientist: Skills, Tools, and Career Paths' short: Big Data Engineer vs Data Scientist +season: 4 +episode: 3 guests: - roksolanadiachuk image: images/podcast/s04e03-big-data-engineer-vs-data-scientist.jpg -season: 4 -episode: 3 ids: youtube: yg3d1lFd7Uo anchor: Big-Data-Engineer-vs-Data-Scientist---Roksolana-Diachuk-e139sl8 @@ -14,6 +14,150 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Big-Data-Engineer-vs-Data-Scientist---Roksolana-Diachuk-e139sl8 spotify: https://open.spotify.com/episode/08Mb5JOOo6sWOFgsXILVsj apple: https://podcasts.apple.com/us/podcast/big-data-engineer-vs-data-scientist-roksolana-diachuk/id1541710331?i=1000528386609 + +description: Discover how Big Data Engineer vs Data Scientist roles differ — skills, performance optimization, ETL 
pipelines and ML deployment tips to advance your career +intro: 'How do the day‑to‑day responsibilities and skill sets really differ between a Big Data Engineer and a Data Scientist—and what should you learn to move between those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her career transition from backend Java into big data engineering and R&D.

We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala and Parquet formats—plus performance tuning: Spark job optimization, cluster resource planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries (data cleaning and feature engineering for data scientists vs pipeline design and formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning with Delta Lake, observability, documentation, starter projects and learning resources.

Listen to learn which skills, tools and projects will help you choose or transition between careers, and what to prioritize when building scalable data pipelines, deploying models, and ensuring data quality.' +topics: +- career transition +- software engineering +- data engineering +- data science +dateadded: 2021-07-10 + +duration: PT01H01M27S + +quotableClips: +- name: Episode Overview & Guest Introduction + startOffset: 112 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=112 + endOffset: 148 +- name: 'Career Path: From Backend Java to Big Data Engineering (Scala, R&D, Captify)' + startOffset: 148 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=148 + endOffset: 266 +- name: 'Core Responsibilities: Building ETL Data Pipelines, HDFS/S3, Impala' + startOffset: 266 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=266 + endOffset: 398 +- name: 'Performance Focus: Spark Job Optimization & Cluster Resource Planning' + startOffset: 398 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=398 + endOffset: 438 +- name: 'Big Data Tooling: Spark, S3/HDFS, Kubernetes, Prometheus, Grafana, Scala + libs' + startOffset: 438 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=438 + endOffset: 484 +- name: 'Storytelling in Tech Talks: "Alice" Series and Conference Presentations' + startOffset: 484 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=484 + endOffset: 552 +- name: 'Role Comparison: Big Data Engineer vs Data Engineer (formats: Avro, Parquet, + ProtoBuf)' + startOffset: 552 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=552 + endOffset: 667 +- name: 'Essential Skills: Coding, SQL, Distributed Systems & Infrastructure Awareness' + startOffset: 667 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=667 + endOffset: 836 +- name: 'Data Scientist Scope: Data Cleaning, Feature Engineering, Model Cycle & Deployment' + startOffset: 836 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=836 + endOffset: 932 +- name: 'Tool Overlap: Spark & Python vs ML 
Libraries for Modeling' + startOffset: 932 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=932 + endOffset: 986 +- name: 'Collaboration Model: File Interfaces (Parquet) and Team Structures' + startOffset: 986 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=986 + endOffset: 1134 +- name: 'Case Study: Recommendation System — Streaming and Batch Pipeline Design' + startOffset: 1134 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1134 + endOffset: 1371 +- name: 'Streaming vs Batch Choices: Flink for Streaming, Spark for Batch, Parquet + on S3' + startOffset: 1371 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1371 + endOffset: 1420 +- name: 'ML Deployment Stack: MLflow, Kubeflow, Kubernetes & ML Engineer Roles' + startOffset: 1420 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1420 + endOffset: 1489 +- name: 'Cross-Skill Expectations: What Data Scientists Should Know About Pipelines' + startOffset: 1489 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1489 + endOffset: 1650 +- name: 'Upskilling for Engineers: Data Engineers Learning ML Inputs/Outputs (not + algorithms)' + startOffset: 1650 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1650 + endOffset: 1853 +- name: 'Transition Path: Analyst/Data Scientist → Data Engineer (coding, DBs, infra)' + startOffset: 1853 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1853 + endOffset: 2093 +- name: 'Databases to Learn: PostgreSQL, MySQL, MongoDB, Neo4j (SQL vs NoSQL)' + startOffset: 2093 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2093 + endOffset: 2167 +- name: 'Infrastructure Essentials: Docker, Cloud Services, Intro to Kubernetes' + startOffset: 2167 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2167 + endOffset: 2349 +- name: 'Data Quality & Monitoring: Flow Metrics, Spikes, and Schema Change Alerts' + startOffset: 2349 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2349 + endOffset: 2617 +- name: 'Data Documentation & Governance: Schema Descriptions, 
Confluence, HypeSQL' + startOffset: 2617 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2617 + endOffset: 2774 +- name: 'Software Engineering for Data Scientists: Code Quality, Reproducibility, + DB Skills' + startOffset: 2774 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2774 + endOffset: 2906 +- name: 'Hands-on Learning Resources: Katacoda, Google Codelabs, Databricks Trainings' + startOffset: 2906 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2906 + endOffset: 2969 +- name: 'Career Advice for Graduates: Choosing Data Engineering vs Data Science' + startOffset: 2969 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2969 + endOffset: 3076 +- name: 'Starter Projects: Word Count, Twitter Streaming, Elasticsearch + Kibana' + startOffset: 3076 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3076 + endOffset: 3208 +- name: 'Datasets for Practice: Wikipedia Dumps, CommonCrawl, NASA APIs, Social Media' + startOffset: 3208 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3208 + endOffset: 3368 +- name: 'Pre-built ETL Platforms vs Custom Pipelines: Trade-offs & Scalability' + startOffset: 3368 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3368 + endOffset: 3485 +- name: 'Operational Challenges: Deduplication, Historical Reprocessing, Risk Management' + startOffset: 3485 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3485 + endOffset: 3625 +- name: 'Data Versioning & Time Travel: Delta Lake for Reprocessing and Auditing' + startOffset: 3625 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3625 + endOffset: 3640 +- name: 'Learning Recommendations: Coursera Big Data Specialization; Spark & Data + books' + startOffset: 3640 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3640 + endOffset: 3754 +- name: 'Guest Links & Talks: Twitter, LinkedIn, YouTube (Alice & Kubernetes talks)' + startOffset: 3754 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3754 + endOffset: 3687 + transcript: - header: Episode Overview & Guest 
Introduction - line: Today we will talk about the difference between big data engineers and data @@ -1318,157 +1462,6 @@ transcript: sec: 3799 time: '1:03:19' who: Alexey -description: Discover how Big Data Engineer vs Data Scientist roles differ — skills, - performance optimization, ETL pipelines and ML deployment tips to advance your career. -intro: 'How do the day‑to‑day responsibilities and skill sets really differ between - a Big Data Engineer and a Data Scientist—and what should you learn to move between - those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, - Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her - career transition from backend Java into big data engineering and R&D.

- We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala - and Parquet formats—plus performance tuning: Spark job optimization, cluster resource - planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries - (data cleaning and feature engineering for data scientists vs pipeline design and - formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink - vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical - topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning - with Delta Lake, observability, documentation, starter projects and learning resources. -

Listen to learn which skills, tools and projects will help you choose or - transition between careers, and what to prioritize when building scalable data pipelines, - deploying models, and ensuring data quality.' -dateadded: '2021-07-10' -duration: PT01H01M27S -quotableClips: -- name: Episode Overview & Guest Introduction - startOffset: 112 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=112 - endOffset: 148 -- name: 'Career Path: From Backend Java to Big Data Engineering (Scala, R&D, Captify)' - startOffset: 148 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=148 - endOffset: 266 -- name: 'Core Responsibilities: Building ETL Data Pipelines, HDFS/S3, Impala' - startOffset: 266 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=266 - endOffset: 398 -- name: 'Performance Focus: Spark Job Optimization & Cluster Resource Planning' - startOffset: 398 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=398 - endOffset: 438 -- name: 'Big Data Tooling: Spark, S3/HDFS, Kubernetes, Prometheus, Grafana, Scala - libs' - startOffset: 438 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=438 - endOffset: 484 -- name: 'Storytelling in Tech Talks: "Alice" Series and Conference Presentations' - startOffset: 484 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=484 - endOffset: 552 -- name: 'Role Comparison: Big Data Engineer vs Data Engineer (formats: Avro, Parquet, - ProtoBuf)' - startOffset: 552 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=552 - endOffset: 667 -- name: 'Essential Skills: Coding, SQL, Distributed Systems & Infrastructure Awareness' - startOffset: 667 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=667 - endOffset: 836 -- name: 'Data Scientist Scope: Data Cleaning, Feature Engineering, Model Cycle & Deployment' - startOffset: 836 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=836 - endOffset: 932 -- name: 'Tool Overlap: Spark & Python vs ML Libraries for Modeling' - startOffset: 932 - url: 
https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=932 - endOffset: 986 -- name: 'Collaboration Model: File Interfaces (Parquet) and Team Structures' - startOffset: 986 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=986 - endOffset: 1134 -- name: 'Case Study: Recommendation System — Streaming and Batch Pipeline Design' - startOffset: 1134 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1134 - endOffset: 1371 -- name: 'Streaming vs Batch Choices: Flink for Streaming, Spark for Batch, Parquet - on S3' - startOffset: 1371 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1371 - endOffset: 1420 -- name: 'ML Deployment Stack: MLflow, Kubeflow, Kubernetes & ML Engineer Roles' - startOffset: 1420 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1420 - endOffset: 1489 -- name: 'Cross-Skill Expectations: What Data Scientists Should Know About Pipelines' - startOffset: 1489 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1489 - endOffset: 1650 -- name: 'Upskilling for Engineers: Data Engineers Learning ML Inputs/Outputs (not - algorithms)' - startOffset: 1650 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1650 - endOffset: 1853 -- name: 'Transition Path: Analyst/Data Scientist → Data Engineer (coding, DBs, infra)' - startOffset: 1853 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1853 - endOffset: 2093 -- name: 'Databases to Learn: PostgreSQL, MySQL, MongoDB, Neo4j (SQL vs NoSQL)' - startOffset: 2093 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2093 - endOffset: 2167 -- name: 'Infrastructure Essentials: Docker, Cloud Services, Intro to Kubernetes' - startOffset: 2167 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2167 - endOffset: 2349 -- name: 'Data Quality & Monitoring: Flow Metrics, Spikes, and Schema Change Alerts' - startOffset: 2349 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2349 - endOffset: 2617 -- name: 'Data Documentation & Governance: Schema Descriptions, Confluence, HypeSQL' - startOffset: 2617 - url: 
https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2617 - endOffset: 2774 -- name: 'Software Engineering for Data Scientists: Code Quality, Reproducibility, - DB Skills' - startOffset: 2774 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2774 - endOffset: 2906 -- name: 'Hands-on Learning Resources: Katacoda, Google Codelabs, Databricks Trainings' - startOffset: 2906 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2906 - endOffset: 2969 -- name: 'Career Advice for Graduates: Choosing Data Engineering vs Data Science' - startOffset: 2969 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2969 - endOffset: 3076 -- name: 'Starter Projects: Word Count, Twitter Streaming, Elasticsearch + Kibana' - startOffset: 3076 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3076 - endOffset: 3208 -- name: 'Datasets for Practice: Wikipedia Dumps, CommonCrawl, NASA APIs, Social Media' - startOffset: 3208 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3208 - endOffset: 3368 -- name: 'Pre-built ETL Platforms vs Custom Pipelines: Trade-offs & Scalability' - startOffset: 3368 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3368 - endOffset: 3485 -- name: 'Operational Challenges: Deduplication, Historical Reprocessing, Risk Management' - startOffset: 3485 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3485 - endOffset: 3625 -- name: 'Data Versioning & Time Travel: Delta Lake for Reprocessing and Auditing' - startOffset: 3625 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3625 - endOffset: 3640 -- name: 'Learning Recommendations: Coursera Big Data Specialization; Spark & Data - books' - startOffset: 3640 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3640 - endOffset: 3754 -- name: 'Guest Links & Talks: Twitter, LinkedIn, YouTube (Alice & Kubernetes talks)' - startOffset: 3754 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3754 - endOffset: 3687 --- Links: diff --git a/_podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.md 
b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md similarity index 98% rename from _podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.md rename to _podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md index 68a0d4fa..8b5e0d58 100644 --- a/_podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.md +++ b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md @@ -1,20 +1,127 @@ --- +title: 'Actionable Biohacks to Boost Productivity: Sleep, Circadian Light, Dopamine & Habits' +short: Biohacking for Data Scientists and ML Engineers +season: 13 episode: 3 guests: - ruslanshchuchkin +image: images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg ids: anchor: ow/datatalksclub/episodes/Biohacking-for-Data-Scientists-and-ML-Engineers---Ruslan-Shchuchkin-e1vpm1i youtube: uyxUBADZYpU -image: images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Biohacking-for-Data-Scientists-and-ML-Engineers---Ruslan-Shchuchkin-e1vpm1i apple: https://podcasts.apple.com/us/podcast/biohacking-for-data-scientists-and-ml-engineers/id1541710331?i=1000603633848 spotify: https://open.spotify.com/episode/6IuHKMK4CJdcVJNq9uQ9lm?si=PgXZHBCNSu21Nma1ToxGyQ youtube: https://www.youtube.com/watch?v=uyxUBADZYpU -season: 13 -short: Biohacking for Data Scientists and ML Engineers -title: 'Actionable Biohacks to Boost Productivity: Sleep, Circadian Light, Dopamine - & Habits' + +description: Discover actionable biohacks for sleep and dopamine to boost productivity with 90-min cycles, morning light, habit tracking and energy-focused routines +intro: How do small, science-aligned biohacks actually move the needle on focus and productivity? 
In this episode, Ruslan Shchuchkin, a Berlin-based data scientist who transitioned from business/marketing into data science after experimenting with many techniques to stay focused, walks through practical, evidence-minded strategies for improving performance. We cover the root causes of procrastination and perfectionism, behavioral biohacking versus chemical interventions, and how dopamine-driven problem-solving fuels habits. Ruslan explains meditation and NSDR for prefrontal focus, morning sun and circadian light exposure to regulate cortisol and melatonin, and daylight lamps and wake lighting for low-daylight homes. He shares sleep planning tips based on 90-minute cycles, protein-forward nutrition for sustained focus, and caffeine timing trade-offs. You’ll also hear about habit tracking (logs and Notion dashboards), voluntary discomfort as a dopamine reset, failed experiments worth avoiding, safety considerations, and a simple prioritization framework. If you want actionable biohacks—sleep, circadian light, dopamine management, habit tracking, and meditation—to boost sustainable productivity, this episode offers concrete, practical steps grounded in real-world experience +topics: +- biohacking +- productivity +dateadded: 2023-03-11 + +duration: PT00H57M58S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=0 + endOffset: 87 +- name: Episode Overview & Guest Introduction + startOffset: 87 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=87 + endOffset: 138 +- name: 'Career Journey: From Business/Marketing to Data Science' + startOffset: 138 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=138 + endOffset: 291 +- name: 'Procrastination & Perfectionism: Acceptance and Deadline Effects' + startOffset: 291 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=291 + endOffset: 416 +- name: 'Biohacking Defined: Behavioral Approaches vs. 
Chemical Interventions' + startOffset: 416 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=416 + endOffset: 571 +- name: 'Dopamine & Motivation: Problem-Solving Rewards and Habit Drivers' + startOffset: 571 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=571 + endOffset: 742 +- name: 'Meditation Benefits: Focus, Prefrontal Cortex, and Non-Sleep Deep Rest' + startOffset: 742 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=742 + endOffset: 1121 +- name: 'Light Exposure & Circadian Health: Morning Sun, Cortisol, Melatonin' + startOffset: 1121 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1121 + endOffset: 1336 +- name: 'Evolutionary Perspective: Simple Behavioral Biohacks for Productivity' + startOffset: 1336 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1336 + endOffset: 1574 +- name: 'Daylight Lamps & Wake Lighting: Alternatives for Low-Daylight Homes' + startOffset: 1574 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1574 + endOffset: 1670 +- name: 'Sleep Planning: 90-Minute Cycles and Alarm Timing Strategies' + startOffset: 1670 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1670 + endOffset: 1978 +- name: 'Nutrition for Focus: Protein Breakfasts, Lunch Effects, and Energy' + startOffset: 1978 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1978 + endOffset: 2287 +- name: 'Productivity Tracking: Logs, Notion Dashboards, and Self-Reflection' + startOffset: 2287 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2287 + endOffset: 2476 +- name: 'Failed Experiments: Intermittent Fasting, Cold Showers, and Limits' + startOffset: 2476 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2476 + endOffset: 2605 +- name: 'Voluntary Discomfort & Dopamine Resets: Stoic Challenges' + startOffset: 2605 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2605 + endOffset: 2747 +- name: 'Safety Considerations: Evidence-Based Biohacking and Medical Advice' + startOffset: 2747 + url: 
https://www.youtube.com/watch?v=uyxUBADZYpU&t=2747 + endOffset: 2832 +- name: 'Caffeine Strategy: Coffee, Timing, and Sleep Trade-Offs' + startOffset: 2832 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2832 + endOffset: 2901 +- name: 'Habit Tracking in Practice: Steps, Exercise, Hydration Metrics' + startOffset: 2901 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2901 + endOffset: 3051 +- name: 'Mindset Shifts: Gratitude, Prioritization, and Sustainable Goals' + startOffset: 3051 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3051 + endOffset: 3178 +- name: 'Stoicism Reading Recommendation: Meditations and Mental Models' + startOffset: 3178 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3178 + endOffset: 3225 +- name: 'Prioritization Framework: Focusing on Five Impactful Goals' + startOffset: 3225 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3225 + endOffset: 3321 +- name: 'Overcoming Perfectionism: Self-Compassion and Temporal Perspective' + startOffset: 3321 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3321 + endOffset: 3443 +- name: 'Resources for Learning Biohacking: Huberman Lab Podcast & Top Episodes' + startOffset: 3443 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3443 + endOffset: 3533 +- name: Key Takeaways, Next Steps, and Episode Close + startOffset: 3533 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3533 + endOffset: 3478 + transcript: - header: Podcast Introduction - header: Episode Overview & Guest Introduction @@ -1421,123 +1528,6 @@ transcript: sec: 3565 time: '59:25' who: Alexey -description: Discover actionable biohacks for sleep and dopamine to boost productivity - with 90-min cycles, morning light, habit tracking and energy-focused routines. -intro: How do small, science-aligned biohacks actually move the needle on focus and - productivity? 
In this episode, Ruslan Shchuchkin, a Berlin-based data scientist who - transitioned from business/marketing into data science after experimenting with - many techniques to stay focused, walks through practical, evidence-minded strategies - for improving performance. We cover the root causes of procrastination and perfectionism, - behavioral biohacking versus chemical interventions, and how dopamine-driven problem-solving - fuels habits. Ruslan explains meditation and NSDR for prefrontal focus, morning - sun and circadian light exposure to regulate cortisol and melatonin, and daylight - lamps and wake lighting for low-daylight homes. He shares sleep planning tips based - on 90-minute cycles, protein-forward nutrition for sustained focus, and caffeine - timing trade-offs. You’ll also hear about habit tracking (logs and Notion dashboards), - voluntary discomfort as a dopamine reset, failed experiments worth avoiding, safety - considerations, and a simple prioritization framework. If you want actionable biohacks—sleep, - circadian light, dopamine management, habit tracking, and meditation—to boost sustainable - productivity, this episode offers concrete, practical steps grounded in real-world - experience. -dateadded: '2023-03-11' -duration: PT00H57M58S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=0 - endOffset: 87 -- name: Episode Overview & Guest Introduction - startOffset: 87 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=87 - endOffset: 138 -- name: 'Career Journey: From Business/Marketing to Data Science' - startOffset: 138 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=138 - endOffset: 291 -- name: 'Procrastination & Perfectionism: Acceptance and Deadline Effects' - startOffset: 291 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=291 - endOffset: 416 -- name: 'Biohacking Defined: Behavioral Approaches vs. 
Chemical Interventions' - startOffset: 416 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=416 - endOffset: 571 -- name: 'Dopamine & Motivation: Problem-Solving Rewards and Habit Drivers' - startOffset: 571 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=571 - endOffset: 742 -- name: 'Meditation Benefits: Focus, Prefrontal Cortex, and Non-Sleep Deep Rest' - startOffset: 742 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=742 - endOffset: 1121 -- name: 'Light Exposure & Circadian Health: Morning Sun, Cortisol, Melatonin' - startOffset: 1121 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1121 - endOffset: 1336 -- name: 'Evolutionary Perspective: Simple Behavioral Biohacks for Productivity' - startOffset: 1336 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1336 - endOffset: 1574 -- name: 'Daylight Lamps & Wake Lighting: Alternatives for Low-Daylight Homes' - startOffset: 1574 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1574 - endOffset: 1670 -- name: 'Sleep Planning: 90-Minute Cycles and Alarm Timing Strategies' - startOffset: 1670 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1670 - endOffset: 1978 -- name: 'Nutrition for Focus: Protein Breakfasts, Lunch Effects, and Energy' - startOffset: 1978 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1978 - endOffset: 2287 -- name: 'Productivity Tracking: Logs, Notion Dashboards, and Self-Reflection' - startOffset: 2287 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2287 - endOffset: 2476 -- name: 'Failed Experiments: Intermittent Fasting, Cold Showers, and Limits' - startOffset: 2476 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2476 - endOffset: 2605 -- name: 'Voluntary Discomfort & Dopamine Resets: Stoic Challenges' - startOffset: 2605 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2605 - endOffset: 2747 -- name: 'Safety Considerations: Evidence-Based Biohacking and Medical Advice' - startOffset: 2747 - url: 
https://www.youtube.com/watch?v=uyxUBADZYpU&t=2747 - endOffset: 2832 -- name: 'Caffeine Strategy: Coffee, Timing, and Sleep Trade-Offs' - startOffset: 2832 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2832 - endOffset: 2901 -- name: 'Habit Tracking in Practice: Steps, Exercise, Hydration Metrics' - startOffset: 2901 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2901 - endOffset: 3051 -- name: 'Mindset Shifts: Gratitude, Prioritization, and Sustainable Goals' - startOffset: 3051 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3051 - endOffset: 3178 -- name: 'Stoicism Reading Recommendation: Meditations and Mental Models' - startOffset: 3178 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3178 - endOffset: 3225 -- name: 'Prioritization Framework: Focusing on Five Impactful Goals' - startOffset: 3225 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3225 - endOffset: 3321 -- name: 'Overcoming Perfectionism: Self-Compassion and Temporal Perspective' - startOffset: 3321 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3321 - endOffset: 3443 -- name: 'Resources for Learning Biohacking: Huberman Lab Podcast & Top Episodes' - startOffset: 3443 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3443 - endOffset: 3533 -- name: Key Takeaways, Next Steps, and Episode Close - startOffset: 3533 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3533 - endOffset: 3478 --- Links: diff --git a/_podcast/s07e03-product-management-essentials.md b/_podcast/build-and-scale-ai-data-products-with-mlops.md similarity index 97% rename from _podcast/s07e03-product-management-essentials.md rename to _podcast/build-and-scale-ai-data-products-with-mlops.md index d48b5730..cf108398 100644 --- a/_podcast/s07e03-product-management-essentials.md +++ b/_podcast/build-and-scale-ai-data-products-with-mlops.md @@ -1,11 +1,11 @@ --- title: 'Build & Scale Data Products for AI: Roadmaps, MLOps, Customer Research & Metrics' short: Product Management Essentials for Data 
Professionals +season: 7 +episode: 3 guests: - gregcoquillo image: images/podcast/s07e03-product-management-essentials.jpg -season: 7 -episode: 3 ids: youtube: p4wg0Vd2uD4 anchor: Product-Management-Essentials-for-Data-Professionals---Greg-Coquillo-e1dr8g5 @@ -14,6 +14,95 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Product-Management-Essentials-for-Data-Professionals---Greg-Coquillo-e1dr8g5 spotify: https://open.spotify.com/episode/1Oh6ewUJ2c1jiVcKxWIwDU apple: https://podcasts.apple.com/us/podcast/product-management-essentials-for-data-professionals/id1541710331?i=1000550093434 + +description: Build scalable data products with MLOps roadmaps, customer research and metric-driven templates - prioritize impact, reduce failures, and measure success +intro: How do you move from proofs-of-concept to scalable AI data products that deliver measurable business value? In this episode, Greg Coquillo, a Technology Manager at Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, walks through practical approaches for building and scaling data products, MLOps, customer research, and metrics.

We cover Greg’s transition into AI product work and the role of data product managers (internal vs. external), then dive into customer journey mapping, domain knowledge, and structured customer research—interview techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how to work backwards from business problems, contribute technical input to roadmaps with T‑shirt sizing, and prioritize MLOps by spotting unscalable manual processes. Greg outlines three‑year roadmap thinking (impact, effort, cost), a pragmatic Excel template (problems → solutions → metrics), and SMART and operational metrics like pipeline failures, SLAs, and data quality. He also addresses operating without a PM, aligning team mental models, and on‑the‑job product skill development.

Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer research, and defining success metrics for AI-driven data products +dateadded: 2022-02-06 + +duration: PT00H59M41S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=0 + endOffset: 103 +- name: Career Background & Transition to AI Products + startOffset: 103 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=103 + endOffset: 401 +- name: Role & Responsibilities of Data Product Managers (Internal vs External) + startOffset: 401 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=401 + endOffset: 843 +- name: Customer Journey & Domain Knowledge for Data Professionals + startOffset: 843 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=843 + endOffset: 1081 +- name: 'Customer Research Techniques: Interviews & Documentation' + startOffset: 1081 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1081 + endOffset: 1228 +- name: Structuring Interviews with Business Partners & the Five Whys + startOffset: 1228 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1228 + endOffset: 1400 +- name: Hypothesis Testing & Working Backwards from Business Problems + startOffset: 1400 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1400 + endOffset: 1585 +- name: Product Sense & Product Mindset Explained + startOffset: 1585 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1585 + endOffset: 1733 +- name: 'Contributing to Roadmaps: Technical Input & T‑Shirt Sizing' + startOffset: 1733 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1733 + endOffset: 1905 +- name: 'Working Backwards: Problem‑First Feature Design' + startOffset: 1905 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1905 + endOffset: 2134 +- name: 'Roadmap Types: Technical Roadmaps, MLOps & Scaling Strategies' + startOffset: 2134 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2134 + endOffset: 2341 +- name: Identifying 
Unscalable Manual Processes & ML Ops Prioritization + startOffset: 2341 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2341 + endOffset: 2504 +- name: 'Three‑Year Roadmap: Prioritization by Impact, Effort & Cost' + startOffset: 2504 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2504 + endOffset: 2838 +- name: 'Practical Roadmap Template in Excel: Problems → Solutions → Metrics' + startOffset: 2838 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2838 + endOffset: 3071 +- name: Success Metrics & SMART Goals for Internal Data Platforms + startOffset: 3071 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3071 + endOffset: 3207 +- name: 'Operational Metrics: Pipeline Failures, SLAs & Data Quality' + startOffset: 3207 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3207 + endOffset: 3332 +- name: 'Operating Without a PM: Identify Customers & Validate Work' + startOffset: 3332 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3332 + endOffset: 3464 +- name: Team Autonomy & Aligning Mental Models for Product Success + startOffset: 3464 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3464 + endOffset: 3522 +- name: 'Career Advice: Learn Product Skills on the Job & Follow‑up Resources' + startOffset: 3522 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3522 + endOffset: 3647 +- name: Episode Close & How to Connect with Guest + startOffset: 3647 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3647 + endOffset: 3581 + transcript: - header: Episode Introduction & Guest Overview - line: This week, we'll talk about learning product management. We have a special @@ -867,107 +956,6 @@ transcript: sec: 3647 time: '1:00:47' who: Alexey -description: Build scalable data products with MLOps roadmaps, customer research and - metric-driven templates - prioritize impact, reduce failures, and measure success. -intro: How do you move from proofs-of-concept to scalable AI data products that deliver - measurable business value? 
In this episode, Greg Coquillo, a Technology Manager at - Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, - walks through practical approaches for building and scaling data products, MLOps, - customer research, and metrics.

We cover Greg’s transition into AI product - work and the role of data product managers (internal vs. external), then dive into - customer journey mapping, domain knowledge, and structured customer research—interview - techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how - to work backwards from business problems, contribute technical input to roadmaps - with T‑shirt sizing, and prioritize MLOps by spotting unscalable manual processes. - Greg outlines three‑year roadmap thinking (impact, effort, cost), a pragmatic Excel - template (problems → solutions → metrics), and SMART and operational metrics like - pipeline failures, SLAs, and data quality. He also addresses operating without a - PM, aligning team mental models, and on‑the‑job product skill development.

- Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer - research, and defining success metrics for AI-driven data products. -dateadded: '2022-02-06' -duration: PT00H59M41S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=0 - endOffset: 103 -- name: Career Background & Transition to AI Products - startOffset: 103 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=103 - endOffset: 401 -- name: Role & Responsibilities of Data Product Managers (Internal vs External) - startOffset: 401 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=401 - endOffset: 843 -- name: Customer Journey & Domain Knowledge for Data Professionals - startOffset: 843 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=843 - endOffset: 1081 -- name: 'Customer Research Techniques: Interviews & Documentation' - startOffset: 1081 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1081 - endOffset: 1228 -- name: Structuring Interviews with Business Partners & the Five Whys - startOffset: 1228 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1228 - endOffset: 1400 -- name: Hypothesis Testing & Working Backwards from Business Problems - startOffset: 1400 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1400 - endOffset: 1585 -- name: Product Sense & Product Mindset Explained - startOffset: 1585 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1585 - endOffset: 1733 -- name: 'Contributing to Roadmaps: Technical Input & T‑Shirt Sizing' - startOffset: 1733 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1733 - endOffset: 1905 -- name: 'Working Backwards: Problem‑First Feature Design' - startOffset: 1905 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1905 - endOffset: 2134 -- name: 'Roadmap Types: Technical Roadmaps, MLOps & Scaling Strategies' - startOffset: 2134 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2134 - endOffset: 2341 -- name: Identifying 
Unscalable Manual Processes & ML Ops Prioritization - startOffset: 2341 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2341 - endOffset: 2504 -- name: 'Three‑Year Roadmap: Prioritization by Impact, Effort & Cost' - startOffset: 2504 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2504 - endOffset: 2838 -- name: 'Practical Roadmap Template in Excel: Problems → Solutions → Metrics' - startOffset: 2838 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2838 - endOffset: 3071 -- name: Success Metrics & SMART Goals for Internal Data Platforms - startOffset: 3071 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3071 - endOffset: 3207 -- name: 'Operational Metrics: Pipeline Failures, SLAs & Data Quality' - startOffset: 3207 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3207 - endOffset: 3332 -- name: 'Operating Without a PM: Identify Customers & Validate Work' - startOffset: 3332 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3332 - endOffset: 3464 -- name: Team Autonomy & Aligning Mental Models for Product Success - startOffset: 3464 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3464 - endOffset: 3522 -- name: 'Career Advice: Learn Product Skills on the Job & Follow‑up Resources' - startOffset: 3522 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3522 - endOffset: 3647 -- name: Episode Close & How to Connect with Guest - startOffset: 3647 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3647 - endOffset: 3581 --- Links: diff --git a/_podcast/s15e09-data-engineering-for-fraud-prevention.md b/_podcast/build-and-scale-data-engineering-systems-for-fraud-detection.md similarity index 97% rename from _podcast/s15e09-data-engineering-for-fraud-prevention.md rename to _podcast/build-and-scale-data-engineering-systems-for-fraud-detection.md index 9c00ddce..87b23e03 100644 --- a/_podcast/s15e09-data-engineering-for-fraud-prevention.md +++ b/_podcast/build-and-scale-data-engineering-systems-for-fraud-detection.md @@ -1,20 +1,148 @@ --- 
+title: 'Build and Scale Data Engineering Systems for Fraud Detection: Feature Pipelines, Real-Time Inference, Graph Databases & Production Debugging' +short: Data Engineering for Fraud Prevention +season: 15 episode: 9 guests: - angelaramirez +image: images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg ids: anchor: atatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab youtube: ZXNKjrrKU_I -image: images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab apple: https://podcasts.apple.com/us/podcast/data-engineering-for-fraud-prevention-angela-ramirez/id1541710331?i=1000630468398 spotify: https://open.spotify.com/episode/4wpYwS8XTlNdws39Zynakf?si=OFAHIkVsQlKvdTnlFNaLGg youtube: https://www.youtube.com/watch?v=ZXNKjrrKU_I -season: 15 -short: Data Engineering for Fraud Prevention -title: 'Retail Fraud Detection with Data Engineering: Real-Time Scoring, Graphs & - MLOps' + +description: "Learn retail fraud detection with real-time scoring and MLOps: build data pipelines, graph investigations, and instant cashier decisions to cut losses." +intro: How do you build data infrastructure that stops stolen-card transactions and return abuse in real time? In this episode, Angela Ramirez, a Sam’s Club data engineer who moved from Sephora and specializes in machine learning for fraud prevention, walks through the engineering behind retail fraud detection. Drawing on her background in NLP and four years as a data engineer, Angela explains pipelines, feature engineering workflows that combine daily batches with real-time scoring, and the MLOps responsibilities for model metrics, deployment, and monitoring.

We cover system design best practices—stakeholder alignment, timing, documentation—and data modeling tradeoffs across relational, document (Elasticsearch), and graph databases (SPARQL, Neo4j) to support network features connecting members, transactions, and products. Angela also discusses hybrid architectures for instant inference, tooling like PySpark, Pandas/PyArrow, Cassandra, GCP/Dataproc, and data quality practices (Great Expectations), plus operational debugging and scaling patterns. Listen to learn practical approaches to real-time scoring, graph-powered investigations, and the engineering decisions that make retail fraud detection reliable and actionable +dateadded: 2023-10-07 + +duration: PT00H59M19S + +quotableClips: +- name: Podcast Introduction & Guest Overview (Angela Ramirez) + startOffset: 0 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=0 + endOffset: 161 +- name: 'Career Journey: Sephora to Sam''s Club' + startOffset: 161 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=161 + endOffset: 225 +- name: 'Fraud Detection in Retail: Stolen Cards & Return Abuse' + startOffset: 225 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=225 + endOffset: 382 +- name: 'Data Engineering for Fraud: Pipelines, Features, Dashboards' + startOffset: 382 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=382 + endOffset: 504 +- name: 'Feature Engineering Workflow: Daily Batches + Real-Time Scoring' + startOffset: 504 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=504 + endOffset: 588 +- name: 'MLOps Responsibilities: Model Metrics, Deployment, Monitoring' + startOffset: 588 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=588 + endOffset: 679 +- name: 'Team Structure: Data Engineers, ML Engineers, Data Scientists' + startOffset: 679 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=679 + endOffset: 768 +- name: 'Academic Background: Cognitive Science, NLP, HCI' + startOffset: 768 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=768 + 
endOffset: 854 +- name: 'Data-Centric Mindset: Why Data Engineering Powers ML' + startOffset: 854 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=854 + endOffset: 962 +- name: 'Career Transition: Process Improvement → Data Analyst → Data Engineer' + startOffset: 962 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=962 + endOffset: 1155 +- name: 'System Design Best Practices: Stakeholders, Timing, Documentation' + startOffset: 1155 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1155 + endOffset: 1230 +- name: 'Data Modeling Decisions: Relational vs Document vs Graph' + startOffset: 1230 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1230 + endOffset: 1290 +- name: Elasticsearch & Document Indexing for Entity Data + startOffset: 1290 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1290 + endOffset: 1384 +- name: 'Graph Databases & SPARQL: Wikidata and Entity Relationships' + startOffset: 1384 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1384 + endOffset: 1755 +- name: 'Network Features for Fraud: Members, Transactions, Products' + startOffset: 1755 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1755 + endOffset: 2014 +- name: 'Real-Time Decisioning: Front-End Signals for Cashiers & Security' + startOffset: 2014 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2014 + endOffset: 2086 +- name: 'Hybrid Architecture: Batch Computation with Instant Inference' + startOffset: 2086 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2086 + endOffset: 2133 +- name: 'Database Selection Criteria: Static Schema vs Dynamic Data' + startOffset: 2133 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2133 + endOffset: 2291 +- name: 'Graph Visualization for Investigations: Neo4j Use Cases' + startOffset: 2291 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2291 + endOffset: 2450 +- name: 'Software Engineering for Data Engineers: Testing & Code Quality (PySpark)' + startOffset: 2450 + url: 
https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2450 + endOffset: 2608 +- name: 'Data Quality Tooling: Great Expectations and Cloud Monitoring' + startOffset: 2608 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2608 + endOffset: 2681 +- name: 'Operational Challenges: Job Failures, Schema Changes, Scaling' + startOffset: 2681 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2681 + endOffset: 2901 +- name: 'Debugging Playbook: Logs, Runbooks, and Error Documentation' + startOffset: 2901 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2901 + endOffset: 3023 +- name: 'Tech Stack Overview: GCP, Dataproc/Databricks, PySpark, Cassandra' + startOffset: 3023 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3023 + endOffset: 3083 +- name: 'Managed vs Serverless Spark: Dataproc, EMR, Serverless Execution' + startOffset: 3083 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3083 + endOffset: 3198 +- name: 'Pandas & PyArrow: Performance Improvements for Big Data' + startOffset: 3198 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3198 + endOffset: 3297 +- name: 'Cassandra Use Cases: Scalability, Fault Tolerance, Clusters' + startOffset: 3297 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3297 + endOffset: 3379 +- name: 'External Data Integration: APIs, Data Contracts, Stability' + startOffset: 3379 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3379 + endOffset: 3600 +- name: 'Recommended Resources: Designing Data-Intensive Applications, PySpark, SQL' + startOffset: 3600 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3600 + endOffset: 3675 +- name: Episode Wrap-Up & Contact Links + startOffset: 3675 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3675 + endOffset: 3559 + transcript: - header: Podcast Introduction & Guest Overview (Angela Ramirez) - line: This week, we will talk about data engineering and fraud detection. 
We have @@ -1151,146 +1279,6 @@ transcript: sec: 3675 time: '1:01:15' who: Alexey -description: 'Learn retail fraud detection with real-time scoring and MLOps: build - data pipelines, graph investigations, and instant cashier decisions to cut losses.' -intro: How do you build data infrastructure that stops stolen-card transactions and - return abuse in real time? In this episode, Angela Ramirez, a Sam’s Club data engineer - who moved from Sephora and specializes in machine learning for fraud prevention, - walks through the engineering behind retail fraud detection. Drawing on her background - in NLP and four years as a data engineer, Angela explains pipelines, feature engineering - workflows that combine daily batches with real-time scoring, and the MLOps responsibilities - for model metrics, deployment, and monitoring.

We cover system design best - practices—stakeholder alignment, timing, documentation—and data modeling tradeoffs - across relational, document (Elasticsearch), and graph databases (SPARQL, Neo4j) - to support network features connecting members, transactions, and products. Angela - also discusses hybrid architectures for instant inference, tooling like PySpark, - Pandas/PyArrow, Cassandra, GCP/Dataproc, and data quality practices (Great Expectations), - plus operational debugging and scaling patterns. Listen to learn practical approaches - to real-time scoring, graph-powered investigations, and the engineering decisions - that make retail fraud detection reliable and actionable. -dateadded: '2023-10-07' -duration: PT00H59M19S -quotableClips: -- name: Podcast Introduction & Guest Overview (Angela Ramirez) - startOffset: 0 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=0 - endOffset: 161 -- name: 'Career Journey: Sephora to Sam''s Club' - startOffset: 161 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=161 - endOffset: 225 -- name: 'Fraud Detection in Retail: Stolen Cards & Return Abuse' - startOffset: 225 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=225 - endOffset: 382 -- name: 'Data Engineering for Fraud: Pipelines, Features, Dashboards' - startOffset: 382 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=382 - endOffset: 504 -- name: 'Feature Engineering Workflow: Daily Batches + Real-Time Scoring' - startOffset: 504 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=504 - endOffset: 588 -- name: 'MLOps Responsibilities: Model Metrics, Deployment, Monitoring' - startOffset: 588 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=588 - endOffset: 679 -- name: 'Team Structure: Data Engineers, ML Engineers, Data Scientists' - startOffset: 679 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=679 - endOffset: 768 -- name: 'Academic Background: Cognitive Science, NLP, HCI' - startOffset: 768 - url: 
https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=768 - endOffset: 854 -- name: 'Data-Centric Mindset: Why Data Engineering Powers ML' - startOffset: 854 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=854 - endOffset: 962 -- name: 'Career Transition: Process Improvement → Data Analyst → Data Engineer' - startOffset: 962 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=962 - endOffset: 1155 -- name: 'System Design Best Practices: Stakeholders, Timing, Documentation' - startOffset: 1155 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1155 - endOffset: 1230 -- name: 'Data Modeling Decisions: Relational vs Document vs Graph' - startOffset: 1230 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1230 - endOffset: 1290 -- name: Elasticsearch & Document Indexing for Entity Data - startOffset: 1290 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1290 - endOffset: 1384 -- name: 'Graph Databases & SPARQL: Wikidata and Entity Relationships' - startOffset: 1384 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1384 - endOffset: 1755 -- name: 'Network Features for Fraud: Members, Transactions, Products' - startOffset: 1755 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1755 - endOffset: 2014 -- name: 'Real-Time Decisioning: Front-End Signals for Cashiers & Security' - startOffset: 2014 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2014 - endOffset: 2086 -- name: 'Hybrid Architecture: Batch Computation with Instant Inference' - startOffset: 2086 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2086 - endOffset: 2133 -- name: 'Database Selection Criteria: Static Schema vs Dynamic Data' - startOffset: 2133 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2133 - endOffset: 2291 -- name: 'Graph Visualization for Investigations: Neo4j Use Cases' - startOffset: 2291 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2291 - endOffset: 2450 -- name: 'Software Engineering for Data Engineers: Testing & Code Quality (PySpark)' - startOffset: 2450 
- url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2450 - endOffset: 2608 -- name: 'Data Quality Tooling: Great Expectations and Cloud Monitoring' - startOffset: 2608 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2608 - endOffset: 2681 -- name: 'Operational Challenges: Job Failures, Schema Changes, Scaling' - startOffset: 2681 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2681 - endOffset: 2901 -- name: 'Debugging Playbook: Logs, Runbooks, and Error Documentation' - startOffset: 2901 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2901 - endOffset: 3023 -- name: 'Tech Stack Overview: GCP, Dataproc/Databricks, PySpark, Cassandra' - startOffset: 3023 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3023 - endOffset: 3083 -- name: 'Managed vs Serverless Spark: Dataproc, EMR, Serverless Execution' - startOffset: 3083 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3083 - endOffset: 3198 -- name: 'Pandas & PyArrow: Performance Improvements for Big Data' - startOffset: 3198 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3198 - endOffset: 3297 -- name: 'Cassandra Use Cases: Scalability, Fault Tolerance, Clusters' - startOffset: 3297 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3297 - endOffset: 3379 -- name: 'External Data Integration: APIs, Data Contracts, Stability' - startOffset: 3379 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3379 - endOffset: 3600 -- name: 'Recommended Resources: Designing Data-Intensive Applications, PySpark, SQL' - startOffset: 3600 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3600 - endOffset: 3675 -- name: Episode Wrap-Up & Contact Links - startOffset: 3675 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3675 - endOffset: 3559 --- Links: diff --git a/_podcast/s01e03-building-ds-team.md b/_podcast/build-data-team.md similarity index 97% rename from _podcast/s01e03-building-ds-team.md rename to _podcast/build-data-team.md index 07343e62..2d8188df 100644 --- 
a/_podcast/s01e03-building-ds-team.md +++ b/_podcast/build-data-team.md @@ -1,11 +1,11 @@ --- title: 'How to Build and Scale ML Teams: Hiring, MLOps & Product-Driven AI for Startups' short: Building a Data Science Team +season: 1 +episode: 3 guests: - dattran image: images/podcast/s01e03-building-ds-team.jpg -season: 1 -episode: 3 ids: youtube: ScDIB-3O77A anchor: Building-a-Data-Science-Team---Dat-Tran-enlmef @@ -14,21 +14,141 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Building-a-Data-Science-Team---Dat-Tran-enlmef spotify: https://open.spotify.com/episode/0daFpY1z2J4Uop1XdMNsnY apple: https://podcasts.apple.com/us/podcast/building-a-data-science-team-dat-tran/id1541710331?i=1000502061864 -intro: 'How do you build and scale an ML team that delivers product-driven AI without - getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner - & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks - through practical strategies for hiring, MLOps, and shaping data teams for startups. -

Dat draws on a decade of production ML experience to unpack the MLOps mindset - (day‑two operations, model maintenance), how to hire early (T‑shaped generalists, - take‑home assessments, key hiring signals), and when to shift to specialists as - you scale. He also explains product-centric practices: aligning hiring to prototype - vs. MVP needs, prioritizing impact over technical perfection, and building human‑centric - AI (augmenting pricing managers at Priceloop). Other topics include open research - and open source as strategic advantages, bootstrapping data capabilities, retention - through autonomy and interesting work, and educating leadership about realistic - AI expectations.

Listen for actionable guidance on building ML teams, hiring - machine learning engineers, and implementing MLOps and product-driven AI in early‑stage - startups.' + +description: 'Master building ML teams: hiring playbooks, MLOps day-two ops, and product-driven AI for startups—scale with T-shaped engineers, ship robust models.' +intro: 'How do you build and scale an ML team that delivers product-driven AI without getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks through practical strategies for hiring, MLOps, and shaping data teams for startups.

Dat draws on a decade of production ML experience to unpack the MLOps mindset (day‑two operations, model maintenance), how to hire early (T‑shaped generalists, take‑home assessments, key hiring signals), and when to shift to specialists as you scale. He also explains product-centric practices: aligning hiring to prototype vs. MVP needs, prioritizing impact over technical perfection, and building human‑centric AI (augmenting pricing managers at Priceloop). Other topics include open research and open source as strategic advantages, bootstrapping data capabilities, retention through autonomy and interesting work, and educating leadership about realistic AI expectations.

Listen for actionable guidance on building ML teams, hiring machine learning engineers, and implementing MLOps and product-driven AI in early‑stage startups.' +topics: +- leadership +- team building +- machine learning +- MLOps +- startup +dateadded: 2021-02-23 + +duration: PT00H58M44S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=0 + endOffset: 126 +- name: Guest Overview & Career Snapshot + startOffset: 126 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=126 + endOffset: 192 +- name: 'Early Background: Economics, Investment Banking & Early Coding' + startOffset: 192 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=192 + endOffset: 263 +- name: From VBA Automation to Machine Learning Interest + startOffset: 263 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=263 + endOffset: 373 +- name: 'Accenture & Big Data: Spark, MPP Databases and Early ML Projects' + startOffset: 373 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=373 + endOffset: 486 +- name: 'Pivotal Experience: Production ML, DevOps Practices & Engineering Rigor' + startOffset: 486 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=486 + endOffset: 560 +- name: 'MLOps Mindset: Day‑Two Operations and Model Maintenance' + startOffset: 560 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=560 + endOffset: 667 +- name: Creating a Head of Data Role at Idealo + startOffset: 667 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=667 + endOffset: 804 +- name: 'Team Building & Open Source: Sustainable Machine Learning Culture' + startOffset: 804 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=804 + endOffset: 908 +- name: 'Axel Springer: Corporate Tech Transformation, Research & Evangelism' + startOffset: 908 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=908 + endOffset: 1158 +- name: 'Career Transition: Leaving Corporate to Found a Startup' + startOffset: 1158 + url: 
https://www.youtube.com/watch?v=ScDIB-3O77A&t=1158 + endOffset: 1226 +- name: 'Founding Priceloop: Technical Co‑founder and Pricing Opportunity' + startOffset: 1226 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1226 + endOffset: 1399 +- name: 'Pricing Product Vision: White‑Box AI Framework for Dynamic Pricing' + startOffset: 1399 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1399 + endOffset: 1492 +- name: 'Human‑Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' + startOffset: 1492 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1492 + endOffset: 1525 +- name: 'Early‑Stage Hiring Plan: Building a Tactical Product Team' + startOffset: 1525 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1525 + endOffset: 1645 +- name: 'Open Research Strategy: Community, Open‑Source & Competitive Advantage' + startOffset: 1645 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1645 + endOffset: 1737 +- name: 'Aligning Hiring with Vision: Prototype, MVP & Feature Uncertainty' + startOffset: 1737 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1737 + endOffset: 1780 +- name: 'Cross‑Functional Roles: ML Engineers, Data Engineers, PMs & Designers' + startOffset: 1780 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1780 + endOffset: 1839 +- name: 'Generalists First: T‑Shaped Engineers for Early Startups' + startOffset: 1839 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1839 + endOffset: 2015 +- name: 'Mid‑Stage Hiring: Shifting Toward Specialists as Maturity Grows' + startOffset: 2015 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2015 + endOffset: 2243 +- name: 'Product‑Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' + startOffset: 2243 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2243 + endOffset: 2371 +- name: 'Encouraging Open Source: Managerial Coaching and Leading by Example' + startOffset: 2371 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2371 + endOffset: 2607 +- name: 'Hiring 
Signals: CVs, Coding Skills, Math Background & Soft Skills' + startOffset: 2607 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2607 + endOffset: 2851 +- name: 'Take‑Home Assessments: Code Quality, Naming, Consistency & Detail' + startOffset: 2851 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2851 + endOffset: 2991 +- name: 'Project Prioritization: Impact vs Technical Feasibility & Fail‑Fast' + startOffset: 2991 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2991 + endOffset: 3152 +- name: 'Bootstrapping Data Teams: When to Hire Engineers Versus Analysts' + startOffset: 3152 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3152 + endOffset: 3215 +- name: 'Corporate IT in a Tech Transformation: From Central IT to DevOps' + startOffset: 3215 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3215 + endOffset: 3263 +- name: 'Retention Strategies: Competitive Pay, Interesting Work & Autonomy' + startOffset: 3263 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3263 + endOffset: 3400 +- name: 'Expectation Management: Educating Leadership on AI Capabilities' + startOffset: 3400 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3400 + endOffset: 3619 +- name: Episode Wrap‑Up & Key Takeaways + startOffset: 3619 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3619 + endOffset: 3524 + transcript: - header: Podcast Introduction - header: Guest Overview & Career Snapshot @@ -967,131 +1087,6 @@ transcript: sec: 3650 time: '60:50' who: Alexey -description: 'Master building ML teams: hiring playbooks, MLOps day-two ops, and product-driven - AI for startups—scale with T-shaped engineers, ship robust models.' 
-dateadded: '2021-02-23' -duration: PT00H58M44S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=0 - endOffset: 126 -- name: Guest Overview & Career Snapshot - startOffset: 126 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=126 - endOffset: 192 -- name: 'Early Background: Economics, Investment Banking & Early Coding' - startOffset: 192 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=192 - endOffset: 263 -- name: From VBA Automation to Machine Learning Interest - startOffset: 263 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=263 - endOffset: 373 -- name: 'Accenture & Big Data: Spark, MPP Databases and Early ML Projects' - startOffset: 373 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=373 - endOffset: 486 -- name: 'Pivotal Experience: Production ML, DevOps Practices & Engineering Rigor' - startOffset: 486 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=486 - endOffset: 560 -- name: 'MLOps Mindset: Day‑Two Operations and Model Maintenance' - startOffset: 560 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=560 - endOffset: 667 -- name: Creating a Head of Data Role at Idealo - startOffset: 667 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=667 - endOffset: 804 -- name: 'Team Building & Open Source: Sustainable Machine Learning Culture' - startOffset: 804 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=804 - endOffset: 908 -- name: 'Axel Springer: Corporate Tech Transformation, Research & Evangelism' - startOffset: 908 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=908 - endOffset: 1158 -- name: 'Career Transition: Leaving Corporate to Found a Startup' - startOffset: 1158 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1158 - endOffset: 1226 -- name: 'Founding Priceloop: Technical Co‑founder and Pricing Opportunity' - startOffset: 1226 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1226 - endOffset: 1399 -- name: 'Pricing Product Vision: 
White‑Box AI Framework for Dynamic Pricing' - startOffset: 1399 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1399 - endOffset: 1492 -- name: 'Human‑Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' - startOffset: 1492 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1492 - endOffset: 1525 -- name: 'Early‑Stage Hiring Plan: Building a Tactical Product Team' - startOffset: 1525 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1525 - endOffset: 1645 -- name: 'Open Research Strategy: Community, Open‑Source & Competitive Advantage' - startOffset: 1645 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1645 - endOffset: 1737 -- name: 'Aligning Hiring with Vision: Prototype, MVP & Feature Uncertainty' - startOffset: 1737 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1737 - endOffset: 1780 -- name: 'Cross‑Functional Roles: ML Engineers, Data Engineers, PMs & Designers' - startOffset: 1780 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1780 - endOffset: 1839 -- name: 'Generalists First: T‑Shaped Engineers for Early Startups' - startOffset: 1839 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1839 - endOffset: 2015 -- name: 'Mid‑Stage Hiring: Shifting Toward Specialists as Maturity Grows' - startOffset: 2015 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2015 - endOffset: 2243 -- name: 'Product‑Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' - startOffset: 2243 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2243 - endOffset: 2371 -- name: 'Encouraging Open Source: Managerial Coaching and Leading by Example' - startOffset: 2371 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2371 - endOffset: 2607 -- name: 'Hiring Signals: CVs, Coding Skills, Math Background & Soft Skills' - startOffset: 2607 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2607 - endOffset: 2851 -- name: 'Take‑Home Assessments: Code Quality, Naming, Consistency & Detail' - startOffset: 2851 - url: 
https://www.youtube.com/watch?v=ScDIB-3O77A&t=2851 - endOffset: 2991 -- name: 'Project Prioritization: Impact vs Technical Feasibility & Fail‑Fast' - startOffset: 2991 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2991 - endOffset: 3152 -- name: 'Bootstrapping Data Teams: When to Hire Engineers Versus Analysts' - startOffset: 3152 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3152 - endOffset: 3215 -- name: 'Corporate IT in a Tech Transformation: From Central IT to DevOps' - startOffset: 3215 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3215 - endOffset: 3263 -- name: 'Retention Strategies: Competitive Pay, Interesting Work & Autonomy' - startOffset: 3263 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3263 - endOffset: 3400 -- name: 'Expectation Management: Educating Leadership on AI Capabilities' - startOffset: 3400 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3400 - endOffset: 3619 -- name: Episode Wrap‑Up & Key Takeaways - startOffset: 3619 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3619 - endOffset: 3524 --- ## Books diff --git a/_podcast/s14e09-interpretable-ai-and-ml.md b/_podcast/build-explainable-and-actionable-ai-ml-systems.md similarity index 97% rename from _podcast/s14e09-interpretable-ai-and-ml.md rename to _podcast/build-explainable-and-actionable-ai-ml-systems.md index eaf25149..f9b8f739 100644 --- a/_podcast/s14e09-interpretable-ai-and-ml.md +++ b/_podcast/build-explainable-and-actionable-ai-ml-systems.md @@ -1,20 +1,148 @@ --- +title: 'Build Explainable and Actionable AI/ML Systems: Industrial PhD, Trust Theory & Production Deployment' +short: Build Explainable and Actionable AI/ML Systems +season: 14 episode: 9 guests: - polinamosolova +image: images/podcast/s14e09-interpretable-ai-and-ml.jpg ids: anchor: atatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq youtube: EQcY83VA0Us -image: images/podcast/s14e09-interpretable-ai-and-ml.jpg links: anchor: 
https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq apple: https://podcasts.apple.com/us/podcast/interpretable-ai-and-ml-polina-mosolova/id1541710331?i=1000619926085 spotify: https://open.spotify.com/episode/0p84r6bZmgKO514oC1HE2L?si=30L5gJoSS6Wtrghtdr3jYA youtube: https://www.youtube.com/watch?v=EQcY83VA0Us -season: 14 -short: Interpretable AI and ML -title: 'Actionable Churn Prediction: Explainable AI, Organizational Trust (ABI) & - MLOps' + +description: "Build trustworthy ML systems that drive business decisions through explainable AI, organizational trust theory, and actionable model deployment." +intro: "How do you build ML systems that business teams trust and can act on? In this episode, Polina Mosolova — a data scientist at SAP who completed an industrial PhD building end‑to‑end ML pipelines — demonstrates how to bridge research and production through explainable AI grounded in organizational trust theory. Drawing from her churn prediction research, Polina shows how the ABI framework (Ability, Benevolence, Integrity) transforms model explanations into actionable business interventions.

We explore the industrial PhD path as a vehicle for building trustworthy ML systems, covering the practical tensions of research and production deliverables, supervision dynamics, and how academic rigor enhances deployable models. Technical deep-dives include interpretability versus explainability versus actionable ML, model architecture choices (glass‑box models, GAMs, Neural Additive Models), explainability tooling (random forest + SHAP), computer vision activation maps, and why LLM explainability faces unique challenges compared to tabular models. The conversation ties together trust proxies, KPIs, and MLOps practices that make explanations business‑relevant.

Listen to learn a systematic approach for building ML systems where explanations drive decisions — essential for data scientists who need to deploy models that stakeholders understand, trust, and can act upon to achieve measurable business outcomes." +topics: +- machine learning +- AI +- MLOps +- explainable AI +- interpretability +dateadded: 2023-07-08 + +duration: PT01H01M48S + +quotableClips: +- name: Episode Introduction & Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=0 + endOffset: 74 +- name: 'Guest Introduction: Polina Mosolova — Industrial PhD and Churn Prediction' + startOffset: 74 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=74 + endOffset: 125 +- name: 'Career Journey: Industrial PhD to Full‑Stack Data Scientist at SAP' + startOffset: 125 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=125 + endOffset: 439 +- name: 'Role Evolution: From Full‑Stack Data Scientist to MLOps Specialization' + startOffset: 439 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=439 + endOffset: 559 +- name: 'PhD Practice: Building End‑to‑End ML Pipelines During Doctoral Research' + startOffset: 559 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=559 + endOffset: 634 +- name: 'Dual Goals: Balancing Academic Research and Production Deliverables' + startOffset: 634 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=634 + endOffset: 753 +- name: 'Dissertation Focus: Churn Prediction Informed by Organizational Trust Theory' + startOffset: 753 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=753 + endOffset: 842 +- name: 'Production Challenges: Deploying Research Models in Industry' + startOffset: 842 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=842 + endOffset: 1077 +- name: 'Supervision & Stakeholders: Academic and Company Support Structures' + startOffset: 1077 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1077 + endOffset: 1145 +- name: 'Research‑Industry Bridge: Academic Conferences and Summer Schools' + 
startOffset: 1145 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1145 + endOffset: 1237 +- name: 'Time Management: Balancing PhD Writing with Industrial Responsibilities' + startOffset: 1237 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1237 + endOffset: 1478 +- name: 'Finding Industrial PhDs: Prevalence, Companies, and How to Search' + startOffset: 1478 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1478 + endOffset: 1661 +- name: 'Practical Tips: Job Postings, Language Requirements, and Application Search' + startOffset: 1661 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1661 + endOffset: 1792 +- name: 'Organizational Trust Theory: ABI Framework — Ability, Benevolence, Integrity' + startOffset: 1792 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1792 + endOffset: 2076 +- name: Pricing, Contracts, and Trust Dynamics in Subscription Services + startOffset: 2076 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2076 + endOffset: 2299 +- name: Linking Organizational Trust to Explainable AI and Feature Design + startOffset: 2299 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2299 + endOffset: 2514 +- name: 'Actionability: Turning Explanations into Usable Business Interventions' + startOffset: 2514 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2514 + endOffset: 2643 +- name: 'Definitions: Interpretability vs Explainability vs Actionable ML' + startOffset: 2643 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2643 + endOffset: 2842 +- name: 'Model Choices: Glass‑Box Models, Generalized Additive Models, Neural Additive + Models' + startOffset: 2842 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2842 + endOffset: 2940 +- name: 'Explainability Tools: Random Forest + SHAP — Explainable vs Interpretable' + startOffset: 2940 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2940 + endOffset: 3047 +- name: 'Computer Vision Explainability: Activation Maps and Human Interpretability' + startOffset: 3047 + url: 
https://www.youtube.com/watch?v=EQcY83VA0Us&t=3047 + endOffset: 3108 +- name: 'Summary: Interpretable Models, Explainable Outputs, and Actionable Decisions' + startOffset: 3108 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3108 + endOffset: 3159 +- name: 'Audience Matters: Explainable Feature Spaces and Tailoring Explanations' + startOffset: 3159 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3159 + endOffset: 3323 +- name: 'Explainable AI and Trust: User Confidence, Provenance, and Transparency' + startOffset: 3323 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3323 + endOffset: 3463 +- name: 'LLMs and Hallucinations: Explainability Challenges Versus Tabular Models' + startOffset: 3463 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3463 + endOffset: 3498 +- name: 'Measuring Trust: KPIs, Proxies, and Ethical Constraints' + startOffset: 3498 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3498 + endOffset: 3629 +- name: 'Business Relevance: Practical Proxies for Trust and Prioritizing Product + Ability' + startOffset: 3629 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3629 + endOffset: 3761 +- name: Episode Wrap‑Up and Closing Remarks + startOffset: 3761 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3761 + endOffset: 3708 + transcript: - header: Episode Introduction & Overview - header: 'Guest Introduction: Polina Mosolova — Industrial PhD and Churn Prediction' @@ -1084,142 +1212,6 @@ transcript: sec: 3782 time: '1:03:02' who: Polina -description: Master churn prediction with explainable AI and MLOps—learn ABI trust, - interpretable feature design, and deploy actionable models to reduce subscription - loss. -intro: How do you turn churn prediction research into models that business teams trust - and can act on? 
In this episode, Polina Mosolova — a data scientist at SAP who completed - an industrial PhD building end‑to‑end ML pipelines — walks through her applied framework - for churn prediction that integrates explainable AI with organizational trust theory. -

We cover Polina’s journey from full‑stack data scientist to MLOps specialization, - the practical tensions of producing research and production deliverables, and supervision - and stakeholder dynamics for industrial PhDs. The conversation centers on the ABI - framework (Ability, Benevolence, Integrity) and how trust proxies and KPIs make - churn models business‑relevant. Technical topics include interpretability versus - explainability versus actionable ML, model choices (glass‑box models, GAMs, Neural - Additive Models), explainability tools (random forest + SHAP), computer vision activation - maps, and the limits of LLM explainability and hallucinations compared to tabular - models.

Listen to learn concrete guidance for deploying explainable churn - models, translating explanations into interventions, and operationalizing trust - through MLOps and practical metrics — essential for data scientists building production - churn prediction systems. -dateadded: '2023-07-08' -duration: PT01H01M48S -quotableClips: -- name: Episode Introduction & Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=0 - endOffset: 74 -- name: 'Guest Introduction: Polina Mosolova — Industrial PhD and Churn Prediction' - startOffset: 74 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=74 - endOffset: 125 -- name: 'Career Journey: Industrial PhD to Full‑Stack Data Scientist at SAP' - startOffset: 125 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=125 - endOffset: 439 -- name: 'Role Evolution: From Full‑Stack Data Scientist to MLOps Specialization' - startOffset: 439 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=439 - endOffset: 559 -- name: 'PhD Practice: Building End‑to‑End ML Pipelines During Doctoral Research' - startOffset: 559 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=559 - endOffset: 634 -- name: 'Dual Goals: Balancing Academic Research and Production Deliverables' - startOffset: 634 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=634 - endOffset: 753 -- name: 'Dissertation Focus: Churn Prediction Informed by Organizational Trust Theory' - startOffset: 753 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=753 - endOffset: 842 -- name: 'Production Challenges: Deploying Research Models in Industry' - startOffset: 842 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=842 - endOffset: 1077 -- name: 'Supervision & Stakeholders: Academic and Company Support Structures' - startOffset: 1077 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1077 - endOffset: 1145 -- name: 'Research‑Industry Bridge: Academic Conferences and Summer Schools' - startOffset: 1145 - url: 
https://www.youtube.com/watch?v=EQcY83VA0Us&t=1145 - endOffset: 1237 -- name: 'Time Management: Balancing PhD Writing with Industrial Responsibilities' - startOffset: 1237 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1237 - endOffset: 1478 -- name: 'Finding Industrial PhDs: Prevalence, Companies, and How to Search' - startOffset: 1478 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1478 - endOffset: 1661 -- name: 'Practical Tips: Job Postings, Language Requirements, and Application Search' - startOffset: 1661 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1661 - endOffset: 1792 -- name: 'Organizational Trust Theory: ABI Framework — Ability, Benevolence, Integrity' - startOffset: 1792 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1792 - endOffset: 2076 -- name: Pricing, Contracts, and Trust Dynamics in Subscription Services - startOffset: 2076 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2076 - endOffset: 2299 -- name: Linking Organizational Trust to Explainable AI and Feature Design - startOffset: 2299 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2299 - endOffset: 2514 -- name: 'Actionability: Turning Explanations into Usable Business Interventions' - startOffset: 2514 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2514 - endOffset: 2643 -- name: 'Definitions: Interpretability vs Explainability vs Actionable ML' - startOffset: 2643 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2643 - endOffset: 2842 -- name: 'Model Choices: Glass‑Box Models, Generalized Additive Models, Neural Additive - Models' - startOffset: 2842 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2842 - endOffset: 2940 -- name: 'Explainability Tools: Random Forest + SHAP — Explainable vs Interpretable' - startOffset: 2940 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2940 - endOffset: 3047 -- name: 'Computer Vision Explainability: Activation Maps and Human Interpretability' - startOffset: 3047 - url: 
https://www.youtube.com/watch?v=EQcY83VA0Us&t=3047 - endOffset: 3108 -- name: 'Summary: Interpretable Models, Explainable Outputs, and Actionable Decisions' - startOffset: 3108 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3108 - endOffset: 3159 -- name: 'Audience Matters: Explainable Feature Spaces and Tailoring Explanations' - startOffset: 3159 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3159 - endOffset: 3323 -- name: 'Explainable AI and Trust: User Confidence, Provenance, and Transparency' - startOffset: 3323 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3323 - endOffset: 3463 -- name: 'LLMs and Hallucinations: Explainability Challenges Versus Tabular Models' - startOffset: 3463 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3463 - endOffset: 3498 -- name: 'Measuring Trust: KPIs, Proxies, and Ethical Constraints' - startOffset: 3498 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3498 - endOffset: 3629 -- name: 'Business Relevance: Practical Proxies for Trust and Prioritizing Product - Ability' - startOffset: 3629 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3629 - endOffset: 3761 -- name: Episode Wrap‑Up and Closing Remarks - startOffset: 3761 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3761 - endOffset: 3708 --- Links: diff --git a/_podcast/s11e05-building-data-science-practice.md b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md similarity index 97% rename from _podcast/s11e05-building-data-science-practice.md rename to _podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md index 14edcd69..37cce8cb 100644 --- a/_podcast/s11e05-building-data-science-practice.md +++ b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md @@ -1,20 +1,118 @@ --- +title: 'Building and Scaling Data Science Practice in Industrial Enterprises: AI Adoption, MLOps Maturity & Career Growth' +short: Building and Scaling Data Science Practice in Industrial Enterprises 
+season: 11 episode: 5 guests: - andreyshtylenko +image: images/podcast/s11e05-building-data-science-practice.jpg ids: anchor: Building-Data-Science-Practice---Andrey-Shtylenko-e1q2ka6 youtube: XbDQv8FTA4U -image: images/podcast/s11e05-building-data-science-practice.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Building-Data-Science-Practice---Andrey-Shtylenko-e1q2ka6 apple: https://podcasts.apple.com/us/podcast/building-data-science-practice-andrey-shtylenko/id1541710331?i=1000585100407 spotify: https://open.spotify.com/episode/0M7Y77MFToxtKuyfdF5W22?si=jgWR6EchQnWe6nYWW44ZxQ youtube: https://www.youtube.com/watch?v=XbDQv8FTA4U -season: 11 -short: Building Data Science Practice -title: 'Scale Industrial AI: MLOps, Sensorization, POC Strategy & Hub‑and‑Spoke Data - Teams' + +description: 'Discover Industrial AI tactics, MLOps & sensorization to scale projects: hub-and-spoke data teams, proving value, tooling and career steps to productionize ML.' +intro: "How do industrial enterprises move from pilots to production-ready AI—and what team structures, MLOps practices, and career moves make that possible? In this episode Andrey Shtylenko, Director of Engineering at Honeywell and leader of its Advanced Technology Group and AI practice, walks through practical approaches for building and scaling data science teams in industrial enterprises. Drawing on Honeywell use cases—smart sensors, computer vision, and robotics—Andrey explains the data and machine learning practices that enable AI adoption, the role of sensorization and cloud processing, and the common challenges traditional industrial companies face.

You’ll hear a concrete data practice maturity model (crawl → walk → run), POC strategy recommendations for proving value with end-to-end projects, and trade-offs between centralized, embedded, and hybrid hub-and-spoke team models. We cover MLOps standardization, shared services (experiment tracking, annotation, procurement), reporting-line impacts (CTO vs CIO vs CEO), and career guidance for engineers pivoting into data science or production ML roles. Listen to gain frameworks and actionable insights to structure teams, mature MLOps, and grow careers within industrial AI initiatives." +topics: +- data science +- industrial AI +- ai adoption +- ai +- MLOps +dateadded: 2022-11-05 + +duration: PT01H49S + +quotableClips: +- name: Introduction & Live Chat Poll Results + startOffset: 0 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=0 + endOffset: 149 +- name: 'Guest Introduction: Andrey Shtylenko, Honeywell' + startOffset: 149 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=149 + endOffset: 196 +- name: 'Career Journey: Startups, Organizational Development, and Honeywell' + startOffset: 196 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=196 + endOffset: 534 +- name: 'Honeywell Use Cases: Smart Sensors, Computer Vision, and Robotics' + startOffset: 534 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=534 + endOffset: 682 +- name: Defining Organizational Data and Machine Learning Practices + startOffset: 682 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=682 + endOffset: 826 +- name: Challenges of AI Adoption in Traditional Industrial Companies + startOffset: 826 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=826 + endOffset: 942 +- name: Sensorization and Cloud Processing to Enable Advanced Models + startOffset: 942 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=942 + endOffset: 1146 +- name: 'Reporting Line Impact: CTO vs CIO vs CMO vs CEO' + startOffset: 1146 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1146 + endOffset: 
1466 +- name: 'Data Practice Maturity Model: Crawl → Walk → Run' + startOffset: 1466 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1466 + endOffset: 1920 +- name: 'POC Strategy: Single End-to-End Project to Prove Value' + startOffset: 1920 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1920 + endOffset: 2306 +- name: 'Centralized Team: Roles, Tooling, and MLOps Standardization' + startOffset: 2306 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2306 + endOffset: 2619 +- name: 'Transition Risks: Centralized vs Decentralized Approaches' + startOffset: 2619 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2619 + endOffset: 2764 +- name: 'Embedded Teams: Reporting Structure, Ownership, and Trust' + startOffset: 2764 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2764 + endOffset: 2893 +- name: 'Hybrid Hub-and-Spoke Model: Balancing Autonomy and Standards' + startOffset: 2893 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2893 + endOffset: 3014 +- name: 'Shared Services: Experiment Tracking, Annotation, and Procurement' + startOffset: 3014 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3014 + endOffset: 3107 +- name: Recommended Reading and Resources for Building Data Teams + startOffset: 3107 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3107 + endOffset: 3159 +- name: 'Career Pivot: From Software Engineer to Data Scientist Internally' + startOffset: 3159 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3159 + endOffset: 3307 +- name: Timing and Strategies for Internal Role Transitions + startOffset: 3307 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3307 + endOffset: 3404 +- name: 'Research vs Production: ML Engineers and Productionizing Models' + startOffset: 3404 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3404 + endOffset: 3584 +- name: 'Career Advice: Expanding Scope to Increase Organizational Impact' + startOffset: 3584 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3584 + endOffset: 3693 +- 
name: 'Connect with Andrey: LinkedIn and Follow-up Resources' + startOffset: 3693 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3693 + endOffset: 3649 + transcript: - header: Introduction & Live Chat Poll Results - line: You might notice that there is a poll right now in the live chat, which asks @@ -955,111 +1053,6 @@ transcript: sec: 3702 time: '1:01:42' who: Alexey -description: 'Discover Industrial AI tactics, MLOps & sensorization to scale projects: - hub-and-spoke data teams, proving value, tooling and career steps to productionize - ML.' -intro: How do you move industrial AI from pilots to production at scale while keeping - MLOps, sensorization, and organizational design aligned? In this episode, Andrey - Shtylenko, Director of Engineering at Honeywell and head of the Advanced Technology - Group, walks through practical approaches to scaling industrial AI across healthcare, - industrial, and logistics verticals. Drawing on Honeywell use cases in smart sensors, - computer vision, and robotics, Andrey outlines the core challenges of AI adoption - in traditional industrial companies and the technical levers—sensorization, cloud - processing, and model productionization—that enable advanced models.

We - cover a data practice maturity model (crawl → walk → run), a focused POC strategy - using a single end‑to‑end project to prove value, and how to standardize MLOps through - centralized tooling and shared services like experiment tracking, annotation, and - procurement. Andrey also discusses organizational tradeoffs—centralized, embedded, - and hybrid hub‑and‑spoke data teams—and the reporting-line impacts on velocity and - trust. Listeners will get concrete guidance on building data teams, deploying MLOps, - and transitioning research into production-ready machine learning systems. -dateadded: '2022-11-05' -duration: PT01H49S -quotableClips: -- name: Introduction & Live Chat Poll Results - startOffset: 0 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=0 - endOffset: 149 -- name: 'Guest Introduction: Andrey Shtylenko, Honeywell' - startOffset: 149 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=149 - endOffset: 196 -- name: 'Career Journey: Startups, Organizational Development, and Honeywell' - startOffset: 196 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=196 - endOffset: 534 -- name: 'Honeywell Use Cases: Smart Sensors, Computer Vision, and Robotics' - startOffset: 534 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=534 - endOffset: 682 -- name: Defining Organizational Data and Machine Learning Practices - startOffset: 682 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=682 - endOffset: 826 -- name: Challenges of AI Adoption in Traditional Industrial Companies - startOffset: 826 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=826 - endOffset: 942 -- name: Sensorization and Cloud Processing to Enable Advanced Models - startOffset: 942 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=942 - endOffset: 1146 -- name: 'Reporting Line Impact: CTO vs CIO vs CMO vs CEO' - startOffset: 1146 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1146 - endOffset: 1466 -- name: 'Data Practice Maturity Model: Crawl → Walk → Run' - 
startOffset: 1466 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1466 - endOffset: 1920 -- name: 'POC Strategy: Single End-to-End Project to Prove Value' - startOffset: 1920 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1920 - endOffset: 2306 -- name: 'Centralized Team: Roles, Tooling, and MLOps Standardization' - startOffset: 2306 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2306 - endOffset: 2619 -- name: 'Transition Risks: Centralized vs Decentralized Approaches' - startOffset: 2619 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2619 - endOffset: 2764 -- name: 'Embedded Teams: Reporting Structure, Ownership, and Trust' - startOffset: 2764 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2764 - endOffset: 2893 -- name: 'Hybrid Hub-and-Spoke Model: Balancing Autonomy and Standards' - startOffset: 2893 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2893 - endOffset: 3014 -- name: 'Shared Services: Experiment Tracking, Annotation, and Procurement' - startOffset: 3014 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3014 - endOffset: 3107 -- name: Recommended Reading and Resources for Building Data Teams - startOffset: 3107 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3107 - endOffset: 3159 -- name: 'Career Pivot: From Software Engineer to Data Scientist Internally' - startOffset: 3159 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3159 - endOffset: 3307 -- name: Timing and Strategies for Internal Role Transitions - startOffset: 3307 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3307 - endOffset: 3404 -- name: 'Research vs Production: ML Engineers and Productionizing Models' - startOffset: 3404 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3404 - endOffset: 3584 -- name: 'Career Advice: Expanding Scope to Increase Organizational Impact' - startOffset: 3584 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3584 - endOffset: 3693 -- name: 'Connect with Andrey: LinkedIn and Follow-up Resources' - 
startOffset: 3693 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3693 - endOffset: 3649 --- Links: diff --git a/_podcast/s05e06-building-and-leading-data-teams.md b/_podcast/building-and-scaling-data-team.md similarity index 97% rename from _podcast/s05e06-building-and-leading-data-teams.md rename to _podcast/building-and-scaling-data-team.md index 7606c39b..2d1663c1 100644 --- a/_podcast/s05e06-building-and-leading-data-teams.md +++ b/_podcast/building-and-scaling-data-team.md @@ -1,12 +1,11 @@ --- -title: 'How to Build & Scale a Data Team: Hiring, Production ML, Forecasting & Driving - Adoption' +title: 'How to Build & Scale a Data Team: Hiring, Production ML, Forecasting & Driving Adoption' short: Building and Leading Data Teams +season: 5 +episode: 6 guests: - tammyliang image: images/podcast/s05e06-building-and-leading-data-teams.jpg -season: 5 -episode: 6 ids: youtube: kI4V2iBbaH0 anchor: Building-and-Leading-Data-Teams---Tammy-Liang-e18efdl @@ -15,6 +14,138 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Building-and-Leading-Data-Teams---Tammy-Liang-e18efdl spotify: https://open.spotify.com/episode/3hlzKwORlOsCPKrawuW4YQ apple: https://podcasts.apple.com/us/podcast/building-and-leading-data-teams-tammy-liang/id1541710331?i=1000537994433 + +description: 'Learn to build a scalable data team: hiring, production ML delivery, demand forecasting and driving adoption—practical staffing, stack, and governance tips.' +intro: How do you build and scale a data team that moves beyond dashboards to production ML, reliable forecasting, and real adoption across the business? In this episode Tammy Liang, Chief of Data at Platanomelón and co‑host of Data for Future, walks through her journey building data capabilities for marketing, e‑commerce, and operations at a mission‑driven consumer brand.

Tammy breaks down practical hiring decisions—why she hired an analyst first, then a data engineer, and why early senior hires matter—plus the tradeoffs between analyst, engineer, and business‑facing roles. She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, Notion) to enable forecasting and production ML, and describes common model delivery challenges moving work out of notebooks. The conversation also covers demand forecasting, time‑series and basic machine learning skills, data accuracy and governance, dbt tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building trust.

Listen to learn concrete steps for hiring a data team, setting up a data warehouse for forecasting, delivering models to production, and creating data products that stakeholders actually use +topics: +- team building +- data teams +- data engineering +- data analytics +- leadership +dateadded: 2021-10-09 + +duration: PT00H59M10S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=0 + endOffset: 74 +- name: 'Guest Background: Tammy Liang’s career path into data' + startOffset: 74 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=74 + endOffset: 247 +- name: 'Chief of Data Responsibilities: Marketing, e‑commerce, and operations' + startOffset: 247 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=247 + endOffset: 404 +- name: 'Data Challenges for Sensitive Products: Social media restrictions & creative + tracking' + startOffset: 404 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=404 + endOffset: 442 +- name: 'First Project: Business health monitoring and dashboards' + startOffset: 442 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=442 + endOffset: 531 +- name: 'Cross‑team Collaboration: Streamlining reporting and building trust' + startOffset: 531 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=531 + endOffset: 606 +- name: 'Handling Resistance: Spreadsheet culture and adoption hurdles' + startOffset: 606 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=606 + endOffset: 720 +- name: Scaling from Dashboards to Predictive Projects + startOffset: 720 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=720 + endOffset: 883 +- name: 'Model Delivery Challenges: From notebooks to production' + startOffset: 883 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=883 + endOffset: 904 +- name: 'Hiring Progression: First analyst then data engineer' + startOffset: 904 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=904 + endOffset: 1031 +- name: Building a Data Warehouse to Enable 
Forecasting + startOffset: 1031 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1031 + endOffset: 1121 +- name: 'Business‑Facing Role: Hiring for adoption and communication' + startOffset: 1121 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1121 + endOffset: 1352 +- name: 'Data Stack Overview: Stitch, GCP, dbt, Data Studio, and Notion wiki' + startOffset: 1352 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1352 + endOffset: 1391 +- name: 'Rethinking Hiring Order: Importance of senior hires early' + startOffset: 1391 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1391 + endOffset: 1586 +- name: 'Prioritizing Roles: Analyst, engineer, and business analyst tradeoffs' + startOffset: 1586 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1586 + endOffset: 1760 +- name: 'Demand Forecasting: Data provision, stakeholder input, and iteration' + startOffset: 1760 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1760 + endOffset: 1857 +- name: 'Analyst Skills: Time series and basic machine learning as advantages' + startOffset: 1857 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1857 + endOffset: 1989 +- name: 'First‑Hire Qualities: Business alignment and leadership mindset' + startOffset: 1989 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1989 + endOffset: 2138 +- name: 'Data Accuracy & Governance: Errors, playbook, and rebuilding trust' + startOffset: 2138 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2138 + endOffset: 2409 +- name: 'Data Testing & Monitoring: dbt tests and regular dashboard checks' + startOffset: 2409 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2409 + endOffset: 2502 +- name: 'Timely Insights: Operational visibility and campaign monitoring' + startOffset: 2502 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2502 + endOffset: 2739 +- name: 'Offline Attribution: Surveys, community sampling, and measuring TV/banners' + startOffset: 2739 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2739 + 
endOffset: 2828 +- name: 'Useful Data Products: Product mindset and business alignment' + startOffset: 2828 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2828 + endOffset: 2940 +- name: 'Driving Adoption: Workshops, Q&A sessions, and building data culture' + startOffset: 2940 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2940 + endOffset: 3052 +- name: 'Leadership Approach: Delegation, ownership, and team empowerment' + startOffset: 3052 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3052 + endOffset: 3159 +- name: 'Resources for New Data Leaders: Communities, courses, and mentors' + startOffset: 3159 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3159 + endOffset: 3249 +- name: 'Data For Future Podcast: Data + sustainability focus' + startOffset: 3249 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3249 + endOffset: 3379 +- name: 'Supporting Stuck Team Members: Google, communities, and networks' + startOffset: 3379 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3379 + endOffset: 3537 +- name: Closing Remarks & Where to Find Tammy (LinkedIn, dataforfuture.org) + startOffset: 3537 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3537 + endOffset: 3550 + transcript: - header: Podcast Introduction - header: 'Guest Background: Tammy Liang’s career path into data' @@ -1186,144 +1317,6 @@ transcript: sec: 3624 time: '1:00:24' who: Alexey -description: 'Learn to build a scalable data team: hiring, production ML delivery, - demand forecasting and driving adoption—practical staffing, stack, and governance - tips.' -intro: How do you build and scale a data team that moves beyond dashboards to production - ML, reliable forecasting, and real adoption across the business? In this episode - Tammy Liang, Chief of Data at Platanomelón and co‑host of Data for Future, walks - through her journey building data capabilities for marketing, e‑commerce, and operations - at a mission‑driven consumer brand.

Tammy breaks down practical hiring - decisions—why she hired an analyst first, then a data engineer, and why early senior - hires matter—plus the tradeoffs between analyst, engineer, and business‑facing roles. - She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, - Notion) to enable forecasting and production ML, and describes common model delivery - challenges moving work out of notebooks. The conversation also covers demand forecasting, - time‑series and basic machine learning skills, data accuracy and governance, dbt - tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building - trust.

Listen to learn concrete steps for hiring a data team, setting up - a data warehouse for forecasting, delivering models to production, and creating - data products that stakeholders actually use. -dateadded: '2021-10-09' -duration: PT00H59M10S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=0 - endOffset: 74 -- name: 'Guest Background: Tammy Liang’s career path into data' - startOffset: 74 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=74 - endOffset: 247 -- name: 'Chief of Data Responsibilities: Marketing, e‑commerce, and operations' - startOffset: 247 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=247 - endOffset: 404 -- name: 'Data Challenges for Sensitive Products: Social media restrictions & creative - tracking' - startOffset: 404 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=404 - endOffset: 442 -- name: 'First Project: Business health monitoring and dashboards' - startOffset: 442 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=442 - endOffset: 531 -- name: 'Cross‑team Collaboration: Streamlining reporting and building trust' - startOffset: 531 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=531 - endOffset: 606 -- name: 'Handling Resistance: Spreadsheet culture and adoption hurdles' - startOffset: 606 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=606 - endOffset: 720 -- name: Scaling from Dashboards to Predictive Projects - startOffset: 720 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=720 - endOffset: 883 -- name: 'Model Delivery Challenges: From notebooks to production' - startOffset: 883 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=883 - endOffset: 904 -- name: 'Hiring Progression: First analyst then data engineer' - startOffset: 904 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=904 - endOffset: 1031 -- name: Building a Data Warehouse to Enable Forecasting - startOffset: 1031 - url: 
https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1031 - endOffset: 1121 -- name: 'Business‑Facing Role: Hiring for adoption and communication' - startOffset: 1121 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1121 - endOffset: 1352 -- name: 'Data Stack Overview: Stitch, GCP, dbt, Data Studio, and Notion wiki' - startOffset: 1352 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1352 - endOffset: 1391 -- name: 'Rethinking Hiring Order: Importance of senior hires early' - startOffset: 1391 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1391 - endOffset: 1586 -- name: 'Prioritizing Roles: Analyst, engineer, and business analyst tradeoffs' - startOffset: 1586 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1586 - endOffset: 1760 -- name: 'Demand Forecasting: Data provision, stakeholder input, and iteration' - startOffset: 1760 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1760 - endOffset: 1857 -- name: 'Analyst Skills: Time series and basic machine learning as advantages' - startOffset: 1857 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1857 - endOffset: 1989 -- name: 'First‑Hire Qualities: Business alignment and leadership mindset' - startOffset: 1989 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1989 - endOffset: 2138 -- name: 'Data Accuracy & Governance: Errors, playbook, and rebuilding trust' - startOffset: 2138 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2138 - endOffset: 2409 -- name: 'Data Testing & Monitoring: dbt tests and regular dashboard checks' - startOffset: 2409 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2409 - endOffset: 2502 -- name: 'Timely Insights: Operational visibility and campaign monitoring' - startOffset: 2502 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2502 - endOffset: 2739 -- name: 'Offline Attribution: Surveys, community sampling, and measuring TV/banners' - startOffset: 2739 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2739 - endOffset: 2828 -- name: 'Useful Data 
Products: Product mindset and business alignment' - startOffset: 2828 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2828 - endOffset: 2940 -- name: 'Driving Adoption: Workshops, Q&A sessions, and building data culture' - startOffset: 2940 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2940 - endOffset: 3052 -- name: 'Leadership Approach: Delegation, ownership, and team empowerment' - startOffset: 3052 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3052 - endOffset: 3159 -- name: 'Resources for New Data Leaders: Communities, courses, and mentors' - startOffset: 3159 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3159 - endOffset: 3249 -- name: 'Data For Future Podcast: Data + sustainability focus' - startOffset: 3249 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3249 - endOffset: 3379 -- name: 'Supporting Stuck Team Members: Google, communities, and networks' - startOffset: 3379 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3379 - endOffset: 3537 -- name: Closing Remarks & Where to Find Tammy (LinkedIn, dataforfuture.org) - startOffset: 3537 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3537 - endOffset: 3550 --- diff --git a/_podcast/s11e06-product-owners-in-data-science.md b/_podcast/building-data-products-product-owner-vs-product-manager.md similarity index 97% rename from _podcast/s11e06-product-owners-in-data-science.md rename to _podcast/building-data-products-product-owner-vs-product-manager.md index 231b575c..2012e1b3 100644 --- a/_podcast/s11e06-product-owners-in-data-science.md +++ b/_podcast/building-data-products-product-owner-vs-product-manager.md @@ -1,20 +1,116 @@ --- +title: 'Building Data Products at Scale: Recommenders, Domain Ownership, and Hiring for Production ML' +short: Product Owners in Data Science +season: 11 episode: 6 guests: - annahannemann +image: images/podcast/s11e06-product-owners-in-data-science.jpg ids: anchor: Product-Owners-in-Data-Science---Anna-Hannemann-e1q0ord youtube: rTRTjB6cGng 
-image: images/podcast/s11e06-product-owners-in-data-science.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Product-Owners-in-Data-Science---Anna-Hannemann-e1q0ord apple: https://podcasts.apple.com/us/podcast/product-owners-in-data-science-anna-hannemann/id1541710331?i=1000585888321 spotify: https://open.spotify.com/episode/5deNrH5E6802ClwVt2Re4A?si=Xdg7qlT1TPCrH318MvS2RA youtube: https://www.youtube.com/watch?v=rTRTjB6cGng -season: 11 -short: Product Owners in Data Science -title: 'Data Product Leadership: Scaling Recommenders, Production ML Hiring & Price - Markdown Modeling' + +description: Discover scaling recommender systems, production ML hiring strategies and price markdown modeling to cut waste, optimize discounts, and lead data product teams +intro: 'How do you scale recommender systems, hire for production ML, and model price markdowns to reduce waste—and who should own those decisions? In this episode, Anna Hannemann, Domain Owner for Data Science at Metro.digital, walks through practical answers informed by her PhD in Data Science and prior leadership of recommender and robotics/smart logistics teams.

We cover customer data completeness, API-first recommender design, and algorithm choices like collaborative filtering and Word2Vec variants, plus the trade-offs product owners must manage. Anna contrasts product owner and product manager responsibilities, describes the domain owner role for aligning data scientists across teams, and lays out hiring strategies for production ML—data scientists, ML engineers, and MLOps. You’ll also hear how to source problems from operations, evaluate new data domains with MVPs and manual fixes, and take a portfolio approach to staging data product investments.

If you work in data product leadership, product management, or machine learning operations, this episode delivers actionable frameworks for scaling recommenders, building production ML capabilities, and applying price markdown modeling to optimize discounting and reduce waste. Recommended reading: Data Science for Business.' +topics: +- data products +- product owners +- product managers +- data science +- machine learning +- MLOps +dateadded: 2022-11-11 + +duration: PT00H59M17S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=0 + endOffset: 92 +- name: Guest & METRO overview and customer data completeness + startOffset: 92 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=92 + endOffset: 289 +- name: Anna's academic and career background (PhD, web science, logistics) + startOffset: 289 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=289 + endOffset: 769 +- name: Value of technical expertise for data product leads + startOffset: 769 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=769 + endOffset: 911 +- name: Core product owner responsibilities and team advocacy + startOffset: 911 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=911 + endOffset: 1200 +- name: 'Role comparison: product owner versus product manager' + startOffset: 1200 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1200 + endOffset: 1328 +- name: 'Recommender systems at METRO: API-first design and scaling' + startOffset: 1328 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1328 + endOffset: 1801 +- name: 'Hiring strategy for production ML: data scientist, ML engineer, MLOps' + startOffset: 1801 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1801 + endOffset: 2093 +- name: 'Recommender algorithms: collaborative filtering and Word2Vec variants' + startOffset: 2093 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2093 + endOffset: 2155 +- name: 'Essential skills: metrics, trade-offs, and technical 
literacy for product + owners' + startOffset: 2155 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2155 + endOffset: 2312 +- name: 'Domain owner role: aligning data scientists across product teams' + startOffset: 2312 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2312 + endOffset: 2401 +- name: 'People management at scale: directs, reviews, and cross-team enablement' + startOffset: 2401 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2401 + endOffset: 2494 +- name: 'Price markdown modeling: reducing waste and optimal discounting' + startOffset: 2494 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2494 + endOffset: 2688 +- name: 'Sourcing problems from operations: business-driven prioritization' + startOffset: 2688 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2688 + endOffset: 2757 +- name: 'Managing multiple data domains: delegation, rotations, and budget ownership' + startOffset: 2757 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2757 + endOffset: 2924 +- name: 'Evaluating new domains: MVPs, manual fixes, and business justification' + startOffset: 2924 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2924 + endOffset: 3189 +- name: 'Portfolio approach: validating and staging data product investments' + startOffset: 3189 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3189 + endOffset: 3261 +- name: 'Community leadership: organizing ProductTank meetups' + startOffset: 3261 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3261 + endOffset: 3468 +- name: 'Recommended resource: "Data Science for Business" for data product roles' + startOffset: 3468 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3468 + endOffset: 3625 +- name: Episode wrap-up and live chat highlights + startOffset: 3625 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3625 + endOffset: 3557 + transcript: - header: Episode Introduction - header: Guest & METRO overview and customer data completeness @@ -1061,109 +1157,6 @@ transcript: sec: 3649 
time: '1:00:49' who: Anna -description: Discover scaling recommender systems, production ML hiring strategies - and price markdown modeling to cut waste, optimize discounts, and lead data product - teams. -intro: 'How do you scale recommender systems, hire for production ML, and model price - markdowns to reduce waste—and who should own those decisions? In this episode, Anna - Hannemann, Domain Owner for Data Science at Metro.digital, walks through practical - answers informed by her PhD in Data Science and prior leadership of recommender - and robotics/smart logistics teams.

We cover customer data completeness, - API-first recommender design, and algorithm choices like collaborative filtering - and Word2Vec variants, plus the trade-offs product owners must manage. Anna contrasts - product owner and product manager responsibilities, describes the domain owner role - for aligning data scientists across teams, and lays out hiring strategies for production - ML—data scientists, ML engineers, and MLOps. You’ll also hear how to source problems - from operations, evaluate new data domains with MVPs and manual fixes, and take - a portfolio approach to staging data product investments.

If you work in - data product leadership, product management, or machine learning operations, this - episode delivers actionable frameworks for scaling recommenders, building production - ML capabilities, and applying price markdown modeling to optimize discounting and - reduce waste. Recommended reading: Data Science for Business.' -dateadded: '2022-11-11' -duration: PT00H59M17S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=0 - endOffset: 92 -- name: Guest & METRO overview and customer data completeness - startOffset: 92 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=92 - endOffset: 289 -- name: Anna's academic and career background (PhD, web science, logistics) - startOffset: 289 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=289 - endOffset: 769 -- name: Value of technical expertise for data product leads - startOffset: 769 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=769 - endOffset: 911 -- name: Core product owner responsibilities and team advocacy - startOffset: 911 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=911 - endOffset: 1200 -- name: 'Role comparison: product owner versus product manager' - startOffset: 1200 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1200 - endOffset: 1328 -- name: 'Recommender systems at METRO: API-first design and scaling' - startOffset: 1328 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1328 - endOffset: 1801 -- name: 'Hiring strategy for production ML: data scientist, ML engineer, MLOps' - startOffset: 1801 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1801 - endOffset: 2093 -- name: 'Recommender algorithms: collaborative filtering and Word2Vec variants' - startOffset: 2093 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2093 - endOffset: 2155 -- name: 'Essential skills: metrics, trade-offs, and technical literacy for product - owners' - startOffset: 2155 - url: 
https://www.youtube.com/watch?v=rTRTjB6cGng&t=2155 - endOffset: 2312 -- name: 'Domain owner role: aligning data scientists across product teams' - startOffset: 2312 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2312 - endOffset: 2401 -- name: 'People management at scale: directs, reviews, and cross-team enablement' - startOffset: 2401 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2401 - endOffset: 2494 -- name: 'Price markdown modeling: reducing waste and optimal discounting' - startOffset: 2494 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2494 - endOffset: 2688 -- name: 'Sourcing problems from operations: business-driven prioritization' - startOffset: 2688 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2688 - endOffset: 2757 -- name: 'Managing multiple data domains: delegation, rotations, and budget ownership' - startOffset: 2757 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2757 - endOffset: 2924 -- name: 'Evaluating new domains: MVPs, manual fixes, and business justification' - startOffset: 2924 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2924 - endOffset: 3189 -- name: 'Portfolio approach: validating and staging data product investments' - startOffset: 3189 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3189 - endOffset: 3261 -- name: 'Community leadership: organizing ProductTank meetups' - startOffset: 3261 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3261 - endOffset: 3468 -- name: 'Recommended resource: "Data Science for Business" for data product roles' - startOffset: 3468 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3468 - endOffset: 3625 -- name: Episode wrap-up and live chat highlights - startOffset: 3625 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3625 - endOffset: 3557 --- Links: diff --git a/_podcast/s10e08-leading-data-research.md b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md similarity index 97% rename from 
_podcast/s10e08-leading-data-research.md rename to _podcast/building-data-science-programs-and-democratizing-high-performance-computing.md index 443fd221..e915e828 100644 --- a/_podcast/s10e08-leading-data-research.md +++ b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md @@ -1,19 +1,141 @@ --- +title: Build Data Science Programs, Democratize HPC & Scale Graph Analytics with Arkouda +short: Leading Data Research +season: 10 episode: 8 guests: - davidbader +image: images/podcast/s10e08-leading-data-research.jpg ids: anchor: Leading-Data-Research---David-Bader-e1nmt3r youtube: vZLlpsUlchQ -image: images/podcast/s10e08-leading-data-research.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Leading-Data-Research---David-Bader-e1nmt3r apple: https://podcasts.apple.com/us/podcast/leading-data-research-david-bader/id1541710331?i=1000579710785 spotify: https://open.spotify.com/episode/7DmFWFHUwxx4Wf0X6GbKBf?si=2DW0G2EMQ7ebB9K60LfJyQ youtube: https://www.youtube.com/watch?v=vZLlpsUlchQ -season: 10 -short: Leading Data Research -title: Build Data Science Programs, Democratize HPC & Scale Graph Analytics with Arkouda + +description: Learn to build data science programs, democratize HPC and scale graph analytics with Arkouda - practical curriculum, performance tips and recruitment tips +intro: How do you build effective data science programs, democratize high-performance computing, and scale graph analytics so researchers and practitioners can solve real-world problems? In this episode, David Bader — Director of the Institute for Data Science at NJIT, founder of NJIT’s Department of Data Science, and a distinguished professor with deep expertise in HPC, big data, and analytics — walks through his career, leadership in launching academic units, and practical lessons for curriculum design and regional workforce alignment.

We explore Arkouda and ARACHNE — interactive, massive-scale Python analytics and graph tools — and the Chapel-backed supercomputing techniques that aim to democratize HPC for broader use. David discusses research lab-as-startup practices (open source releases, datasets like synthetic/SNAP, and industry partnerships with NSF, Accenture, NVIDIA), building usable systems to achieve adoption (including a NASA example), and underappreciated advances such as STINGER and streaming graph analytics. He also covers mentorship models, recruiting PhD and MS students, conference strategies, and balancing teaching, research, and service.

Listen to learn concrete approaches to creating data science programs, practical steps to scale graph analytics with Arkouda, and tactics for turning research into real-world impact +topics: +- data science +- data analytics +- tools +dateadded: 2022-09-16 + +duration: PT01H03M03S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=0 + endOffset: 107 +- name: 'Guest Intro: David Bader — NJIT Institute for Data Science, research focus' + startOffset: 107 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=107 + endOffset: 200 +- name: Career Journey & Academic Appointments + startOffset: 200 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=200 + endOffset: 293 +- name: 'Daily Responsibilities: Research, Teaching, and Institute Leadership' + startOffset: 293 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=293 + endOffset: 371 +- name: Active Projects & Industry Partnerships (NSF, Accenture, NVIDIA) + startOffset: 371 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=371 + endOffset: 510 +- name: 'Launching Academic Units: Starting Departments and Degree Programs' + startOffset: 510 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=510 + endOffset: 541 +- name: Designing Data Science Curricula & Regional Workforce Alignment + startOffset: 541 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=541 + endOffset: 835 +- name: 'Academic Ranks: Assistant, Associate, Full, and Distinguished Professor' + startOffset: 835 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=835 + endOffset: 1061 +- name: 'Career Pathways: PhD, Postdoc, and Faculty Entry' + startOffset: 1061 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1061 + endOffset: 1143 +- name: 'Academic CV vs. 
Industry Resume: Documentation and Expectations' + startOffset: 1143 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1143 + endOffset: 1450 +- name: 'Arkouda & ARACHNE: Interactive, Massive-scale Python Analytics and Graph + Tools' + startOffset: 1450 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1450 + endOffset: 1655 +- name: 'Backend Performance: Chapel, Supercomputing, and Democratizing HPC' + startOffset: 1655 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1655 + endOffset: 1772 +- name: 'Research Lab as Startup: Open Source, Code Release, and Student Output' + startOffset: 1772 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1772 + endOffset: 1830 +- name: 'Finding Datasets: Synthetic Data, SNAP, and Industry Collaboration' + startOffset: 1830 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1830 + endOffset: 1958 +- name: Lab Composition & Mentorship Model (PhD, MS, undergrads, high school) + startOffset: 1958 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1958 + endOffset: 2139 +- name: 'Time Allocation: Balancing Teaching Load, Research, and Service' + startOffset: 2139 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2139 + endOffset: 2226 +- name: 'Most Rewarding Work: Linear-time Algorithm & Pancake-flipping Variant' + startOffset: 2226 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2226 + endOffset: 2429 +- name: 'Underappreciated Impact: STINGER and Streaming Graph Analytics' + startOffset: 2429 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2429 + endOffset: 2745 +- name: Virtual Seminar Series & NJIT Data Science YouTube Channel + startOffset: 2745 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2745 + endOffset: 2812 +- name: 'Teaching-focused Careers: Universities Prioritizing Instruction over Research' + startOffset: 2812 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2812 + endOffset: 2941 +- name: 'Staying Current: Journals, Conferences, and Information Triage' + startOffset: 2941 + 
url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2941 + endOffset: 3232 +- name: 'Favorite Conferences: Supercomputing, IPDPS, HPEC' + startOffset: 3232 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3232 + endOffset: 3298 +- name: 'Selecting Research Topics: Domain-driven, Impact-first Approach' + startOffset: 3298 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3298 + endOffset: 3400 +- name: 'Building Usable Systems: From Research to Real-world Adoption (NASA example)' + startOffset: 3400 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3400 + endOffset: 3491 +- name: 'Recruiting Students: PhD vs. Industry and Collaborative Opportunities' + startOffset: 3491 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3491 + endOffset: 3811 +- name: 'Contact & Resources: davidbader.net, Arkouda, NJIT Data Science links' + startOffset: 3811 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3811 + endOffset: 3846 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3846 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3846 + endOffset: 3783 + transcript: - header: Podcast Introduction - header: 'Guest Intro: David Bader — NJIT Institute for Data Science, research focus' @@ -1138,138 +1260,6 @@ transcript: sec: 3890 time: '1:04:50' who: Alexey -description: Learn to build data science programs, democratize HPC and scale graph - analytics with Arkouda - practical curriculum, performance tips and recruitment - tips -intro: How do you build effective data science programs, democratize high-performance - computing, and scale graph analytics so researchers and practitioners can solve - real-world problems? 
In this episode, David Bader — Director of the Institute for - Data Science at NJIT, founder of NJIT’s Department of Data Science, and a distinguished - professor with deep expertise in HPC, big data, and analytics — walks through his - career, leadership in launching academic units, and practical lessons for curriculum - design and regional workforce alignment.

We explore Arkouda and ARACHNE - — interactive, massive-scale Python analytics and graph tools — and the Chapel-backed - supercomputing techniques that aim to democratize HPC for broader use. David discusses - research lab-as-startup practices (open source releases, datasets like synthetic/SNAP, - and industry partnerships with NSF, Accenture, NVIDIA), building usable systems - to achieve adoption (including a NASA example), and underappreciated advances such - as STINGER and streaming graph analytics. He also covers mentorship models, recruiting - PhD and MS students, conference strategies, and balancing teaching, research, and - service.

Listen to learn concrete approaches to creating data science programs, - practical steps to scale graph analytics with Arkouda, and tactics for turning research - into real-world impact. -dateadded: '2022-09-16' -duration: PT01H03M03S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=0 - endOffset: 107 -- name: 'Guest Intro: David Bader — NJIT Institute for Data Science, research focus' - startOffset: 107 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=107 - endOffset: 200 -- name: Career Journey & Academic Appointments - startOffset: 200 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=200 - endOffset: 293 -- name: 'Daily Responsibilities: Research, Teaching, and Institute Leadership' - startOffset: 293 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=293 - endOffset: 371 -- name: Active Projects & Industry Partnerships (NSF, Accenture, NVIDIA) - startOffset: 371 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=371 - endOffset: 510 -- name: 'Launching Academic Units: Starting Departments and Degree Programs' - startOffset: 510 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=510 - endOffset: 541 -- name: Designing Data Science Curricula & Regional Workforce Alignment - startOffset: 541 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=541 - endOffset: 835 -- name: 'Academic Ranks: Assistant, Associate, Full, and Distinguished Professor' - startOffset: 835 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=835 - endOffset: 1061 -- name: 'Career Pathways: PhD, Postdoc, and Faculty Entry' - startOffset: 1061 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1061 - endOffset: 1143 -- name: 'Academic CV vs. 
Industry Resume: Documentation and Expectations' - startOffset: 1143 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1143 - endOffset: 1450 -- name: 'Arkouda & ARACHNE: Interactive, Massive-scale Python Analytics and Graph - Tools' - startOffset: 1450 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1450 - endOffset: 1655 -- name: 'Backend Performance: Chapel, Supercomputing, and Democratizing HPC' - startOffset: 1655 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1655 - endOffset: 1772 -- name: 'Research Lab as Startup: Open Source, Code Release, and Student Output' - startOffset: 1772 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1772 - endOffset: 1830 -- name: 'Finding Datasets: Synthetic Data, SNAP, and Industry Collaboration' - startOffset: 1830 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1830 - endOffset: 1958 -- name: Lab Composition & Mentorship Model (PhD, MS, undergrads, high school) - startOffset: 1958 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1958 - endOffset: 2139 -- name: 'Time Allocation: Balancing Teaching Load, Research, and Service' - startOffset: 2139 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2139 - endOffset: 2226 -- name: 'Most Rewarding Work: Linear-time Algorithm & Pancake-flipping Variant' - startOffset: 2226 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2226 - endOffset: 2429 -- name: 'Underappreciated Impact: STINGER and Streaming Graph Analytics' - startOffset: 2429 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2429 - endOffset: 2745 -- name: Virtual Seminar Series & NJIT Data Science YouTube Channel - startOffset: 2745 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2745 - endOffset: 2812 -- name: 'Teaching-focused Careers: Universities Prioritizing Instruction over Research' - startOffset: 2812 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2812 - endOffset: 2941 -- name: 'Staying Current: Journals, Conferences, and Information Triage' - startOffset: 2941 - 
url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2941 - endOffset: 3232 -- name: 'Favorite Conferences: Supercomputing, IPDPS, HPEC' - startOffset: 3232 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3232 - endOffset: 3298 -- name: 'Selecting Research Topics: Domain-driven, Impact-first Approach' - startOffset: 3298 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3298 - endOffset: 3400 -- name: 'Building Usable Systems: From Research to Real-world Adoption (NASA example)' - startOffset: 3400 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3400 - endOffset: 3491 -- name: 'Recruiting Students: PhD vs. Industry and Collaborative Opportunities' - startOffset: 3491 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3491 - endOffset: 3811 -- name: 'Contact & Resources: davidbader.net, Arkouda, NJIT Data Science links' - startOffset: 3811 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3811 - endOffset: 3846 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3846 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3846 - endOffset: 3783 --- Links: diff --git a/_podcast/s13e01-accelerating-adoption-of-ai-through-diversity.md b/_podcast/building-ml-communities-diversity-and-career-growth.md similarity index 97% rename from _podcast/s13e01-accelerating-adoption-of-ai-through-diversity.md rename to _podcast/building-ml-communities-diversity-and-career-growth.md index 839d80f2..acb7f218 100644 --- a/_podcast/s13e01-accelerating-adoption-of-ai-through-diversity.md +++ b/_podcast/building-ml-communities-diversity-and-career-growth.md @@ -1,20 +1,139 @@ --- +title: 'How to Build & Scale a Data Science Community: Diversity, ML Deployment & Career Growth' +short: Accelerating the Adoption of AI through Diversity +season: 13 episode: 1 guests: - daniameira +image: images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg ids: anchor: Accelerating-the-Adoption-of-AI-through-Diversity---Dnia-Meira-e1v9obp youtube: SRUwwvk_YCk -image: 
images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Accelerating-the-Adoption-of-AI-through-Diversity---Dnia-Meira-e1v9obp apple: https://podcasts.apple.com/us/podcast/accelerating-the-adoption-of-ai-through-diversity/id1541710331?i=1000601491838 spotify: https://open.spotify.com/episode/6pRkAK9Zo2QrXZCAzh2veV?si=ixEmGK5-RemknBcHrChMNA youtube: https://www.youtube.com/watch?v=SRUwwvk_YCk -season: 13 -short: Accelerating the Adoption of AI through Diversity -title: 'How to Build & Scale a Data Science Community: Diversity, ML Deployment & - Career Growth' + +description: Discover how to build and scale a data science community, boost diversity, deploy ML, and accelerate career growth with mentoring & hiring strategies +intro: 'How do you build and scale a data science community that actually advances diversity, supports machine learning deployment, and accelerates career growth? In this episode, Dânia Meira — AI Guild co‑founder, data scientist, teacher and speaker with a Master’s in Computer Science (AI) — walks through her journey from applied math and marketing analytics to founding a global data science community in Berlin.

We cover practical community building: turning women’s meetups into monthly dinners and an international membership, curating meetup content and the Datalift Summit, and policies like visibility-first speaker invites, codes of conduct, and misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) improves product fit and market reach, how to create psychological safety, and how to source and train diverse talent for regulated industries. She also outlines a vendor‑agnostic consulting model for machine learning deployment, community‑to‑client matching, and scaling from a freelance network to full‑time teams.' +topics: +- data science +- machine learning +- community building +- diversity +- career growth +dateadded: 2023-02-25 + +duration: PT00H59M44S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=0 + endOffset: 94 +- name: 'Guest Introduction: Dania — AI Guild co‑founder, machine learning background' + startOffset: 94 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=94 + endOffset: 152 +- name: 'Early Career: Applied math, Spark vs Hadoop thesis and marketing analytics' + startOffset: 152 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=152 + endOffset: 216 +- name: 'Move to Berlin: Startup roles and building end‑to‑end data skills' + startOffset: 216 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=216 + endOffset: 319 +- name: 'Role Evolution: Data scientist generalist to specialized career paths' + startOffset: 319 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=319 + endOffset: 392 +- name: 'Teaching & Mentoring: Bootcamps, Data Science for Good, and skills sharing' + startOffset: 392 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=392 + endOffset: 677 +- name: 'Community Origin Story: From women’s meetups to a broader support network' + startOffset: 677 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=677 + endOffset: 921 +- name: 
'Community Growth: Monthly dinners, global expansion, and membership scale' + startOffset: 921 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=921 + endOffset: 1005 +- name: 'Datalift Summit Origin: Organizing the first in‑person conference post‑COVID' + startOffset: 1005 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1005 + endOffset: 1191 +- name: 'Meetup Content Strategy: Curating panels on career options and practical + topics' + startOffset: 1191 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1191 + endOffset: 1395 +- name: 'Diversity in Berlin: Internationality, gender balance, and workplace culture' + startOffset: 1395 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1395 + endOffset: 1579 +- name: 'Broader Diversity Dimensions: Backgrounds, nationality, and neurodiversity' + startOffset: 1579 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1579 + endOffset: 1639 +- name: 'Business Case for Diversity: Inclusive teams, product fit, and market reach' + startOffset: 1639 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1639 + endOffset: 1884 +- name: 'Psychological Safety: Leadership signals and enabling open conversations' + startOffset: 1884 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1884 + endOffset: 2023 +- name: 'Consulting Model: Vendor‑agnostic machine learning deployment support' + startOffset: 2023 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2023 + endOffset: 2061 +- name: 'Recruitment & Training: Sourcing diverse talent pools for regulated industries' + startOffset: 2061 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2061 + endOffset: 2376 +- name: 'Visibility Policy: Inviting women speakers first to increase participation' + startOffset: 2376 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2376 + endOffset: 2601 +- name: 'Career Growth Advice: Networks, visibility, and stepping into leadership' + startOffset: 2601 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2601 + endOffset: 2736 +- 
name: 'Code of Conduct: Crafting practical rules and expected community behavior' + startOffset: 2736 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2736 + endOffset: 2970 +- name: 'Responding to Misconduct: Reporting, case‑by‑case handling, and consequences' + startOffset: 2970 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2970 + endOffset: 3228 +- name: 'Community‑to‑Client Matching: Leveraging member expertise for projects' + startOffset: 3228 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3228 + endOffset: 3373 +- name: 'Team Roles: Founders’ split — machine learning delivery and career coaching' + startOffset: 3373 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3373 + endOffset: 3476 +- name: 'Scaling Strategy: Freelance network today, hiring full‑time as demand grows' + startOffset: 3476 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3476 + endOffset: 3551 +- name: 'Recommended Resources: Weapons of Math Destruction and Coded Bias' + startOffset: 3551 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3551 + endOffset: 3642 +- name: 'Datalift Summit 2023: Call for speakers, workshops, and production use cases' + startOffset: 3642 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3642 + endOffset: 3678 +- name: Closing Remarks and Sign‑off + startOffset: 3678 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3678 + endOffset: 3584 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Dania — AI Guild co‑founder, machine learning background' @@ -1169,132 +1288,6 @@ transcript: sec: 3678 time: '1:01:18' who: Alexey -description: Discover how to build and scale a data science community, boost diversity, - deploy ML, and accelerate career growth with mentoring & hiring strategies. -intro: 'How do you build and scale a data science community that actually advances - diversity, supports machine learning deployment, and accelerates career growth? 
- In this episode, Dânia Meira — AI Guild co‑founder, data scientist, teacher and speaker - with a Master’s in Computer Science (AI) — walks through her journey from applied - math and marketing analytics to founding a global data science community in Berlin. -

We cover practical community building: turning women’s meetups into monthly - dinners and an international membership, curating meetup content and the Datalift - Summit, and policies like visibility-first speaker invites, codes of conduct, and - misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) - improves product fit and market reach, how to create psychological safety, and how - to source and train diverse talent for regulated industries. She also outlines a - vendor‑agnostic consulting model for machine learning deployment, community‑to‑client - matching, and scaling from a freelance network to full‑time teams. Recommended readings - include Weapons of Math Destruction and Coded Bias.

Listen to gain actionable - tactics for community building, inclusive leadership, ML deployment strategies, - and career growth pathways for data scientists and AI practitioners.' -dateadded: '2023-02-25' -duration: PT00H59M44S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=0 - endOffset: 94 -- name: 'Guest Introduction: Dania — AI Guild co‑founder, machine learning background' - startOffset: 94 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=94 - endOffset: 152 -- name: 'Early Career: Applied math, Spark vs Hadoop thesis and marketing analytics' - startOffset: 152 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=152 - endOffset: 216 -- name: 'Move to Berlin: Startup roles and building end‑to‑end data skills' - startOffset: 216 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=216 - endOffset: 319 -- name: 'Role Evolution: Data scientist generalist to specialized career paths' - startOffset: 319 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=319 - endOffset: 392 -- name: 'Teaching & Mentoring: Bootcamps, Data Science for Good, and skills sharing' - startOffset: 392 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=392 - endOffset: 677 -- name: 'Community Origin Story: From women’s meetups to a broader support network' - startOffset: 677 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=677 - endOffset: 921 -- name: 'Community Growth: Monthly dinners, global expansion, and membership scale' - startOffset: 921 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=921 - endOffset: 1005 -- name: 'Datalift Summit Origin: Organizing the first in‑person conference post‑COVID' - startOffset: 1005 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1005 - endOffset: 1191 -- name: 'Meetup Content Strategy: Curating panels on career options and practical - topics' - startOffset: 1191 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1191 - endOffset: 1395 -- name: 'Diversity in 
Berlin: Internationality, gender balance, and workplace culture' - startOffset: 1395 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1395 - endOffset: 1579 -- name: 'Broader Diversity Dimensions: Backgrounds, nationality, and neurodiversity' - startOffset: 1579 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1579 - endOffset: 1639 -- name: 'Business Case for Diversity: Inclusive teams, product fit, and market reach' - startOffset: 1639 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1639 - endOffset: 1884 -- name: 'Psychological Safety: Leadership signals and enabling open conversations' - startOffset: 1884 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1884 - endOffset: 2023 -- name: 'Consulting Model: Vendor‑agnostic machine learning deployment support' - startOffset: 2023 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2023 - endOffset: 2061 -- name: 'Recruitment & Training: Sourcing diverse talent pools for regulated industries' - startOffset: 2061 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2061 - endOffset: 2376 -- name: 'Visibility Policy: Inviting women speakers first to increase participation' - startOffset: 2376 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2376 - endOffset: 2601 -- name: 'Career Growth Advice: Networks, visibility, and stepping into leadership' - startOffset: 2601 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2601 - endOffset: 2736 -- name: 'Code of Conduct: Crafting practical rules and expected community behavior' - startOffset: 2736 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2736 - endOffset: 2970 -- name: 'Responding to Misconduct: Reporting, case‑by‑case handling, and consequences' - startOffset: 2970 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2970 - endOffset: 3228 -- name: 'Community‑to‑Client Matching: Leveraging member expertise for projects' - startOffset: 3228 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3228 - endOffset: 3373 -- name: 'Team Roles: 
Founders’ split — machine learning delivery and career coaching' - startOffset: 3373 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3373 - endOffset: 3476 -- name: 'Scaling Strategy: Freelance network today, hiring full‑time as demand grows' - startOffset: 3476 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3476 - endOffset: 3551 -- name: 'Recommended Resources: Weapons of Math Destruction and Coded Bias' - startOffset: 3551 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3551 - endOffset: 3642 -- name: 'Datalift Summit 2023: Call for speakers, workshops, and production use cases' - startOffset: 3642 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3642 - endOffset: 3678 -- name: Closing Remarks and Sign‑off - startOffset: 3678 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3678 - endOffset: 3584 --- Links: diff --git a/_podcast/s04e04-ml-startup.md b/_podcast/building-mlops-startup.md similarity index 98% rename from _podcast/s04e04-ml-startup.md rename to _podcast/building-mlops-startup.md index d5ea9c3c..aecba523 100644 --- a/_podcast/s04e04-ml-startup.md +++ b/_podcast/building-mlops-startup.md @@ -1,12 +1,11 @@ --- -title: 'How to Build a Successful ML Startup: MLOps, Model Monitoring, Open Source - & Founder Fit' +title: 'How to Build a Successful ML Startup: MLOps, Model Monitoring, Open Source & Founder Fit' short: I Want to Build a Machine Learning Startup! 
+season: 4 +episode: 4 guests: - elenasamuylova image: images/podcast/s04e04-ml-startup.jpg -season: 4 -episode: 4 ids: youtube: DiDs5aMjEWg anchor: I-Want-to-Build-a-Machine-Learning-Startup----Elena-Samuylova-e139ste @@ -15,6 +14,150 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/I-Want-to-Build-a-Machine-Learning-Startup----Elena-Samuylova-e139ste spotify: https://open.spotify.com/episode/7fwbqo5tDrtakuqWaIuEjc apple: https://podcasts.apple.com/us/podcast/i-want-to-build-a-machine-learning-startup-elena-samuylova/id1541710331?i=1000529106923 + +description: 'Discover practical MLOps, model monitoring and founder‑fit tactics to build an ML startup: hire, fund, productize, and reach product‑market fit faster.' +intro: 'What does it take to build a successful ML startup—especially around MLOps, model monitoring, open source, and founder fit? Elena Samuylova, Co‑founder & CEO of Evidently AI, joins to answer that question drawing on her applied machine learning experience since 2014, including roles at Yandex Data Factory and an industrial AI startup.

This episode walks through practical founder decisions: sourcing problem‑first ideas, finding compatible co‑founders and establishing pre‑launch alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena explains what “AI‑first” positioning really means, how developer tools and open source shape go‑to‑market strategies (open core, cloud, monetization and cloning risks), and how Evidently validated model monitoring as a business. You’ll hear tactical guidance on customer discovery, persuading engineers to adopt your tool, data safety and on‑prem deployments, hiring and scaling tradeoffs, funding paths, productizing services for non‑technical founders, and normalizing failure and work–life tradeoffs.

Listen to gain actionable frameworks for building an ML startup—covering model monitoring, MLOps, open source strategy, founder‑market fit, and the concrete signals that indicate product–market fit.' +topics: +- startup +- machine learning +- MLOps +- open-source +- entrepreneurship +- founder +dateadded: 2021-07-16 + +duration: PT00H58M30S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=0 + endOffset: 121 +- name: 'Guest Background: Elena Samuylova’s ML & Startup Journey' + startOffset: 121 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=121 + endOffset: 202 +- name: 'Career Highlights: Yandex, Data Factory, and Industrial AI' + startOffset: 202 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=202 + endOffset: 318 +- name: 'Motivations: Startup vs. Employee Trade-offs' + startOffset: 318 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=318 + endOffset: 443 +- name: 'Sourcing Ideas: Problem-First Approach for ML Startups' + startOffset: 443 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=443 + endOffset: 704 +- name: 'Co-founder Search: Compatibility, Founder–Market Fit, and Finding Partners' + startOffset: 704 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=704 + endOffset: 1015 +- name: 'Pre-Launch Alignment: Commitment, Company Type, and Fundraising Path' + startOffset: 1015 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1015 + endOffset: 1294 +- name: 'Market Choice: Vertical Solutions vs. 
Infrastructure & MLOps' + startOffset: 1294 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1294 + endOffset: 1390 +- name: 'AI-First Positioning: What It Really Means' + startOffset: 1390 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1390 + endOffset: 1473 +- name: 'Developer Tools Market: Selling to Engineers and Open Source Dynamics' + startOffset: 1473 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1473 + endOffset: 1581 +- name: 'Founder Skills: Self‑Starter Mindset and Learning Agility' + startOffset: 1581 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1581 + endOffset: 1697 +- name: 'Startup Risks: Financial, Cultural, and Career Considerations' + startOffset: 1697 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1697 + endOffset: 1910 +- name: 'Failure Preparedness: Normalizing Risk and Learning from Failure' + startOffset: 1910 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1910 + endOffset: 1967 +- name: 'Work–Life Tradeoffs: Time Commitment in Early Stages' + startOffset: 1967 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1967 + endOffset: 2046 +- name: 'Part‑Time Startups: Weekend MVPs, Bootstrapping, and Grants' + startOffset: 2046 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2046 + endOffset: 2147 +- name: 'Funding Models: Accelerators, Angels, and Equity Considerations' + startOffset: 2147 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2147 + endOffset: 2288 +- name: 'Non‑Technical Founders: No-Code MVPs and Productizing Services' + startOffset: 2288 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2288 + endOffset: 2365 +- name: 'Productizing Services: From Manual Delivery to Scalable SaaS' + startOffset: 2365 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2365 + endOffset: 2413 +- name: 'Hiring Expertise: When to Bring in Domain or Technical Help' + startOffset: 2413 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2413 + endOffset: 2535 +- name: 'Customer Discovery: Interview 
Counts and Signals for Product–Market Fit' + startOffset: 2535 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2535 + endOffset: 2639 +- name: 'Evidently Origin: Validating Model Monitoring as a Business' + startOffset: 2639 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2639 + endOffset: 2792 +- name: 'Founder Role at Evidently: CEO Tasks, Content, and Community' + startOffset: 2792 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2792 + endOffset: 2891 +- name: 'Open Source Strategy: Open Core, Cloud, and Monetization Paths' + startOffset: 2891 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2891 + endOffset: 2969 +- name: 'Open Source Risks: Cloning, Cloud Providers, and Licensing' + startOffset: 2969 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2969 + endOffset: 3108 +- name: 'Bottom‑Up Adoption: Engineers First, Enterprise Later' + startOffset: 3108 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3108 + endOffset: 3189 +- name: 'Demonstrating Value: Persuading Clients to Share Data' + startOffset: 3189 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3189 + endOffset: 3237 +- name: 'Geographic Differences: Market Dynamics and Data Attitudes' + startOffset: 3237 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3237 + endOffset: 3377 +- name: 'Data Safety Options: On‑Premise Deployments with Open Source' + startOffset: 3377 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3377 + endOffset: 3426 +- name: 'Scaling Teams: When to Hire Engineers vs. 
Stay Small' + startOffset: 3426 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3426 + endOffset: 3494 +- name: 'Market Intelligence: Following Startups, Investors, and Trends' + startOffset: 3494 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3494 + endOffset: 3572 +- name: 'Final Advice: Build from Genuine Interest, Not Just Hype' + startOffset: 3572 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3572 + endOffset: 3611 +- name: 'Contact & Resources: Evidently, LinkedIn, and Twitter' + startOffset: 3611 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3611 + endOffset: 3510 + transcript: - header: Podcast Introduction - header: 'Guest Background: Elena Samuylova’s ML & Startup Journey' @@ -1366,155 +1509,6 @@ transcript: sec: 3631 time: '1:00:31' who: Elena -description: 'Discover practical MLOps, model monitoring and founder‑fit tactics to - build an ML startup: hire, fund, productize, and reach product‑market fit faster.' -intro: 'What does it take to build a successful ML startup—especially around MLOps, - model monitoring, open source, and founder fit? Elena Samuylova, Co‑founder & CEO - of Evidently AI, joins to answer that question drawing on her applied machine learning - experience since 2014, including roles at Yandex Data Factory and an industrial - AI startup.

This episode walks through practical founder decisions: sourcing - problem‑first ideas, finding compatible co‑founders and establishing pre‑launch - alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena - explains what “AI‑first” positioning really means, how developer tools and open - source shape go‑to‑market strategies (open core, cloud, monetization and cloning - risks), and how Evidently validated model monitoring as a business. You’ll hear - tactical guidance on customer discovery, persuading engineers to adopt your tool, - data safety and on‑prem deployments, hiring and scaling tradeoffs, funding paths, - productizing services for non‑technical founders, and normalizing failure and work–life - tradeoffs.

Listen to gain actionable frameworks for building an ML startup—covering - model monitoring, MLOps, open source strategy, founder‑market fit, and the concrete - signals that indicate product–market fit.' -dateadded: '2021-07-16' -duration: PT00H58M30S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=0 - endOffset: 121 -- name: 'Guest Background: Elena Samuylova’s ML & Startup Journey' - startOffset: 121 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=121 - endOffset: 202 -- name: 'Career Highlights: Yandex, Data Factory, and Industrial AI' - startOffset: 202 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=202 - endOffset: 318 -- name: 'Motivations: Startup vs. Employee Trade-offs' - startOffset: 318 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=318 - endOffset: 443 -- name: 'Sourcing Ideas: Problem-First Approach for ML Startups' - startOffset: 443 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=443 - endOffset: 704 -- name: 'Co-founder Search: Compatibility, Founder–Market Fit, and Finding Partners' - startOffset: 704 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=704 - endOffset: 1015 -- name: 'Pre-Launch Alignment: Commitment, Company Type, and Fundraising Path' - startOffset: 1015 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1015 - endOffset: 1294 -- name: 'Market Choice: Vertical Solutions vs. 
Infrastructure & MLOps' - startOffset: 1294 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1294 - endOffset: 1390 -- name: 'AI-First Positioning: What It Really Means' - startOffset: 1390 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1390 - endOffset: 1473 -- name: 'Developer Tools Market: Selling to Engineers and Open Source Dynamics' - startOffset: 1473 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1473 - endOffset: 1581 -- name: 'Founder Skills: Self‑Starter Mindset and Learning Agility' - startOffset: 1581 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1581 - endOffset: 1697 -- name: 'Startup Risks: Financial, Cultural, and Career Considerations' - startOffset: 1697 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1697 - endOffset: 1910 -- name: 'Failure Preparedness: Normalizing Risk and Learning from Failure' - startOffset: 1910 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1910 - endOffset: 1967 -- name: 'Work–Life Tradeoffs: Time Commitment in Early Stages' - startOffset: 1967 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1967 - endOffset: 2046 -- name: 'Part‑Time Startups: Weekend MVPs, Bootstrapping, and Grants' - startOffset: 2046 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2046 - endOffset: 2147 -- name: 'Funding Models: Accelerators, Angels, and Equity Considerations' - startOffset: 2147 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2147 - endOffset: 2288 -- name: 'Non‑Technical Founders: No-Code MVPs and Productizing Services' - startOffset: 2288 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2288 - endOffset: 2365 -- name: 'Productizing Services: From Manual Delivery to Scalable SaaS' - startOffset: 2365 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2365 - endOffset: 2413 -- name: 'Hiring Expertise: When to Bring in Domain or Technical Help' - startOffset: 2413 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2413 - endOffset: 2535 -- name: 'Customer Discovery: Interview 
Counts and Signals for Product–Market Fit' - startOffset: 2535 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2535 - endOffset: 2639 -- name: 'Evidently Origin: Validating Model Monitoring as a Business' - startOffset: 2639 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2639 - endOffset: 2792 -- name: 'Founder Role at Evidently: CEO Tasks, Content, and Community' - startOffset: 2792 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2792 - endOffset: 2891 -- name: 'Open Source Strategy: Open Core, Cloud, and Monetization Paths' - startOffset: 2891 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2891 - endOffset: 2969 -- name: 'Open Source Risks: Cloning, Cloud Providers, and Licensing' - startOffset: 2969 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2969 - endOffset: 3108 -- name: 'Bottom‑Up Adoption: Engineers First, Enterprise Later' - startOffset: 3108 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3108 - endOffset: 3189 -- name: 'Demonstrating Value: Persuading Clients to Share Data' - startOffset: 3189 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3189 - endOffset: 3237 -- name: 'Geographic Differences: Market Dynamics and Data Attitudes' - startOffset: 3237 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3237 - endOffset: 3377 -- name: 'Data Safety Options: On‑Premise Deployments with Open Source' - startOffset: 3377 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3377 - endOffset: 3426 -- name: 'Scaling Teams: When to Hire Engineers vs. 
Stay Small' - startOffset: 3426 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3426 - endOffset: 3494 -- name: 'Market Intelligence: Following Startups, Investors, and Trends' - startOffset: 3494 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3494 - endOffset: 3572 -- name: 'Final Advice: Build from Genuine Interest, Not Just Hype' - startOffset: 3572 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3572 - endOffset: 3611 -- name: 'Contact & Resources: Evidently, LinkedIn, and Twitter' - startOffset: 3611 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3611 - endOffset: 3510 --- diff --git a/_podcast/s11e04-large-scale-entity-resolution.md b/_podcast/building-open-source-data-product-for-identity-resolution.md similarity index 97% rename from _podcast/s11e04-large-scale-entity-resolution.md rename to _podcast/building-open-source-data-product-for-identity-resolution.md index 4713f36e..ae943ac9 100644 --- a/_podcast/s11e04-large-scale-entity-resolution.md +++ b/_podcast/building-open-source-data-product-for-identity-resolution.md @@ -1,20 +1,164 @@ --- +title: "Building an Open-Source ML-Powered Identity Resolution Tool in the Modern Data Stack" +short: "Building an Open-Source ML-Powered Identity Resolution Tool" +season: 11 episode: 4 guests: - sonalgoyal +image: images/podcast/s11e04-large-scale-entity-resolution.jpg ids: anchor: Large-Scale-Entity-Resolution---Sonal-Goyal-e1pibrh youtube: lpjffCOPxlY -image: images/podcast/s11e04-large-scale-entity-resolution.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Large-Scale-Entity-Resolution---Sonal-Goyal-e1pibrh apple: https://podcasts.apple.com/us/podcast/large-scale-entity-resolution-sonal-goyal/id1541710331?i=1000584270745 spotify: https://open.spotify.com/episode/54DufG1ZVj0GMSoWTbJsen?si=d7XNSW2_Tfa4qKJxmFQpIA youtube: https://www.youtube.com/watch?v=lpjffCOPxlY -season: 11 -short: Large-Scale Entity Resolution -title: Eliminate Duplicate Records with ML-Powered Identity 
Resolution — Snowflake-native - & Open Source + +description: Discover how to build an open-source, ML-powered identity resolution tool. Learn about the practical challenges across industries. +intro: 'How do you build an open-source, ML-powered identity resolution tool that becomes the single source of truth in a modern data stack? In this episode Sonal Goyal—founder of Zingg and a 23-year data product veteran—walks through the practical challenges of identity resolution and entity resolution across industries like investment banking, telecom, gaming, and insurance. Sonal explains why ML-powered approaches matter, how an open-source framework like Zingg can fit into your modern data stack, and what it takes to reconcile customer and supplier records into a reliable single source of truth.

Expect discussion of architecture and integration trade-offs, the role of machine learning in matching and deduplication, and lessons from building production data products at scale. If you manage customer data, data integration, or are evaluating open-source identity resolution solutions, this episode offers concrete insights and pointers—including Zingg’s open-source repository—to help you evaluate adoption, reduce duplicate records, and improve downstream analytics and personalization' +dateadded: 2022-10-29 + +duration: PT01H23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=0 + endOffset: 71 +- name: 'Guest Overview: Sonal Goyal and Zingg identity resolution' + startOffset: 71 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=71 + endOffset: 126 +- name: 'Career Overview: 24 years in tech, data consulting background' + startOffset: 126 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=126 + endOffset: 178 +- name: 'Origin Story: Consulting projects reveal recurring identity gaps' + startOffset: 178 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=178 + endOffset: 291 +- name: 'Modern Data Stack: Centralized data exposing identity challenges' + startOffset: 291 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=291 + endOffset: 343 +- name: 'Product Overview: Zingg — ML-powered identity resolution' + startOffset: 343 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=343 + endOffset: 434 +- name: 'Terminology: Entity resolution vs identity resolution' + startOffset: 434 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=434 + endOffset: 472 +- name: 'Duplicate Detection vs Deduplication: Outcomes and use cases' + startOffset: 472 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=472 + endOffset: 548 +- name: 'Motivation: Recurring duplicate problems across domains' + startOffset: 548 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=548 + endOffset: 669 +- name: 
'Solution Generality: Customers, products, patients and suppliers' + startOffset: 669 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=669 + endOffset: 818 +- name: 'Related Terms: Record linkage, entity matching, entity disambiguation' + startOffset: 818 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=818 + endOffset: 842 +- name: 'Core Approach: ML training, blocking, indexing for scale' + startOffset: 842 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=842 + endOffset: 1093 +- name: 'Implementation: Spark distribution, Snowflake-native & Python API' + startOffset: 1093 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1093 + endOffset: 1241 +- name: 'Interfaces & Integrations: CLI, Python SDK, Databricks, dbt, UI plans' + startOffset: 1241 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1241 + endOffset: 1311 +- name: 'Founder Transition: From consultancy to full-time product build' + startOffset: 1311 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1311 + endOffset: 1380 +- name: 'Development Timeline: Proof-of-concept to public release (~18 months)' + startOffset: 1380 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1380 + endOffset: 1454 +- name: 'Open Source Strategy: Community, adoption, and business rationale' + startOffset: 1454 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1454 + endOffset: 1620 +- name: 'Licensing Choice: AGPL to prevent SaaS rehosting and protect IP' + startOffset: 1620 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1620 + endOffset: 1870 +- name: 'Open Source Trade-offs: IP concerns vs discoverability and growth' + startOffset: 1870 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1870 + endOffset: 1920 +- name: 'Team Evolution: Solo founder, consultants, and initial hires' + startOffset: 1920 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1920 + endOffset: 1979 +- name: 'Founder Role: Product, ecosystem integrations, community and hiring' + startOffset: 1979 + url: 
https://www.youtube.com/watch?v=lpjffCOPxlY&t=1979 + endOffset: 2114 +- name: 'Team & Hiring: First developer hire and fully remote setup' + startOffset: 2114 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2114 + endOffset: 2241 +- name: 'Scaling Challenge: Recruiting the right engineering talent' + startOffset: 2241 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2241 + endOffset: 2323 +- name: 'Prevention Limits: Data governance won’t fully eliminate identity issues' + startOffset: 2323 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2323 + endOffset: 2436 +- name: 'Beyond Joins: When fuzzy joins and basic ETL aren’t enough' + startOffset: 2436 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2436 + endOffset: 2665 +- name: 'Deterministic Rules vs Probabilistic ML: Trade-offs for accuracy' + startOffset: 2665 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2665 + endOffset: 2750 +- name: 'Fraud Use Cases: Identity resolution for AML and fraud detection' + startOffset: 2750 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2750 + endOffset: 2963 +- name: 'Graph + ML: Pairwise matching, graph clustering and downstream use' + startOffset: 2963 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2963 + endOffset: 3020 +- name: 'Data Mapping: Need to specify field correspondences for matching' + startOffset: 3020 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3020 + endOffset: 3099 +- name: 'Impact Case Studies: Public-data donors, e‑commerce and classifieds' + startOffset: 3099 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3099 + endOffset: 3251 +- name: 'Retrospective: Seeking cofounder earlier and open-sourcing sooner' + startOffset: 3251 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3251 + endOffset: 3367 +- name: 'Founder Advice: Validate use cases, distribution channels, and conviction' + startOffset: 3367 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3367 + endOffset: 3566 +- name: 'Recommended Reading: 
Creative Selection on product design' + startOffset: 3566 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3566 + endOffset: 3638 +- name: 'Closing Remarks: Follow-ups, demos and contact options' + startOffset: 3638 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3638 + endOffset: 3623 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Sonal Goyal and Zingg identity resolution' @@ -1117,162 +1261,6 @@ transcript: sec: 3694 time: '1:01:34' who: Sonal -description: Discover ML-powered identity resolution to remove duplicate records, - Snowflake-native, open-source deduplication for scalable fraud detection and integrations. -intro: 'How do you eliminate duplicate records across modern data stacks without breaking - pipelines or overfitting rules? In this episode, Sonal Goyal, founder of Zingg and - a 24‑year veteran in data consulting, walks through ML‑powered identity resolution - and entity resolution approaches to create a single source of truth for customers, - suppliers, products and patients.

We cover core distinctions—entity vs - identity resolution, duplicate detection vs deduplication—and practical architecture: - ML training, blocking and indexing for scale, Spark distribution, Snowflake‑native - deployment and a Python API. Sonal explains integrations (CLI, Python SDK, Databricks, - dbt), graph + ML workflows, data mapping needs, and real-world impact examples from - public‑data donors, e‑commerce and classifieds. She also discusses open source strategy - and licensing (AGPL), trade‑offs between deterministic rules and probabilistic ML, - and fraud/AML use cases.

If you’re wrestling with record linkage, entity - matching, or persistent duplicate records in a centralized data stack, this episode - offers concrete implementation patterns, scaling considerations, and open‑source - tradeoffs to help you choose the right identity resolution path.' -dateadded: '2022-10-29' -duration: PT01H23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=0 - endOffset: 71 -- name: 'Guest Overview: Sonal Goyal and Zingg identity resolution' - startOffset: 71 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=71 - endOffset: 126 -- name: 'Career Overview: 24 years in tech, data consulting background' - startOffset: 126 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=126 - endOffset: 178 -- name: 'Origin Story: Consulting projects reveal recurring identity gaps' - startOffset: 178 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=178 - endOffset: 291 -- name: 'Modern Data Stack: Centralized data exposing identity challenges' - startOffset: 291 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=291 - endOffset: 343 -- name: 'Product Overview: Zingg — ML-powered identity resolution' - startOffset: 343 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=343 - endOffset: 434 -- name: 'Terminology: Entity resolution vs identity resolution' - startOffset: 434 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=434 - endOffset: 472 -- name: 'Duplicate Detection vs Deduplication: Outcomes and use cases' - startOffset: 472 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=472 - endOffset: 548 -- name: 'Motivation: Recurring duplicate problems across domains' - startOffset: 548 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=548 - endOffset: 669 -- name: 'Solution Generality: Customers, products, patients and suppliers' - startOffset: 669 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=669 - endOffset: 818 -- name: 'Related Terms: Record linkage, entity matching, 
entity disambiguation' - startOffset: 818 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=818 - endOffset: 842 -- name: 'Core Approach: ML training, blocking, indexing for scale' - startOffset: 842 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=842 - endOffset: 1093 -- name: 'Implementation: Spark distribution, Snowflake-native & Python API' - startOffset: 1093 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1093 - endOffset: 1241 -- name: 'Interfaces & Integrations: CLI, Python SDK, Databricks, dbt, UI plans' - startOffset: 1241 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1241 - endOffset: 1311 -- name: 'Founder Transition: From consultancy to full-time product build' - startOffset: 1311 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1311 - endOffset: 1380 -- name: 'Development Timeline: Proof-of-concept to public release (~18 months)' - startOffset: 1380 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1380 - endOffset: 1454 -- name: 'Open Source Strategy: Community, adoption, and business rationale' - startOffset: 1454 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1454 - endOffset: 1620 -- name: 'Licensing Choice: AGPL to prevent SaaS rehosting and protect IP' - startOffset: 1620 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1620 - endOffset: 1870 -- name: 'Open Source Trade-offs: IP concerns vs discoverability and growth' - startOffset: 1870 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1870 - endOffset: 1920 -- name: 'Team Evolution: Solo founder, consultants, and initial hires' - startOffset: 1920 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1920 - endOffset: 1979 -- name: 'Founder Role: Product, ecosystem integrations, community and hiring' - startOffset: 1979 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1979 - endOffset: 2114 -- name: 'Team & Hiring: First developer hire and fully remote setup' - startOffset: 2114 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2114 - endOffset: 
2241 -- name: 'Scaling Challenge: Recruiting the right engineering talent' - startOffset: 2241 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2241 - endOffset: 2323 -- name: 'Prevention Limits: Data governance won’t fully eliminate identity issues' - startOffset: 2323 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2323 - endOffset: 2436 -- name: 'Beyond Joins: When fuzzy joins and basic ETL aren’t enough' - startOffset: 2436 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2436 - endOffset: 2665 -- name: 'Deterministic Rules vs Probabilistic ML: Trade-offs for accuracy' - startOffset: 2665 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2665 - endOffset: 2750 -- name: 'Fraud Use Cases: Identity resolution for AML and fraud detection' - startOffset: 2750 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2750 - endOffset: 2963 -- name: 'Graph + ML: Pairwise matching, graph clustering and downstream use' - startOffset: 2963 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2963 - endOffset: 3020 -- name: 'Data Mapping: Need to specify field correspondences for matching' - startOffset: 3020 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3020 - endOffset: 3099 -- name: 'Impact Case Studies: Public-data donors, e‑commerce and classifieds' - startOffset: 3099 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3099 - endOffset: 3251 -- name: 'Retrospective: Seeking cofounder earlier and open-sourcing sooner' - startOffset: 3251 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3251 - endOffset: 3367 -- name: 'Founder Advice: Validate use cases, distribution channels, and conviction' - startOffset: 3367 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3367 - endOffset: 3566 -- name: 'Recommended Reading: Creative Selection on product design' - startOffset: 3566 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3566 - endOffset: 3638 -- name: 'Closing Remarks: Follow-ups, demos and contact options' - startOffset: 3638 - url: 
https://www.youtube.com/watch?v=lpjffCOPxlY&t=3638 - endOffset: 3623 --- Links: diff --git a/_podcast/s13e09-building-open-source-nlp-tool.md b/_podcast/building-open-source-nlp-tool.md similarity index 97% rename from _podcast/s13e09-building-open-source-nlp-tool.md rename to _podcast/building-open-source-nlp-tool.md index a5a1dd35..24b903c3 100644 --- a/_podcast/s13e09-building-open-source-nlp-tool.md +++ b/_podcast/building-open-source-nlp-tool.md @@ -1,19 +1,150 @@ --- +title: 'Build Open-Source NLP Tools: Weak Supervision, LLM Heuristics & Enterprise ML Product Strategy' +short: Build Open-Source NLP Tools +season: 13 episode: 9 guests: - johanneshotter +image: images/podcast/s13e09-building-open-source-nlp-tool.jpg ids: anchor: ow/datatalksclub/episodes/Building-an-Open-Source-NLP-Tool---Johannes-Htter-e22lbn4 youtube: WIpnyiHp4IE -image: images/podcast/s13e09-building-open-source-nlp-tool.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Building-an-Open-Source-NLP-Tool---Johannes-Htter-e22lbn4 apple: https://podcasts.apple.com/us/podcast/building-an-open-source-nlp-tool-johannes-h%C3%B6tter/id1541710331?i=1000610117894 spotify: https://open.spotify.com/episode/5SjY4vatlUYFCZUMV7dE7W?si=MC4ZZrKbSTKUEDVEfedGwA youtube: https://www.youtube.com/watch?v=WIpnyiHp4IE -season: 13 -short: Building an Open-Source NLP Tool -title: 'Improve NLP Labeling with Weak Supervision: Refinery, Bricks & GPT Heuristics' + +description: Discover weak supervision, NLP labeling & GPT heuristics to build high-quality datasets faster — combine Refinery, Bricks, ensemble heuristics & active learning +intro: 'How can teams scale high-quality NLP labeling without hand-labeling every example? In this episode, Johannes Hötter, data scientist, engineer, and co-founder of kern, explains practical approaches to that problem using weak supervision, heuristics, and open-source tooling. 
We walk through demos of Refinery and Bricks, with a close look at Refinery’s weak supervision and labeling workflows, and why Jupyter widgets leave a gap for NLP tooling.

You’ll hear about common NLP challenges—messy labels and text metadata—and how ChatGPT can be used as a labeling heuristic. Johannes outlines combining heuristics: GPT-driven rules, active learning, and crowd labels as an ensemble of “workers,” plus foundations like Hugging Face, embeddings, and robust data management. Bricks is presented as a heuristic library with recipes and ensemble methods to streamline labeling.

The conversation also covers productization choices (open-source vs commercial), targeting engineers, enterprise workflows, community support, and niche document/PDF NLP issues. Listen to learn actionable strategies to improve NLP labeling quality, adopt weak supervision and GPT heuristics, and make tooling and go-to-market decisions for scalable data labeling and model training.' +topics: +- NLP +- machine learning +- strategy +- entrepreneurship +- founder +dateadded: 2023-04-23 + +duration: PT01H27S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=0 + endOffset: 96 +- name: Background & early AI curiosity + startOffset: 96 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=96 + endOffset: 273 +- name: 'Open-source demos overview: Refinery and Bricks' + startOffset: 273 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=273 + endOffset: 393 +- name: 'Refinery features: weak supervision & labeling workflows' + startOffset: 393 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=393 + endOffset: 540 +- name: Jupyter widgets gap and NLP tooling needs + startOffset: 540 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=540 + endOffset: 614 +- name: 'NLP challenges: text metadata and messy labels' + startOffset: 614 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=614 + endOffset: 802 +- name: ChatGPT as a labeling heuristic + startOffset: 802 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=802 + endOffset: 958 +- name: 'Combining heuristics: GPT, active learning, crowd labels' + startOffset: 958 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=958 + endOffset: 1054 +- name: 'Foundations: Hugging Face, embeddings, and data management' + startOffset: 1054 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1054 + endOffset: 1113 +- name: 'Bricks: heuristic library, recipes, and ensemble methods' + startOffset: 1113 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1113 + 
endOffset: 1188 +- name: 'Weak supervision analogy: heuristics as ensemble workers' + startOffset: 1188 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1188 + endOffset: 1222 +- name: 'Productization: consultancy to Kern and product pivot' + startOffset: 1222 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1222 + endOffset: 1440 +- name: 'Targeting engineers: control over training data' + startOffset: 1440 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1440 + endOffset: 1582 +- name: 'Choosing open source: motivations and concerns' + startOffset: 1582 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1582 + endOffset: 1691 +- name: 'Open-source trade-offs: distribution versus revenue' + startOffset: 1691 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1691 + endOffset: 1799 +- name: 'Open-source adoption: free users vs paying customers' + startOffset: 1799 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1799 + endOffset: 1907 +- name: 'Business model: open-core, multi-user SaaS, and services' + startOffset: 1907 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1907 + endOffset: 2043 +- name: 'Enterprise engagements: workshops, customization, and domain expertise' + startOffset: 2043 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2043 + endOffset: 2160 +- name: 'Community support: Discord, workarounds, and feedback loops' + startOffset: 2160 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2160 + endOffset: 2303 +- name: 'Enterprise outreach: networking and segment strategies' + startOffset: 2303 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2303 + endOffset: 2421 +- name: 'Developer-focused sales: DevRel, education, and trust-building' + startOffset: 2421 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2421 + endOffset: 2592 +- name: 'Team structure: development, developer relations, go-to-market' + startOffset: 2592 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2592 + endOffset: 2840 +- name: 'Founder 
role evolution: prototyping, GTM, and coding balance' + startOffset: 2840 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2840 + endOffset: 2991 +- name: 'Co-founder division: complementary strengths and responsibilities' + startOffset: 2991 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2991 + endOffset: 3160 +- name: 'Niche use cases: PDF and document NLP challenges' + startOffset: 3160 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3160 + endOffset: 3363 +- name: Open source as trust-builder with developer teams + startOffset: 3363 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3363 + endOffset: 3422 +- name: 'Fundraising recap: 2.7M raise and investor interest in open source ML' + startOffset: 3422 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3422 + endOffset: 3598 +- name: 'Recommended reading: Prediction Machines (applied AI economics)' + startOffset: 3598 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3598 + endOffset: 3675 +- name: Podcast Outro and closing remarks + startOffset: 3675 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3675 + endOffset: 3627 + transcript: - header: Podcast Introduction - line: Today we will talk about open source and creating startups in open source @@ -1163,143 +1294,6 @@ transcript: sec: 3688 time: '1:01:28' who: Johannes -description: Discover weak supervision, NLP labeling & GPT heuristics to build high-quality - datasets faster — combine Refinery, Bricks, ensemble heuristics & active learning. -intro: 'How can teams scale high-quality NLP labeling without hand-labeling every - example? In this episode, Johannes Hötter, data scientist, engineer, and co-founder - of kern, explains practical approaches to that problem using weak supervision, heuristics, - and open-source tooling. We walk through demos of Refinery and Bricks, with a close - look at Refinery’s weak supervision and labeling workflows, and why Jupyter widgets - leave a gap for NLP tooling.

You’ll hear about common NLP challenges—messy - labels and text metadata—and how ChatGPT can be used as a labeling heuristic. Johannes - outlines combining heuristics: GPT-driven rules, active learning, and crowd labels - as an ensemble of “workers,” plus foundations like Hugging Face, embeddings, and - robust data management. Bricks is presented as a heuristic library with recipes - and ensemble methods to streamline labeling.

The conversation also covers - productization choices (open-source vs commercial), targeting engineers, enterprise - workflows, community support, and niche document/PDF NLP issues. Listen to learn - actionable strategies to improve NLP labeling quality, adopt weak supervision and - GPT heuristics, and make tooling and go-to-market decisions for scalable data labeling - and model training.' -dateadded: '2023-04-23' -duration: PT01H27S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=0 - endOffset: 96 -- name: Background & early AI curiosity - startOffset: 96 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=96 - endOffset: 273 -- name: 'Open-source demos overview: Refinery and Bricks' - startOffset: 273 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=273 - endOffset: 393 -- name: 'Refinery features: weak supervision & labeling workflows' - startOffset: 393 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=393 - endOffset: 540 -- name: Jupyter widgets gap and NLP tooling needs - startOffset: 540 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=540 - endOffset: 614 -- name: 'NLP challenges: text metadata and messy labels' - startOffset: 614 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=614 - endOffset: 802 -- name: ChatGPT as a labeling heuristic - startOffset: 802 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=802 - endOffset: 958 -- name: 'Combining heuristics: GPT, active learning, crowd labels' - startOffset: 958 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=958 - endOffset: 1054 -- name: 'Foundations: Hugging Face, embeddings, and data management' - startOffset: 1054 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1054 - endOffset: 1113 -- name: 'Bricks: heuristic library, recipes, and ensemble methods' - startOffset: 1113 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1113 - endOffset: 1188 -- name: 'Weak supervision analogy: heuristics as ensemble 
workers' - startOffset: 1188 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1188 - endOffset: 1222 -- name: 'Productization: consultancy to Kern and product pivot' - startOffset: 1222 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1222 - endOffset: 1440 -- name: 'Targeting engineers: control over training data' - startOffset: 1440 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1440 - endOffset: 1582 -- name: 'Choosing open source: motivations and concerns' - startOffset: 1582 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1582 - endOffset: 1691 -- name: 'Open-source trade-offs: distribution versus revenue' - startOffset: 1691 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1691 - endOffset: 1799 -- name: 'Open-source adoption: free users vs paying customers' - startOffset: 1799 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1799 - endOffset: 1907 -- name: 'Business model: open-core, multi-user SaaS, and services' - startOffset: 1907 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1907 - endOffset: 2043 -- name: 'Enterprise engagements: workshops, customization, and domain expertise' - startOffset: 2043 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2043 - endOffset: 2160 -- name: 'Community support: Discord, workarounds, and feedback loops' - startOffset: 2160 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2160 - endOffset: 2303 -- name: 'Enterprise outreach: networking and segment strategies' - startOffset: 2303 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2303 - endOffset: 2421 -- name: 'Developer-focused sales: DevRel, education, and trust-building' - startOffset: 2421 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2421 - endOffset: 2592 -- name: 'Team structure: development, developer relations, go-to-market' - startOffset: 2592 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2592 - endOffset: 2840 -- name: 'Founder role evolution: prototyping, GTM, and coding balance' - startOffset: 2840 - 
url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2840 - endOffset: 2991 -- name: 'Co-founder division: complementary strengths and responsibilities' - startOffset: 2991 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2991 - endOffset: 3160 -- name: 'Niche use cases: PDF and document NLP challenges' - startOffset: 3160 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3160 - endOffset: 3363 -- name: Open source as trust-builder with developer teams - startOffset: 3363 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3363 - endOffset: 3422 -- name: 'Fundraising recap: 2.7M raise and investor interest in open source ML' - startOffset: 3422 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3422 - endOffset: 3598 -- name: 'Recommended reading: Prediction Machines (applied AI economics)' - startOffset: 3598 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3598 - endOffset: 3675 -- name: Podcast Outro and closing remarks - startOffset: 3675 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3675 - endOffset: 3627 --- Links: diff --git a/_podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md b/_podcast/building-production-ml-platform-and-mlops-team.md similarity index 97% rename from _podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md rename to _podcast/building-production-ml-platform-and-mlops-team.md index 8a1e87a7..a6259ad0 100644 --- a/_podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md +++ b/_podcast/building-production-ml-platform-and-mlops-team.md @@ -1,19 +1,154 @@ --- +title: 'Building Production ML Platforms: Infrastructure, Workflows, Teams & Governance That Scale' +short: 'From Scratch to Success: Building an MLOps Team and ML Platform' +season: 14 episode: 8 guests: - simonstiebellehner +image: images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg ids: anchor: 
atatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c youtube: CB1YIsxQRtc -image: images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c apple: https://podcasts.apple.com/us/podcast/from-scratch-to-success-building-an-mlops-team-and/id1541710331?i=1000618899065 spotify: https://open.spotify.com/episode/0raudIf9XsKdUfr5m2YlUE?si=x1PuaBqwTVyMlfNlGape2A youtube: https://www.youtube.com/watch?v=CB1YIsxQRtc -season: 14 -short: 'From Scratch to Success: Building an MLOps Team and ML Platform' -title: 'Designing MLOps Platforms: Deploy, Track Experiments, Manage Models & Compliance' + +description: Discover MLOps strategies to build an ML platform with experiment tracking, improved reproducibility, faster releases and compliance-ready model operations +intro: How do you design an ML platform that reliably deploys models, tracks experiments, and meets regulatory constraints? In this episode, Simon Stiebellehner — Lead MLOps Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining & Data Warehousing — walks through practical MLOps platform design grounded in real-world deployment challenges.

We cover a clear definition of MLOps as people, processes, and technology, and dig into core platform skills (cloud infrastructure, Kubernetes, Terraform), user‑centric design for notebooks and data science workflows, and software engineering fundamentals for production ML. Simon explains experiment tracking, model registry practices, deployment patterns (batch vs online), orchestration choices like Airflow, and stitching SaaS and open‑source tools into a coherent ML platform. The episode also addresses compliance and data governance — GDPR, fintech security constraints — plus metadata, lineage, API design, and monitoring. We close with build vs buy trade‑offs, staffing and on‑call considerations, and how emerging LLM needs affect platforms.

Listen to learn concrete guidance on model deployment, reproducibility, orchestration, and compliance to help you design a pragmatic, scalable ML platform +topics: +- MLOps +- machine learning +- leadership +- career growth +dateadded: 2023-07-02 + +duration: PT00H58M42S + +quotableClips: +- name: 'Episode Introduction: MLOps & ML platform conversation with Simon' + startOffset: 74 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=74 + endOffset: 120 +- name: 'Career & Transition: Research to industry, early platform work and management' + startOffset: 120 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=120 + endOffset: 282 +- name: 'MLOps Definition: People, processes, and technology' + startOffset: 282 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=282 + endOffset: 415 +- name: 'Deployment Challenges: Early blockers that launched MLOps work' + startOffset: 415 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=415 + endOffset: 491 +- name: 'Core Platform Skills: Cloud infrastructure, Kubernetes, Terraform' + startOffset: 491 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=491 + endOffset: 647 +- name: 'User-Centric Platform Design: Understanding data science workflows and notebooks' + startOffset: 647 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=647 + endOffset: 805 +- name: 'Engineering Fundamentals: Software engineering for ML platforms' + startOffset: 805 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=805 + endOffset: 830 +- name: 'Team Composition: Specialist vs generalist skill balance' + startOffset: 830 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=830 + endOffset: 934 +- name: 'Team Size & On‑Call: Staffing and operational considerations' + startOffset: 934 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=934 + endOffset: 1012 +- name: 'Build vs Buy Decision: When to consider building an ML platform' + startOffset: 1012 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1012 + endOffset: 1034 +- name: 
'Platform Triggers: Signs you need standardization across teams' + startOffset: 1034 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1034 + endOffset: 1204 +- name: 'Single-Team Value: SaaS components and incremental platform adoption' + startOffset: 1204 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1204 + endOffset: 1263 +- name: 'Data Science Workflow: Exploration to training and evaluation' + startOffset: 1263 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1263 + endOffset: 1700 +- name: 'Self‑Service Compute: Notebooks, BigQuery, Databricks provisioning' + startOffset: 1700 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1700 + endOffset: 1781 +- name: 'Experiment Tracking: Low‑hanging fruit for reproducibility and collaboration' + startOffset: 1781 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1781 + endOffset: 1832 +- name: 'Model Registry: Persisting models for downstream consumption' + startOffset: 1832 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1832 + endOffset: 1875 +- name: 'Deployment Patterns: Batch inference versus online serving' + startOffset: 1875 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1875 + endOffset: 1911 +- name: 'Orchestration Choices: Airflow, pipelines, and production workflows' + startOffset: 1911 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1911 + endOffset: 2041 +- name: 'Tool Integration: Stitching SaaS and open-source into a coherent platform' + startOffset: 2041 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2041 + endOffset: 2126 +- name: 'LLMs & Emerging Needs: Platform implications and vendor updates' + startOffset: 2126 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2126 + endOffset: 2320 +- name: 'Developer Experience: Thin abstraction layers over cloud providers' + startOffset: 2320 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2320 + endOffset: 2394 +- name: 'Regulatory Constraints: Fintech, security, and compliance impact' + startOffset: 2394 + 
url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2394 + endOffset: 2568 +- name: 'Metadata & Lineage: Reproducibility, artifact logging, and tracking' + startOffset: 2568 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2568 + endOffset: 2750 +- name: 'Data Governance: GDPR implications of logging and dataset storage' + startOffset: 2750 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2750 + endOffset: 2828 +- name: 'Business-First Strategy: Models before heavy platform investment' + startOffset: 2828 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2828 + endOffset: 2959 +- name: 'Parallelization Strategy: Building minimal platform pieces alongside use + cases' + startOffset: 2959 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2959 + endOffset: 3101 +- name: 'MLOps Skill Focus: When platform engineers should learn model internals' + startOffset: 3101 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3101 + endOffset: 3255 +- name: 'API Design & Logging: Unified prediction schemas for monitoring and analytics' + startOffset: 3255 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3255 + endOffset: 3452 +- name: 'Learning Resources: Books, practical projects, and MLOps training' + startOffset: 3452 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3452 + endOffset: 3579 +- name: Episode Wrap‑Up and Closing Remarks + startOffset: 3579 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3579 + endOffset: 3522 + transcript: - header: 'Episode Introduction: MLOps & ML platform conversation with Simon' - line: This week we'll talk about MLOps and building machine learning platforms. @@ -1146,148 +1281,6 @@ transcript: sec: 3596 time: '59:56' who: Alexey -description: Discover MLOps strategies to build an ML platform with experiment tracking, - improved reproducibility, faster releases and compliance-ready model operations. 
-intro: How do you design an ML platform that reliably deploys models, tracks experiments, - and meets regulatory constraints? In this episode, Simon Stiebellehner — Lead MLOps - Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining - & Data Warehousing — walks through practical MLOps platform design grounded in real-world - deployment challenges.

We cover a clear definition of MLOps as people, - processes, and technology, and dig into core platform skills (cloud infrastructure, - Kubernetes, Terraform), user‑centric design for notebooks and data science workflows, - and software engineering fundamentals for production ML. Simon explains experiment - tracking, model registry practices, deployment patterns (batch vs online), orchestration - choices like Airflow, and stitching SaaS and open‑source tools into a coherent ML - platform. The episode also addresses compliance and data governance — GDPR, fintech - security constraints — plus metadata, lineage, API design, and monitoring. We close - with build vs buy trade‑offs, staffing and on‑call considerations, and how emerging - LLM needs affect platforms.

Listen to learn concrete guidance on model - deployment, reproducibility, orchestration, and compliance to help you design a - pragmatic, scalable ML platform. -dateadded: '2023-07-02' -duration: PT00H58M42S -quotableClips: -- name: 'Episode Introduction: MLOps & ML platform conversation with Simon' - startOffset: 74 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=74 - endOffset: 120 -- name: 'Career & Transition: Research to industry, early platform work and management' - startOffset: 120 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=120 - endOffset: 282 -- name: 'MLOps Definition: People, processes, and technology' - startOffset: 282 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=282 - endOffset: 415 -- name: 'Deployment Challenges: Early blockers that launched MLOps work' - startOffset: 415 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=415 - endOffset: 491 -- name: 'Core Platform Skills: Cloud infrastructure, Kubernetes, Terraform' - startOffset: 491 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=491 - endOffset: 647 -- name: 'User-Centric Platform Design: Understanding data science workflows and notebooks' - startOffset: 647 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=647 - endOffset: 805 -- name: 'Engineering Fundamentals: Software engineering for ML platforms' - startOffset: 805 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=805 - endOffset: 830 -- name: 'Team Composition: Specialist vs generalist skill balance' - startOffset: 830 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=830 - endOffset: 934 -- name: 'Team Size & On‑Call: Staffing and operational considerations' - startOffset: 934 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=934 - endOffset: 1012 -- name: 'Build vs Buy Decision: When to consider building an ML platform' - startOffset: 1012 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1012 - endOffset: 1034 -- name: 'Platform Triggers: Signs you need standardization across teams' - 
startOffset: 1034 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1034 - endOffset: 1204 -- name: 'Single-Team Value: SaaS components and incremental platform adoption' - startOffset: 1204 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1204 - endOffset: 1263 -- name: 'Data Science Workflow: Exploration to training and evaluation' - startOffset: 1263 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1263 - endOffset: 1700 -- name: 'Self‑Service Compute: Notebooks, BigQuery, Databricks provisioning' - startOffset: 1700 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1700 - endOffset: 1781 -- name: 'Experiment Tracking: Low‑hanging fruit for reproducibility and collaboration' - startOffset: 1781 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1781 - endOffset: 1832 -- name: 'Model Registry: Persisting models for downstream consumption' - startOffset: 1832 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1832 - endOffset: 1875 -- name: 'Deployment Patterns: Batch inference versus online serving' - startOffset: 1875 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1875 - endOffset: 1911 -- name: 'Orchestration Choices: Airflow, pipelines, and production workflows' - startOffset: 1911 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1911 - endOffset: 2041 -- name: 'Tool Integration: Stitching SaaS and open-source into a coherent platform' - startOffset: 2041 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2041 - endOffset: 2126 -- name: 'LLMs & Emerging Needs: Platform implications and vendor updates' - startOffset: 2126 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2126 - endOffset: 2320 -- name: 'Developer Experience: Thin abstraction layers over cloud providers' - startOffset: 2320 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2320 - endOffset: 2394 -- name: 'Regulatory Constraints: Fintech, security, and compliance impact' - startOffset: 2394 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2394 - endOffset: 
2568 -- name: 'Metadata & Lineage: Reproducibility, artifact logging, and tracking' - startOffset: 2568 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2568 - endOffset: 2750 -- name: 'Data Governance: GDPR implications of logging and dataset storage' - startOffset: 2750 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2750 - endOffset: 2828 -- name: 'Business-First Strategy: Models before heavy platform investment' - startOffset: 2828 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2828 - endOffset: 2959 -- name: 'Parallelization Strategy: Building minimal platform pieces alongside use - cases' - startOffset: 2959 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2959 - endOffset: 3101 -- name: 'MLOps Skill Focus: When platform engineers should learn model internals' - startOffset: 3101 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3101 - endOffset: 3255 -- name: 'API Design & Logging: Unified prediction schemas for monitoring and analytics' - startOffset: 3255 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3255 - endOffset: 3452 -- name: 'Learning Resources: Books, practical projects, and MLOps training' - startOffset: 3452 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3452 - endOffset: 3579 -- name: Episode Wrap‑Up and Closing Remarks - startOffset: 3579 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3579 - endOffset: 3522 --- Links: diff --git a/_podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.md b/_podcast/building-scalable-and-reliable-machine-learning-systems.md similarity index 97% rename from _podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.md rename to _podcast/building-scalable-and-reliable-machine-learning-systems.md index 986dd8ea..b967de30 100644 --- a/_podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.md +++ b/_podcast/building-scalable-and-reliable-machine-learning-systems.md @@ -1,20 +1,127 @@ --- +title: 'Build Scalable, Reliable ML 
Systems (MLOps): Design Docs, Data Strategy & Edge Constraints' +short: Building Scalable and Reliable Machine Learning Systems +season: 14 episode: 1 guests: - arsenykravchenko +image: images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg ids: anchor: atatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q youtube: i-pIdekjUow -image: images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q apple: https://podcasts.apple.com/us/podcast/building-scalable-and-reliable-machine-learning/id1541710331?i=1000612813133 spotify: https://open.spotify.com/episode/6iDyJuhfXibDB6kXFhvaqG?si=urjDGVl6RrWtjVXIAUgOvQ youtube: https://www.youtube.com/watch?v=i-pIdekjUow -season: 14 -short: Building Scalable and Reliable Machine Learning Systems -title: 'Build Scalable, Reliable ML Systems (MLOps): Design Docs, Data Strategy & - Edge Constraints' + +description: Learn MLOps design doc and data strategy to build scalable, reliable machine learning systems; navigate edge constraints, metrics, pipelines, and testing +intro: 'How do you design machine learning systems that scale, stay reliable in production, and meet tight edge and mobile constraints? In this episode, Arseny Kravchenko — a seasoned ML engineer focused on computer vision, active in ML since 2015 and a former Kaggle Master — walks through practical MLOps patterns for turning models into production systems.

We cover where startups trade off productionization and who owns those decisions; how to define ML system goals, non-goals, and assumptions; and why a lightweight design phase with a problem-first design doc (50/50 problem vs solution) pays off. Arseny breaks down edge and mobile ML constraints (latency, FPS, energy, Core ML), managing known and unknown risks with early tests, and building a solution blueprint: baselines, metrics, pipeline components, and data strategy (availability, processing, features, data lakes). He also explains system diagramming for data flow and real-time vs batch, dataset heuristics, and shares design doc examples (photostock search and retail pricing). Listeners will get concrete guidance on MLOps, design docs, data strategy, and edge ML trade-offs — plus pointers to deeper learning resources and a book offer discussed at the end.' +topics: +- machine learning +- MLOps +- data strategy +- data engineering +- system design +- MLOps +dateadded: 2023-05-13 + +duration: PT00H59M25S + +quotableClips: +- name: 'Episode Overview: Building Scalable & Reliable Machine Learning Systems' + startOffset: 0 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=0 + endOffset: 154 +- name: Guest Bio & Startup Experience (deep learning, MLOps, Ntropy, AR, Lyft) + startOffset: 154 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=154 + endOffset: 371 +- name: 'Startups: ML Productionization Trade-offs and Decision Ownership' + startOffset: 371 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=371 + endOffset: 474 +- name: 'Defining Machine Learning System Design: Goals and Constraints' + startOffset: 474 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=474 + endOffset: 634 +- name: 'Edge & Mobile ML Constraints: Latency, FPS, Energy, Core ML' + startOffset: 634 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=634 + endOffset: 889 +- name: 'Managing Unknowns: Known Unknowns, Unknown Unknowns, Early Tests' + startOffset: 889 + url: 
https://www.youtube.com/watch?v=i-pIdekjUow&t=889 + endOffset: 1129 +- name: 'Planning Value: Why a Lightweight Design Phase Matters' + startOffset: 1129 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1129 + endOffset: 1221 +- name: 'Design Document Approach: Problem-First, 50/50 Problem vs Solution' + startOffset: 1221 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1221 + endOffset: 1368 +- name: 'Problem Framing: Product Scenarios, Realism vs Appeal Trade-offs' + startOffset: 1368 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1368 + endOffset: 1741 +- name: 'Goals, Non-Goals & Assumptions: Turning Requirements into Metrics' + startOffset: 1741 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1741 + endOffset: 1902 +- name: 'Solution Blueprint: Baseline, Metrics, Pipeline Components' + startOffset: 1902 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1902 + endOffset: 1957 +- name: 'Data Strategy: Availability, Processing, Feature Needs, Data Lakes' + startOffset: 1957 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1957 + endOffset: 2235 +- name: 'System Diagramming: Data Flow, Dependencies, Real-time vs Batch' + startOffset: 2235 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2235 + endOffset: 2382 +- name: 'Motivation for the Book: Generalizing Experience into Patterns' + startOffset: 2382 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2382 + endOffset: 2505 +- name: 'Heuristics for Datasets: Intuition, Limits, and Practical Guidance' + startOffset: 2505 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2505 + endOffset: 2710 +- name: 'Design Doc Examples: Photostock Search & Super Mega Retail Pricing' + startOffset: 2710 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2710 + endOffset: 2829 +- name: 'Reader Types: Theory-Focused vs Template-Focused Audiences' + startOffset: 2829 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2829 + endOffset: 2907 +- name: 'Co-author Dynamics: Balancing Corporate & 
Hands-on Perspectives' + startOffset: 2907 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2907 + endOffset: 3099 +- name: 'Book Development: Scope Decisions, Publisher Constraints, Reviewer Feedback' + startOffset: 3099 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3099 + endOffset: 3348 +- name: 'Favorite Chapter: Preliminary Research, Reuse, and External Sources' + startOffset: 3348 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3348 + endOffset: 3508 +- name: 'Further Learning: System Design Fundamentals & Software Engineering Skills' + startOffset: 3508 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3508 + endOffset: 3600 +- name: 'Book Offer & Giveaway: Discount Code, Twitter Giveaway Winners' + startOffset: 3600 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3600 + endOffset: 3637 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3637 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3637 + endOffset: 3565 + transcript: - header: 'Episode Overview: Building Scalable & Reliable Machine Learning Systems' - line: This week, we'll talk about building scalable and reliable machine learning @@ -944,118 +1051,6 @@ transcript: sec: 3658 time: '1:00:58' who: Alexey -description: Learn MLOps design doc and data strategy to build scalable, reliable - machine learning systems; navigate edge constraints, metrics, pipelines, and testing. -intro: 'How do you design machine learning systems that scale, stay reliable in production, - and meet tight edge and mobile constraints? In this episode, Arseny Kravchenko — - a seasoned ML engineer focused on computer vision, active in ML since 2015 and a - former Kaggle Master — walks through practical MLOps patterns for turning models - into production systems.

We cover where startups trade off productionization - and who owns those decisions; how to define ML system goals, non-goals, and assumptions; - and why a lightweight design phase with a problem-first design doc (50/50 problem - vs solution) pays off. Arseny breaks down edge and mobile ML constraints (latency, - FPS, energy, Core ML), managing known and unknown risks with early tests, and building - a solution blueprint: baselines, metrics, pipeline components, and data strategy - (availability, processing, features, data lakes). He also explains system diagramming - for data flow and real-time vs batch, dataset heuristics, and shares design doc - examples (photostock search and retail pricing). Listeners will get concrete guidance - on MLOps, design docs, data strategy, and edge ML trade-offs — plus pointers to - deeper learning resources and a book offer discussed at the end.' -dateadded: '2023-05-13' -duration: PT00H59M25S -quotableClips: -- name: 'Episode Overview: Building Scalable & Reliable Machine Learning Systems' - startOffset: 0 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=0 - endOffset: 154 -- name: Guest Bio & Startup Experience (deep learning, MLOps, Ntropy, AR, Lyft) - startOffset: 154 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=154 - endOffset: 371 -- name: 'Startups: ML Productionization Trade-offs and Decision Ownership' - startOffset: 371 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=371 - endOffset: 474 -- name: 'Defining Machine Learning System Design: Goals and Constraints' - startOffset: 474 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=474 - endOffset: 634 -- name: 'Edge & Mobile ML Constraints: Latency, FPS, Energy, Core ML' - startOffset: 634 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=634 - endOffset: 889 -- name: 'Managing Unknowns: Known Unknowns, Unknown Unknowns, Early Tests' - startOffset: 889 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=889 - endOffset: 1129 -- name: 'Planning Value: 
Why a Lightweight Design Phase Matters' - startOffset: 1129 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1129 - endOffset: 1221 -- name: 'Design Document Approach: Problem-First, 50/50 Problem vs Solution' - startOffset: 1221 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1221 - endOffset: 1368 -- name: 'Problem Framing: Product Scenarios, Realism vs Appeal Trade-offs' - startOffset: 1368 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1368 - endOffset: 1741 -- name: 'Goals, Non-Goals & Assumptions: Turning Requirements into Metrics' - startOffset: 1741 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1741 - endOffset: 1902 -- name: 'Solution Blueprint: Baseline, Metrics, Pipeline Components' - startOffset: 1902 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1902 - endOffset: 1957 -- name: 'Data Strategy: Availability, Processing, Feature Needs, Data Lakes' - startOffset: 1957 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1957 - endOffset: 2235 -- name: 'System Diagramming: Data Flow, Dependencies, Real-time vs Batch' - startOffset: 2235 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2235 - endOffset: 2382 -- name: 'Motivation for the Book: Generalizing Experience into Patterns' - startOffset: 2382 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2382 - endOffset: 2505 -- name: 'Heuristics for Datasets: Intuition, Limits, and Practical Guidance' - startOffset: 2505 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2505 - endOffset: 2710 -- name: 'Design Doc Examples: Photostock Search & Super Mega Retail Pricing' - startOffset: 2710 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2710 - endOffset: 2829 -- name: 'Reader Types: Theory-Focused vs Template-Focused Audiences' - startOffset: 2829 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2829 - endOffset: 2907 -- name: 'Co-author Dynamics: Balancing Corporate & Hands-on Perspectives' - startOffset: 2907 - url: 
https://www.youtube.com/watch?v=i-pIdekjUow&t=2907 - endOffset: 3099 -- name: 'Book Development: Scope Decisions, Publisher Constraints, Reviewer Feedback' - startOffset: 3099 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3099 - endOffset: 3348 -- name: 'Favorite Chapter: Preliminary Research, Reuse, and External Sources' - startOffset: 3348 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3348 - endOffset: 3508 -- name: 'Further Learning: System Design Fundamentals & Software Engineering Skills' - startOffset: 3508 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3508 - endOffset: 3600 -- name: 'Book Offer & Giveaway: Discount Code, Twitter Giveaway Winners' - startOffset: 3600 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3600 - endOffset: 3637 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3637 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3637 - endOffset: 3565 --- Links: diff --git a/_podcast/s15e06-democratizing-causality.md b/_podcast/causal-inference-for-machine-learning.md similarity index 97% rename from _podcast/s15e06-democratizing-causality.md rename to _podcast/causal-inference-for-machine-learning.md index 43bbc2e1..e62fe5b1 100644 --- a/_podcast/s15e06-democratizing-causality.md +++ b/_podcast/causal-inference-for-machine-learning.md @@ -1,19 +1,147 @@ --- +title: 'Causal Inference for Real-World ML: Uplift Modeling, Counterfactuals, Treatment Effects & LLM Integration' +short: Democratizing Causality +season: 15 episode: 6 guests: - aleksandermolak +image: images/podcast/s15e06-democratizing-causality.jpg ids: anchor: atatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh youtube: 0I2FHH95Ofs -image: images/podcast/s15e06-democratizing-causality.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh apple: https://podcasts.apple.com/us/podcast/democratizing-causality-aleksander-molak/id1541710331?i=1000625694605 
spotify: https://open.spotify.com/episode/17U3RWz5BupRIwoBvGWqYQ?si=g6XypIZnSwG4hznNIOs7mw youtube: https://www.youtube.com/watch?v=0I2FHH95Ofs -season: 15 -short: Democratizing Causality -title: 'Practical Causal ML: Counterfactuals, Uplift (CATE), A/B Testing & LLMs' +description: "Master causal inference for production ML: uplift modeling, treatment effects, counterfactuals, causal discovery, and LLM integration strategies." +intro: "How do you move from correlation to actionable decisions — using counterfactuals, uplift modeling, treatment effect estimation, and LLMs — without falling into confounding traps or biased estimators? In this episode, Aleksander Molak, an independent ML researcher, author and educator specializing in causality, NLP and AI strategy, walks through practical causal inference techniques for real-world machine learning applications.

We explore foundational concepts like counterfactuals and Judea Pearl's causal hierarchy, then dive into meta-learners (T-learner, S-learner), Conditional Average Treatment Effect (CATE) estimation, uplift modeling, and when A/B testing or causal feature selection are essential for achieving unconfoundedness. Aleksander covers deployment challenges, debiasing methods (double/debiased ML), refutation testing for model validation, causal discovery algorithms, and cost-benefit analysis that uncovered wasted marketing spend. The conversation also examines how LLMs integrate into causal workflows: feature extraction from text, using text as outcomes or treatments, inferring unobserved confounders, and practical demonstrations with CausalBERT. You'll gain actionable frameworks for building, evaluating, and deploying causal ML systems in production, plus resources and code examples to implement these methods in your own projects." +topics: +- causal inference +- LLMs +- machine learning +dateadded: 2023-09-10 + +duration: PT01H06M38S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=0 + endOffset: 82 +- name: 'Guest Intro: Aleksander Molak & book overview' + startOffset: 82 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=82 + endOffset: 126 +- name: Career highlights and dyslexia prediction project + startOffset: 126 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=126 + endOffset: 375 +- name: 'Causal advocacy: democratizing causal thinking' + startOffset: 375 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=375 + endOffset: 451 +- name: 'Association vs causation: limits of correlational reasoning' + startOffset: 451 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=451 + endOffset: 535 +- name: 'Illustrative confounders: race example and ice cream–drowning' + startOffset: 535 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=535 + endOffset: 761 +- name: 'Predictive ML vs 
decision-making: Zillow and IID assumptions' + startOffset: 761 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=761 + endOffset: 936 +- name: 'Counterfactuals in practice: marketing and recommender systems' + startOffset: 936 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=936 + endOffset: 1095 +- name: Counterfactuals defined and Judea Pearl’s intervention view + startOffset: 1095 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1095 + endOffset: 1282 +- name: 'Meta-learners overview: T‑learner and counterfactual estimation' + startOffset: 1282 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1282 + endOffset: 1464 +- name: Conditional Average Treatment Effect (CATE) estimation + startOffset: 1464 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1464 + endOffset: 1576 +- name: 'Achieving unconfoundedness: A/B tests vs causal feature selection' + startOffset: 1576 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1576 + endOffset: 1672 +- name: Targeting decisions from uplift estimates + startOffset: 1672 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1672 + endOffset: 1757 +- name: Deployment risks and debiasing estimators (double/triple ML) + startOffset: 1757 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1757 + endOffset: 1960 +- name: 'Uplift modeling: policy evaluation and business metrics' + startOffset: 1960 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1960 + endOffset: 1994 +- name: 'Evaluating causal models: refutation tests and estimator quality' + startOffset: 1994 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1994 + endOffset: 2257 +- name: Causal discovery and heterogeneous treatment effects (book coverage) + startOffset: 2257 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2257 + endOffset: 2334 +- name: 'Cost–benefit of causal models: complexity vs value' + startOffset: 2334 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2334 + endOffset: 2474 +- name: 'Real-world impact: discovering 
wasted marketing spend' + startOffset: 2474 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2474 + endOffset: 2605 +- name: 'Incremental rollout: A/B testing as validation baseline' + startOffset: 2605 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2605 + endOffset: 2666 +- name: 'LLMs in causal workflows: feature extraction and scoring' + startOffset: 2666 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2666 + endOffset: 2814 +- name: 'Text as outcome: using LLMs to score experimental text' + startOffset: 2814 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2814 + endOffset: 2957 +- name: 'Text as treatment/confounder: style extraction and embeddings' + startOffset: 2957 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2957 + endOffset: 3278 +- name: Inferring unobserved variables (e.g., gender/style) with LLMs + startOffset: 3278 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3278 + endOffset: 3494 +- name: CausalBert demo and code note (PyData Berlin talk) + startOffset: 3494 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3494 + endOffset: 3573 +- name: 'Causal ML without experiments: partial identification & sensitivity' + startOffset: 3573 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3573 + endOffset: 3843 +- name: 'Causal graphs and nonparametric identification: minimal observables' + startOffset: 3843 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3843 + endOffset: 3967 +- name: 'Recommended resources: The Book of Why, Molak’s book & GitHub' + startOffset: 3967 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3967 + endOffset: 4048 +- name: Closing remarks and next steps + startOffset: 4048 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=4048 + endOffset: 3998 + transcript: - header: Episode Introduction - header: 'Guest Intro: Aleksander Molak & book overview' @@ -1172,143 +1300,6 @@ transcript: sec: 4080 time: '1:08:00' who: Alexey -description: 'Discover Causal ML counterfactuals and uplift 
(CATE): actionable debiasing, - targeting strategies, policy evaluation and deployment tips to boost ROI.' -intro: 'How do you move from correlation to actionable decisions — using counterfactuals, - uplift (CATE), A/B testing and LLMs — without getting misled by confounders or biased - estimators? In this episode, Aleksander Molak, an independent ML researcher, author - and educator specializing in causality, NLP and AI strategy (and author of a dyslexia - prediction project), walks through practical causal ML techniques and real-world - tradeoffs.

We cover foundational ideas — counterfactuals and Judea Pearl’s - intervention view — then meta-learners (T‑learner), Conditional Average Treatment - Effect (CATE) estimation, uplift modeling and when A/B tests or causal feature selection - are needed to achieve unconfoundedness. Aleksander discusses deployment risks and - debiasing approaches (double/triple ML), refutation tests for estimator quality, - causal discovery and cost–benefit tradeoffs that revealed wasted marketing spend. - He also shows how LLMs fit into causal workflows: feature extraction, scoring text - as outcome, text as treatment or confounder, inferring unobserved variables and - a CausalBert demo. Listeners will come away with practical guidance on building, - evaluating and validating causal ML systems, plus recommended resources and code - to start applying these methods.' -dateadded: '2023-09-10' -duration: PT01H06M38S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=0 - endOffset: 82 -- name: 'Guest Intro: Aleksander Molak & book overview' - startOffset: 82 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=82 - endOffset: 126 -- name: Career highlights and dyslexia prediction project - startOffset: 126 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=126 - endOffset: 375 -- name: 'Causal advocacy: democratizing causal thinking' - startOffset: 375 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=375 - endOffset: 451 -- name: 'Association vs causation: limits of correlational reasoning' - startOffset: 451 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=451 - endOffset: 535 -- name: 'Illustrative confounders: race example and ice cream–drowning' - startOffset: 535 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=535 - endOffset: 761 -- name: 'Predictive ML vs decision-making: Zillow and IID assumptions' - startOffset: 761 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=761 - endOffset: 936 -- name: 
'Counterfactuals in practice: marketing and recommender systems' - startOffset: 936 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=936 - endOffset: 1095 -- name: Counterfactuals defined and Judea Pearl’s intervention view - startOffset: 1095 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1095 - endOffset: 1282 -- name: 'Meta-learners overview: T‑learner and counterfactual estimation' - startOffset: 1282 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1282 - endOffset: 1464 -- name: Conditional Average Treatment Effect (CATE) estimation - startOffset: 1464 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1464 - endOffset: 1576 -- name: 'Achieving unconfoundedness: A/B tests vs causal feature selection' - startOffset: 1576 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1576 - endOffset: 1672 -- name: Targeting decisions from uplift estimates - startOffset: 1672 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1672 - endOffset: 1757 -- name: Deployment risks and debiasing estimators (double/triple ML) - startOffset: 1757 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1757 - endOffset: 1960 -- name: 'Uplift modeling: policy evaluation and business metrics' - startOffset: 1960 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1960 - endOffset: 1994 -- name: 'Evaluating causal models: refutation tests and estimator quality' - startOffset: 1994 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1994 - endOffset: 2257 -- name: Causal discovery and heterogeneous treatment effects (book coverage) - startOffset: 2257 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2257 - endOffset: 2334 -- name: 'Cost–benefit of causal models: complexity vs value' - startOffset: 2334 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2334 - endOffset: 2474 -- name: 'Real-world impact: discovering wasted marketing spend' - startOffset: 2474 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2474 - endOffset: 2605 -- name: 'Incremental rollout: 
A/B testing as validation baseline' - startOffset: 2605 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2605 - endOffset: 2666 -- name: 'LLMs in causal workflows: feature extraction and scoring' - startOffset: 2666 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2666 - endOffset: 2814 -- name: 'Text as outcome: using LLMs to score experimental text' - startOffset: 2814 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2814 - endOffset: 2957 -- name: 'Text as treatment/confounder: style extraction and embeddings' - startOffset: 2957 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2957 - endOffset: 3278 -- name: Inferring unobserved variables (e.g., gender/style) with LLMs - startOffset: 3278 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3278 - endOffset: 3494 -- name: CausalBert demo and code note (PyData Berlin talk) - startOffset: 3494 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3494 - endOffset: 3573 -- name: 'Causal ML without experiments: partial identification & sensitivity' - startOffset: 3573 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3573 - endOffset: 3843 -- name: 'Causal graphs and nonparametric identification: minimal observables' - startOffset: 3843 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3843 - endOffset: 3967 -- name: 'Recommended resources: The Book of Why, Molak’s book & GitHub' - startOffset: 3967 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3967 - endOffset: 4048 -- name: Closing remarks and next steps - startOffset: 4048 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=4048 - endOffset: 3998 --- Links: diff --git a/_podcast/s04e09-chief-data-officer.md b/_podcast/chief-data-officer-data-strategy-and-org-design.md similarity index 97% rename from _podcast/s04e09-chief-data-officer.md rename to _podcast/chief-data-officer-data-strategy-and-org-design.md index c6602984..cff5d375 100644 --- a/_podcast/s04e09-chief-data-officer.md +++ 
b/_podcast/chief-data-officer-data-strategy-and-org-design.md @@ -1,11 +1,11 @@ --- title: 'Mastering the Chief Data Officer Role: Build Data Strategy, Org Design & AI' short: Chief Data Officer +season: 4 +episode: 9 guests: - marcodesa image: images/podcast/s04e09-chief-data-officer.jpg -season: 4 -episode: 9 ids: youtube: IdaZOD46FEw anchor: Chief-Data-Officer---Marco-De-Sa-e16hm4t @@ -14,6 +14,131 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Chief-Data-Officer---Marco-De-Sa-e16hm4t spotify: https://open.spotify.com/episode/64lEB0Wv0a6DfkDi672Ulk apple: https://podcasts.apple.com/us/podcast/chief-data-officer-marco-de-sa/id1541710331?i=1000533326308 + +description: Learn how Chief Data Officers build data strategy, org design and roadmaps—get tactics on governance, KPIs, delegation, career growth and remote leadership +intro: 'How do you move from head of data to an effective Chief Data Officer who builds strategy, designs the org, and delivers AI? In this episode, Marco De Sa — CDO at OLX Group with prior data leadership roles at Yahoo, Facebook, Twitter, and Spotify — lays out what modern data leadership really requires.

We explore the evolving CDO scope: data strategy, governance, and AI; balancing vision versus tactics; and future-proofing data collection for tomorrow’s products. Marco breaks down organizational design and delegation — when to hire multiple VPs, how CDO responsibilities differ from VP, CTO and CPO roles, and how to structure reporting lines. Practical topics include working backwards from goals to data platform and machine learning investment, measuring progress with meaningful KPIs, time management and productivity for senior data leaders, and managing distributed teams.

Listeners will walk away with concrete frameworks for data strategy, org design, and building a data-driven culture, plus career guidance for aspiring CDOs on technical breadth, soft skills, interviewing, and overcoming resistance with evidence-based persuasion. Ideal for data leaders and executives shaping data strategy, governance, and AI roadmaps.' +topics: +- data strategy +- data governance +- AI +- leadership +- career growth +- communication +- team building +dateadded: 2021-08-29 + +duration: PT01H01M51S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=0 + endOffset: 78 +- name: 'Guest Overview: Marco''s Career & Roles' + startOffset: 78 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=78 + endOffset: 203 +- name: 'Industry Experience: Yahoo, Facebook, Twitter, Spotify' + startOffset: 203 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=203 + endOffset: 341 +- name: Transition to OLX Group and CDO Appointment + startOffset: 341 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=341 + endOffset: 368 +- name: 'Chief Data Officer Scope: Data Strategy, Governance, AI' + startOffset: 368 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=368 + endOffset: 437 +- name: Traditional Responsibilities vs Modern CDO Expectations + startOffset: 437 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=437 + endOffset: 619 +- name: 'Future-Proofing Data: Collecting for Tomorrow''s Products' + startOffset: 619 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=619 + endOffset: 700 +- name: Delegation and Organisational Design for Data Leadership + startOffset: 700 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=700 + endOffset: 864 +- name: 'Career Progression: From Head of Data to CDO' + startOffset: 864 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=864 + endOffset: 1057 +- name: 'Strategy vs Tactics: Vision, KPIs, and Execution' + startOffset: 1057 + url: 
https://www.youtube.com/watch?v=IdaZOD46FEw&t=1057 + endOffset: 1217 +- name: 'CDO vs VP of Data: Scope, Influence, and Proactivity' + startOffset: 1217 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1217 + endOffset: 1495 +- name: 'Structuring a Data Org: Multiple VPs and Reporting Lines' + startOffset: 1495 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1495 + endOffset: 1571 +- name: 'Splitting Work: CDO Responsibilities vs VP Execution' + startOffset: 1571 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1571 + endOffset: 1682 +- name: Differentiating CTO, CPO, and CDO Roles + startOffset: 1682 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1682 + endOffset: 1910 +- name: 'Working Backwards: Goals to Data Platform & ML Investment' + startOffset: 1910 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1910 + endOffset: 2083 +- name: 'Measuring Progress: Metrics, Accountability, and Visibility' + startOffset: 2083 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2083 + endOffset: 2205 +- name: 'Meeting Load: Time Management for Senior Data Leaders' + startOffset: 2205 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2205 + endOffset: 2396 +- name: 'Productivity Practices: Documentation, Async, and Slack' + startOffset: 2396 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2396 + endOffset: 2522 +- name: 'Building a Data-Driven Culture: Democratization & Usability' + startOffset: 2522 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2522 + endOffset: 2652 +- name: 'Remote Leadership: Challenges of Managing Distributed Teams' + startOffset: 2652 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2652 + endOffset: 2884 +- name: 'Technical Skills for CDOs: Breadth vs Depth (ML, SQL, Engineering)' + startOffset: 2884 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2884 + endOffset: 3020 +- name: 'Business Education: MBA Value for Executive Data Roles' + startOffset: 3020 + url: 
https://www.youtube.com/watch?v=IdaZOD46FEw&t=3020 + endOffset: 3138 +- name: 'Essential Soft Skills: Communication, Empathy, Influence' + startOffset: 3138 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3138 + endOffset: 3256 +- name: 'OLX Group Challenges: Geographic, Product, and Tech Complexity' + startOffset: 3256 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3256 + endOffset: 3396 +- name: 'Interviewing for CDO: Demonstrating Strategic Thinking' + startOffset: 3396 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3396 + endOffset: 3580 +- name: 'Overcoming Resistance: Persuasion, Evidence, and Constraints' + startOffset: 3580 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3580 + endOffset: 3744 +- name: Closing Remarks and Key Takeaways + startOffset: 3744 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3744 + endOffset: 3711 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Marco''s Career & Roles' @@ -996,132 +1121,4 @@ transcript: sec: 3789 time: '1:03:09' who: Marco -description: Learn how Chief Data Officers build data strategy, org design and roadmaps—get - tactics on governance, KPIs, delegation, career growth and remote leadership. -intro: 'How do you move from head of data to an effective Chief Data Officer who builds - strategy, designs the org, and delivers AI? In this episode, Marco De Sa — CDO at - OLX Group with prior data leadership roles at Yahoo, Facebook, Twitter, and Spotify - — lays out what modern data leadership really requires.

We explore the - evolving CDO scope: data strategy, governance, and AI; balancing vision versus tactics; - and future-proofing data collection for tomorrow’s products. Marco breaks down organizational - design and delegation — when to hire multiple VPs, how CDO responsibilities differ - from VP, CTO and CPO roles, and how to structure reporting lines. Practical topics - include working backwards from goals to data platform and machine learning investment, - measuring progress with meaningful KPIs, time management and productivity for senior - data leaders, and managing distributed teams.

Listeners will walk away - with concrete frameworks for data strategy, org design, and building a data-driven - culture, plus career guidance for aspiring CDOs on technical breadth, soft skills, - interviewing, and overcoming resistance with evidence-based persuasion. Ideal for - data leaders and executives shaping data strategy, governance, and AI roadmaps.' -dateadded: '2021-08-29' -duration: PT01H01M51S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=0 - endOffset: 78 -- name: 'Guest Overview: Marco''s Career & Roles' - startOffset: 78 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=78 - endOffset: 203 -- name: 'Industry Experience: Yahoo, Facebook, Twitter, Spotify' - startOffset: 203 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=203 - endOffset: 341 -- name: Transition to OLX Group and CDO Appointment - startOffset: 341 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=341 - endOffset: 368 -- name: 'Chief Data Officer Scope: Data Strategy, Governance, AI' - startOffset: 368 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=368 - endOffset: 437 -- name: Traditional Responsibilities vs Modern CDO Expectations - startOffset: 437 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=437 - endOffset: 619 -- name: 'Future-Proofing Data: Collecting for Tomorrow''s Products' - startOffset: 619 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=619 - endOffset: 700 -- name: Delegation and Organisational Design for Data Leadership - startOffset: 700 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=700 - endOffset: 864 -- name: 'Career Progression: From Head of Data to CDO' - startOffset: 864 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=864 - endOffset: 1057 -- name: 'Strategy vs Tactics: Vision, KPIs, and Execution' - startOffset: 1057 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1057 - endOffset: 1217 -- name: 'CDO vs VP of Data: Scope, Influence, and Proactivity' 
- startOffset: 1217 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1217 - endOffset: 1495 -- name: 'Structuring a Data Org: Multiple VPs and Reporting Lines' - startOffset: 1495 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1495 - endOffset: 1571 -- name: 'Splitting Work: CDO Responsibilities vs VP Execution' - startOffset: 1571 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1571 - endOffset: 1682 -- name: Differentiating CTO, CPO, and CDO Roles - startOffset: 1682 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1682 - endOffset: 1910 -- name: 'Working Backwards: Goals to Data Platform & ML Investment' - startOffset: 1910 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1910 - endOffset: 2083 -- name: 'Measuring Progress: Metrics, Accountability, and Visibility' - startOffset: 2083 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2083 - endOffset: 2205 -- name: 'Meeting Load: Time Management for Senior Data Leaders' - startOffset: 2205 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2205 - endOffset: 2396 -- name: 'Productivity Practices: Documentation, Async, and Slack' - startOffset: 2396 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2396 - endOffset: 2522 -- name: 'Building a Data-Driven Culture: Democratization & Usability' - startOffset: 2522 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2522 - endOffset: 2652 -- name: 'Remote Leadership: Challenges of Managing Distributed Teams' - startOffset: 2652 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2652 - endOffset: 2884 -- name: 'Technical Skills for CDOs: Breadth vs Depth (ML, SQL, Engineering)' - startOffset: 2884 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2884 - endOffset: 3020 -- name: 'Business Education: MBA Value for Executive Data Roles' - startOffset: 3020 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3020 - endOffset: 3138 -- name: 'Essential Soft Skills: Communication, Empathy, Influence' - startOffset: 3138 - url: 
https://www.youtube.com/watch?v=IdaZOD46FEw&t=3138 - endOffset: 3256 -- name: 'OLX Group Challenges: Geographic, Product, and Tech Complexity' - startOffset: 3256 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3256 - endOffset: 3396 -- name: 'Interviewing for CDO: Demonstrating Strategic Thinking' - startOffset: 3396 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3396 - endOffset: 3580 -- name: 'Overcoming Resistance: Persuasion, Evidence, and Constraints' - startOffset: 3580 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3580 - endOffset: 3744 -- name: Closing Remarks and Key Takeaways - startOffset: 3744 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3744 - endOffset: 3711 --- diff --git a/_podcast/s03e10-data-governance.md b/_podcast/cloud-data-governance.md similarity index 97% rename from _podcast/s03e10-data-governance.md rename to _podcast/cloud-data-governance.md index 3f89b6c5..03433dab 100644 --- a/_podcast/s03e10-data-governance.md +++ b/_podcast/cloud-data-governance.md @@ -1,13 +1,12 @@ --- -title: 'How to Build Data Governance in the Cloud: Classification, Catalogs, Policies - & ROI' +title: 'How to Build Data Governance in the Cloud: Classification, Catalogs, Policies & ROI' short: Data Governance +season: 3 +episode: 10 guests: - jessiashdown - urigilad image: images/podcast/s03e10-data-governance.jpg -season: 3 -episode: 10 ids: youtube: tJ3v8h7A7RY anchor: Data-Governance---Jessi-Ashdown--Uri-Gilad-e12jmoo @@ -16,6 +15,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Governance---Jessi-Ashdown--Uri-Gilad-e12jmoo spotify: https://open.spotify.com/episode/2zaLMrgbIgVkVEWY09b1Wn apple: https://podcasts.apple.com/us/podcast/data-governance-jessi-ashdown-uri-gilad/id1541710331?i=1000525176805 + +description: 'Learn data governance in the cloud: build data classification, catalogs & policies, automate tagging, add stewards and measure ROI for trusted, compliant data' +intro: 'How do you build data governance in 
the cloud that enables access, meets regulation, and demonstrates ROI? In this episode, Jessi Ashdown, Senior UX Researcher for Google Cloud, and Uri Gilad, Product Manager for Data Governance at Google Cloud, walk through practical approaches to data governance in the cloud—grounded in real user research and product experience.

They define governance beyond security and PII, explain how GDPR and high-profile events like Cambridge Analytica accelerated adoption, and outline the core components: people, processes, and tools. Key topics include data classification and taxonomy, building scalable data catalogs versus spreadsheets, policy design (retention, freshness, purpose-based access), enforcement models, and access workflows. They cover roles such as data stewards and producers, data quality signals, automation (tagging and requests), and what to measure for ROI—catalog metrics, cost versus usage, and compliance value. You’ll also hear an MVP strategy for minimum viable governance and what to include in a catalog (technical metadata, lineage, business glossary).

Listen to gain actionable steps to scope a cloud data governance program, prioritize by the “why,” and implement classification, catalogs, and policies that balance control and democratized access.' +topics: +- data governance +- data compliance +- cloud +dateadded: 2021-06-13 + +duration: PT00H58M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=0 + endOffset: 209 +- name: 'Guest Background — Jessi: UX Researcher & Data Governance at Google Cloud' + startOffset: 209 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=209 + endOffset: 286 +- name: 'Guest Background — Uri: Product Management & Data Governance Experience' + startOffset: 286 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=286 + endOffset: 400 +- name: 'Defining Data Governance: Beyond Security and PII' + startOffset: 400 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=400 + endOffset: 537 +- name: Cloud & Regulation Driving Governance Adoption (GDPR, Cambridge Analytica) + startOffset: 537 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=537 + endOffset: 844 +- name: 'Core Components: People, Processes, Tools and Cataloging' + startOffset: 844 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=844 + endOffset: 933 +- name: 'Practical Implementation: Classify Data and Establish Policies' + startOffset: 933 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=933 + endOffset: 1180 +- name: 'Assessing Necessity: When Governance Can Be Minimal' + startOffset: 1180 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1180 + endOffset: 1380 +- name: 'Prioritization: Start with the "Why" to Scope Your Program' + startOffset: 1380 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1380 + endOffset: 1454 +- name: 'Data Classification & Taxonomy: Defining Meaningful Data Classes' + startOffset: 1454 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1454 + endOffset: 1668 +- name: 'Tools vs Spreadsheets: Scalable Data 
Catalog Approaches' + startOffset: 1668 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1668 + endOffset: 1820 +- name: 'Aligning Storage and Systems: Make Data Work for You' + startOffset: 1820 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1820 + endOffset: 1983 +- name: 'Human Roles: Data Stewards, Producers and Decision Makers' + startOffset: 1983 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1983 + endOffset: 2099 +- name: 'Data Quality: Trust Signals, Source, and Measurable Checks' + startOffset: 2099 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2099 + endOffset: 2305 +- name: 'Policy Design: Retention, Freshness and Purpose-based Access' + startOffset: 2305 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2305 + endOffset: 2524 +- name: 'Policies as Enablement: Guardrails for Democratized Data Access' + startOffset: 2524 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2524 + endOffset: 2704 +- name: 'Enforcement Models: Catalog Interfaces vs Storage Control Plane' + startOffset: 2704 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2704 + endOffset: 2822 +- name: 'Access Workflows: Request/Approval "Shopping Cart" Experience' + startOffset: 2822 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2822 + endOffset: 2855 +- name: 'Governance Tools & Platforms: Dataplex, Collibra and Integrations' + startOffset: 2855 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2855 + endOffset: 2930 +- name: 'Automation: Tagging, Requests and Reducing Manual Effort' + startOffset: 2930 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2930 + endOffset: 3019 +- name: 'Measuring ROI: Catalog Metrics, Cost vs Usage and Compliance Value' + startOffset: 3019 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3019 + endOffset: 3201 +- name: 'MVP Strategy: Minimum Viable Governance and Future-proofing' + startOffset: 3201 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3201 + endOffset: 3277 +- name: 'Data Catalog Contents: 
Technical Metadata, Lineage and Business Glossary' + startOffset: 3277 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3277 + endOffset: 3466 +- name: 'Governance Scope: Why It Extends Beyond the Catalog' + startOffset: 3466 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3466 + endOffset: 3544 +- name: Closing Remarks, Contact Links and Next Steps + startOffset: 3544 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3544 + endOffset: 3560 +- name: 'Recommended Resource: Data Governance (O''Reilly Book)' + startOffset: 3560 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3560 + endOffset: 3489 + transcript: - header: Podcast Introduction - line: This week we will talk about data governance. We have two special guests. @@ -859,132 +975,6 @@ transcript: sec: 3640 time: '1:00:40' who: Uri -description: 'Learn data governance in the cloud: build data classification, catalogs - & policies, automate tagging, add stewards and measure ROI for trusted, compliant - data' -intro: 'How do you build data governance in the cloud that enables access, meets regulation, - and demonstrates ROI? In this episode, Jessi Ashdown, Senior UX Researcher for Google - Cloud, and Uri Gilad, Product Manager for Data Governance at Google Cloud, walk - through practical approaches to data governance in the cloud—grounded in real user - research and product experience.

They define governance beyond security - and PII, explain how GDPR and high-profile events like Cambridge Analytica accelerated - adoption, and outline the core components: people, processes, and tools. Key topics - include data classification and taxonomy, building scalable data catalogs versus - spreadsheets, policy design (retention, freshness, purpose-based access), enforcement - models, and access workflows. They cover roles such as data stewards and producers, - data quality signals, automation (tagging and requests), and what to measure for - ROI—catalog metrics, cost versus usage, and compliance value. You’ll also hear an - MVP strategy for minimum viable governance and what to include in a catalog (technical - metadata, lineage, business glossary).

Listen to gain actionable steps - to scope a cloud data governance program, prioritize by the “why,” and implement - classification, catalogs, and policies that balance control and democratized access.' -dateadded: '2021-06-13' -duration: PT00H58M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=0 - endOffset: 209 -- name: 'Guest Background — Jessi: UX Researcher & Data Governance at Google Cloud' - startOffset: 209 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=209 - endOffset: 286 -- name: 'Guest Background — Uri: Product Management & Data Governance Experience' - startOffset: 286 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=286 - endOffset: 400 -- name: 'Defining Data Governance: Beyond Security and PII' - startOffset: 400 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=400 - endOffset: 537 -- name: Cloud & Regulation Driving Governance Adoption (GDPR, Cambridge Analytica) - startOffset: 537 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=537 - endOffset: 844 -- name: 'Core Components: People, Processes, Tools and Cataloging' - startOffset: 844 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=844 - endOffset: 933 -- name: 'Practical Implementation: Classify Data and Establish Policies' - startOffset: 933 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=933 - endOffset: 1180 -- name: 'Assessing Necessity: When Governance Can Be Minimal' - startOffset: 1180 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1180 - endOffset: 1380 -- name: 'Prioritization: Start with the "Why" to Scope Your Program' - startOffset: 1380 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1380 - endOffset: 1454 -- name: 'Data Classification & Taxonomy: Defining Meaningful Data Classes' - startOffset: 1454 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1454 - endOffset: 1668 -- name: 'Tools vs Spreadsheets: Scalable Data Catalog Approaches' - startOffset: 1668 - url: 
https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1668 - endOffset: 1820 -- name: 'Aligning Storage and Systems: Make Data Work for You' - startOffset: 1820 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1820 - endOffset: 1983 -- name: 'Human Roles: Data Stewards, Producers and Decision Makers' - startOffset: 1983 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1983 - endOffset: 2099 -- name: 'Data Quality: Trust Signals, Source, and Measurable Checks' - startOffset: 2099 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2099 - endOffset: 2305 -- name: 'Policy Design: Retention, Freshness and Purpose-based Access' - startOffset: 2305 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2305 - endOffset: 2524 -- name: 'Policies as Enablement: Guardrails for Democratized Data Access' - startOffset: 2524 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2524 - endOffset: 2704 -- name: 'Enforcement Models: Catalog Interfaces vs Storage Control Plane' - startOffset: 2704 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2704 - endOffset: 2822 -- name: 'Access Workflows: Request/Approval "Shopping Cart" Experience' - startOffset: 2822 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2822 - endOffset: 2855 -- name: 'Governance Tools & Platforms: Dataplex, Collibra and Integrations' - startOffset: 2855 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2855 - endOffset: 2930 -- name: 'Automation: Tagging, Requests and Reducing Manual Effort' - startOffset: 2930 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2930 - endOffset: 3019 -- name: 'Measuring ROI: Catalog Metrics, Cost vs Usage and Compliance Value' - startOffset: 3019 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3019 - endOffset: 3201 -- name: 'MVP Strategy: Minimum Viable Governance and Future-proofing' - startOffset: 3201 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3201 - endOffset: 3277 -- name: 'Data Catalog Contents: Technical Metadata, Lineage and Business 
Glossary' - startOffset: 3277 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3277 - endOffset: 3466 -- name: 'Governance Scope: Why It Extends Beyond the Catalog' - startOffset: 3466 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3466 - endOffset: 3544 -- name: Closing Remarks, Contact Links and Next Steps - startOffset: 3544 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3544 - endOffset: 3560 -- name: 'Recommended Resource: Data Governance (O''Reilly Book)' - startOffset: 3560 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3560 - endOffset: 3489 --- Links: diff --git a/_podcast/s01e02-processes.md b/_podcast/crisp-dm.md similarity index 97% rename from _podcast/s01e02-processes.md rename to _podcast/crisp-dm.md index 324b776e..8f477053 100644 --- a/_podcast/s01e02-processes.md +++ b/_podcast/crisp-dm.md @@ -1,19 +1,11 @@ --- -title: 'CRISP-DM Methodology for Data Science Projects: Business Understanding, Data - Preparation, Modeling, Evaluation & Deployment' +title: 'CRISP-DM Methodology for Data Science Projects: Business Understanding, Data Preparation, Modeling, Evaluation & Deployment' short: Processes in a Data Science Project +season: 1 +episode: 2 guests: - alexeygrigorev image: images/podcast/s01e02-processes.jpg -description: Learn the CRISP-DM methodology for managing data science projects. Step-by-step - guide covering business understanding, data preparation, modeling, evaluation, and - deployment. 
-keywords: CRISP-DM, data science process, machine learning methodology, data science - project management, ML project lifecycle, data science workflow, A/B testing, model - deployment, data science best practices, ML model evaluation, cross-functional data - teams -season: 1 -episode: 2 ids: youtube: SesVTDklFYQ anchor: Processes-in-a-Data-Science-Project---Alexey-Grigorev-encdlg @@ -22,7 +14,17 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Processes-in-a-Data-Science-Project---Alexey-Grigorev-encdlg spotify: TODO apple: TODO -dateadded: '2021-02-23' + +description: Learn the CRISP-DM methodology for managing data science projects. Step-by-step guide covering business understanding, data preparation, modeling, evaluation, and deployment +topics: +- data science +- machine learning +- project management +dateadded: 2021-02-23 + + + +keywords: CRISP-DM, data science process, machine learning methodology, data science project management, ML project lifecycle, data science workflow, A/B testing, model deployment, data science best practices, ML model evaluation, cross-functional data teams --- The topic today is the processes in a data science project. We want to understand how cross-functional teams work together to ship real value. We'll use a concrete example (auto-categorizing marketplace listings) and walk through CRISP-DM step by step. 
diff --git a/_podcast/s12e03-data-centric-ai.md b/_podcast/data-centric.md similarity index 98% rename from _podcast/s12e03-data-centric-ai.md rename to _podcast/data-centric.md index af422bc0..8eb40cb7 100644 --- a/_podcast/s12e03-data-centric-ai.md +++ b/_podcast/data-centric.md @@ -1,19 +1,148 @@ --- +title: 'Data-Centric AI: Improve Label Quality & Edit Datasets to Boost Model Performance' +short: Data-Centric AI +season: 12 episode: 3 guests: - marysiawinkels +image: images/podcast/s12e03-data-centric-ai.jpg ids: anchor: Data-Centric-AI---Marysia-Winkels-e1shctn youtube: t3HDdVWQzNM -image: images/podcast/s12e03-data-centric-ai.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Centric-AI---Marysia-Winkels-e1shctn apple: https://podcasts.apple.com/us/podcast/data-centric-ai-marysia-winkels/id1541710331?i=1000592911172 spotify: https://open.spotify.com/episode/6q1yago5iyMt8OmCX1abG3?si=-OaRAwjaRfOfyQ7_QZEbBw youtube: https://www.youtube.com/watch?v=t3HDdVWQzNM -season: 12 -short: Data-Centric AI -title: 'Data-Centric AI: Improve Label Quality & Edit Datasets to Boost Model Performance' + +description: Discover Data-Centric AI tactics to improve label quality and edit datasets to boost model performance, practical workflows, relabeling, augmentation tips +intro: How much can improving label quality and editing your dataset actually boost model performance? In this episode, Marysia Winkels — Lead Data Scientist at GoDataDriven with a Master’s in Artificial Intelligence and a focus on data-efficient deep learning, and co-organizer of PyData Amsterdam/Global — walks through a practical, data-centric approach to that question.

We cover why shifting from “more data” to “better data” matters, especially for transfer learning and fine-tuning, and contrast model-centric vs data-centric workflows. Marysia breaks down a data-centric competition that used a fixed ResNet with an editable dataset, strategies for targeted relabeling using model confidence and embeddings, lightweight data versioning and low-tech tooling (Google Sheets + scripts), and when to use synthetic augmentation versus manual fixes. You’ll also hear about validation-split integrity, detecting dataset gaps with UMAP, acceptance criteria for real-world contexts, shadow-mode rollouts, and the trade-offs of automating dataset repairs.

Listen to learn concrete workflows and heuristics to prioritize impactful data fixes, improve label quality, and make dataset edits that measurably increase model performance. Find additional resources at marysia.nl and PyData +dateadded: 2023-01-07 + +duration: PT00H57M34S + +quotableClips: +- name: Podcast Introduction + startOffset: 86 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=86 + endOffset: 123 +- name: AI education & geometric deep learning in medical imaging + startOffset: 123 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=123 + endOffset: 184 +- name: Data science education and course development + startOffset: 184 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=184 + endOffset: 291 +- name: Building a community of practice and improving product maturity + startOffset: 291 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=291 + endOffset: 324 +- name: 'Data-Centric AI: shifting focus from Big Data to Good Data' + startOffset: 324 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=324 + endOffset: 354 +- name: Model-centric vs data-centric approaches; challenges with unstructured data + startOffset: 354 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=354 + endOffset: 628 +- name: 'Transfer learning & fine-tuning: why label quality matters more now' + startOffset: 628 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=628 + endOffset: 825 +- name: 'Data-centric competition case: fixed ResNet model with editable dataset' + startOffset: 825 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=825 + endOffset: 905 +- name: 'Competition lessons: accessibility, strategy, and innovation award' + startOffset: 905 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=905 + endOffset: 1064 +- name: Strategic data augmentation vs brute-force data collection + startOffset: 1064 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1064 + endOffset: 1126 +- name: 'Mindset shift: treating datasets as editable artifacts' + startOffset: 1126 
+ url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1126 + endOffset: 1164 +- name: Validation split adjustments and maintaining fair model comparisons + startOffset: 1164 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1164 + endOffset: 1345 +- name: Iterating on both data and model; prioritizing impactful data fixes + startOffset: 1345 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1345 + endOffset: 1382 +- name: 'Tooling spectrum: labeling, synthetic data, and data versioning' + startOffset: 1382 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1382 + endOffset: 1404 +- name: 'Practical workflows: lightweight versioning and easy data edits' + startOffset: 1404 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1404 + endOffset: 1586 +- name: 'Low-tech iteration: Google Sheets labeling plus automation scripts' + startOffset: 1586 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1586 + endOffset: 1675 +- name: Targeted relabeling using model confidence and image embeddings + startOffset: 1675 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1675 + endOffset: 1942 +- name: 'Curated resources: Haiti Research and WhyData tool directories' + startOffset: 1942 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1942 + endOffset: 1996 +- name: 'Iterative loop: baseline model, error analysis, and SME validation' + startOffset: 1996 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1996 + endOffset: 2124 +- name: 'Beyond cleaning: representativeness, bias, and dataset completeness' + startOffset: 2124 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2124 + endOffset: 2174 +- name: Detecting dataset gaps with embeddings and UMAP (penguin example) + startOffset: 2174 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2174 + endOffset: 2386 +- name: 'Defining real-world contexts: lighting, angles, and edge cases' + startOffset: 2386 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2386 + endOffset: 2507 +- name: 'Acceptance criteria: 
deciding when dataset quality is sufficient' + startOffset: 2507 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2507 + endOffset: 2653 +- name: 'Production feedback loops: collecting user feedback post-deployment' + startOffset: 2653 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2653 + endOffset: 2812 +- name: 'Shadow mode rollout: passive deployment for safe feedback collection' + startOffset: 2812 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2812 + endOffset: 2949 +- name: 'Scarce or low-quality data: feasibility, manual fixes, and limits' + startOffset: 2949 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2949 + endOffset: 3045 +- name: Automating dataset repairs vs manual editing trade-offs + startOffset: 3045 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3045 + endOffset: 3056 +- name: 'PyData involvement: organizing meetups, tutorials, and global events' + startOffset: 3056 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3056 + endOffset: 3361 +- name: 'PyData vs PyCon: data focus, language inclusivity, and NumFOCUS support' + startOffset: 3361 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3361 + endOffset: 3504 +- name: 'Contact & resources: marysia.nl, LinkedIn, and PyData engagement' + startOffset: 3504 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3504 + endOffset: 3454 + transcript: - header: Podcast Introduction - line: This week, we'll talk about data-centric AI. We have a special guest today, @@ -1243,147 +1372,6 @@ transcript: sec: 3540 time: '59:00' who: Alexey -description: Discover Data-Centric AI tactics to improve label quality and edit datasets - to boost model performance, practical workflows, relabeling, augmentation tips. -intro: How much can improving label quality and editing your dataset actually boost - model performance? 
In this episode, Marysia Winkels — Lead Data Scientist at GoDataDriven - with a Master’s in Artificial Intelligence and a focus on data-efficient deep learning, - and co-organizer of PyData Amsterdam/Global — walks through a practical, data-centric - approach to that question.

We cover why shifting from “more data” to “better - data” matters, especially for transfer learning and fine-tuning, and contrast model-centric - vs data-centric workflows. Marysia breaks down a data-centric competition that used - a fixed ResNet with an editable dataset, strategies for targeted relabeling using - model confidence and embeddings, lightweight data versioning and low-tech tooling - (Google Sheets + scripts), and when to use synthetic augmentation versus manual - fixes. You’ll also hear about validation-split integrity, detecting dataset gaps - with UMAP, acceptance criteria for real-world contexts, shadow-mode rollouts, and - the trade-offs of automating dataset repairs.

Listen to learn concrete - workflows and heuristics to prioritize impactful data fixes, improve label quality, - and make dataset edits that measurably increase model performance. Find additional - resources at marysia.nl and PyData. -dateadded: '2023-01-07' -duration: PT00H57M34S -quotableClips: -- name: Podcast Introduction - startOffset: 86 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=86 - endOffset: 123 -- name: AI education & geometric deep learning in medical imaging - startOffset: 123 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=123 - endOffset: 184 -- name: Data science education and course development - startOffset: 184 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=184 - endOffset: 291 -- name: Building a community of practice and improving product maturity - startOffset: 291 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=291 - endOffset: 324 -- name: 'Data-Centric AI: shifting focus from Big Data to Good Data' - startOffset: 324 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=324 - endOffset: 354 -- name: Model-centric vs data-centric approaches; challenges with unstructured data - startOffset: 354 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=354 - endOffset: 628 -- name: 'Transfer learning & fine-tuning: why label quality matters more now' - startOffset: 628 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=628 - endOffset: 825 -- name: 'Data-centric competition case: fixed ResNet model with editable dataset' - startOffset: 825 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=825 - endOffset: 905 -- name: 'Competition lessons: accessibility, strategy, and innovation award' - startOffset: 905 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=905 - endOffset: 1064 -- name: Strategic data augmentation vs brute-force data collection - startOffset: 1064 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1064 - endOffset: 1126 -- name: 'Mindset shift: treating datasets as editable artifacts' - startOffset: 
1126 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1126 - endOffset: 1164 -- name: Validation split adjustments and maintaining fair model comparisons - startOffset: 1164 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1164 - endOffset: 1345 -- name: Iterating on both data and model; prioritizing impactful data fixes - startOffset: 1345 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1345 - endOffset: 1382 -- name: 'Tooling spectrum: labeling, synthetic data, and data versioning' - startOffset: 1382 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1382 - endOffset: 1404 -- name: 'Practical workflows: lightweight versioning and easy data edits' - startOffset: 1404 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1404 - endOffset: 1586 -- name: 'Low-tech iteration: Google Sheets labeling plus automation scripts' - startOffset: 1586 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1586 - endOffset: 1675 -- name: Targeted relabeling using model confidence and image embeddings - startOffset: 1675 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1675 - endOffset: 1942 -- name: 'Curated resources: Haiti Research and WhyData tool directories' - startOffset: 1942 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1942 - endOffset: 1996 -- name: 'Iterative loop: baseline model, error analysis, and SME validation' - startOffset: 1996 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1996 - endOffset: 2124 -- name: 'Beyond cleaning: representativeness, bias, and dataset completeness' - startOffset: 2124 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2124 - endOffset: 2174 -- name: Detecting dataset gaps with embeddings and UMAP (penguin example) - startOffset: 2174 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2174 - endOffset: 2386 -- name: 'Defining real-world contexts: lighting, angles, and edge cases' - startOffset: 2386 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2386 - endOffset: 2507 -- name: 'Acceptance 
criteria: deciding when dataset quality is sufficient' - startOffset: 2507 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2507 - endOffset: 2653 -- name: 'Production feedback loops: collecting user feedback post-deployment' - startOffset: 2653 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2653 - endOffset: 2812 -- name: 'Shadow mode rollout: passive deployment for safe feedback collection' - startOffset: 2812 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2812 - endOffset: 2949 -- name: 'Scarce or low-quality data: feasibility, manual fixes, and limits' - startOffset: 2949 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2949 - endOffset: 3045 -- name: Automating dataset repairs vs manual editing trade-offs - startOffset: 3045 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3045 - endOffset: 3056 -- name: 'PyData involvement: organizing meetups, tutorials, and global events' - startOffset: 3056 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3056 - endOffset: 3361 -- name: 'PyData vs PyCon: data focus, language inclusivity, and NumFOCUS support' - startOffset: 3361 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3361 - endOffset: 3504 -- name: 'Contact & resources: marysia.nl, LinkedIn, and PyData engagement' - startOffset: 3504 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3504 - endOffset: 3454 --- Links: diff --git a/_podcast/s13e04-starting-consultancy-in-data-space.md b/_podcast/data-consulting-business-pricing-and-client-acquisition.md similarity index 97% rename from _podcast/s13e04-starting-consultancy-in-data-space.md rename to _podcast/data-consulting-business-pricing-and-client-acquisition.md index 377ddeea..06bacd54 100644 --- a/_podcast/s13e04-starting-consultancy-in-data-space.md +++ b/_podcast/data-consulting-business-pricing-and-client-acquisition.md @@ -1,29 +1,129 @@ --- +title: 'Build a Data Consulting Business: Customer Validation, User Interviews & Pricing Strategy' +short: Starting a Consultancy in 
the Data Space +season: 13 episode: 4 guests: - aleksanderkruszelnicki -date: 2025-11-07 -topics: -- consulting -- entrepreneurship -- freelance -- data strategy -- Business Development -- Career Growth -- Startups +image: images/podcast/s13e04-starting-consultancy-in-data-space.jpg ids: anchor: ow/datatalksclub/episodes/Starting-a-Consultancy-in-the-Data-Space---Aleksander-Kruszelnicki-e203c8g youtube: rh_pE35m3vE -image: images/podcast/s13e04-starting-consultancy-in-data-space.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Starting-a-Consultancy-in-the-Data-Space---Aleksander-Kruszelnicki-e203c8g apple: https://podcasts.apple.com/us/podcast/starting-a-consultancy-in-the-data-space/id1541710331?i=1000604682286 spotify: https://open.spotify.com/episode/2Y0mKRHq6wVfr25HJ5Ji3Y?si=kUkmMW2AT6-FeRd6SpXWlg youtube: https://www.youtube.com/watch?v=rh_pE35m3vE -season: 13 -short: Starting a Consultancy in the Data Space -title: 'Build a Data Consulting Business: Customer Validation, User Interviews & Pricing - Strategy' + +description: 'Learn data consulting: customer validation, user interviews and pricing strategy to validate ideas, win clients, set value-based rates & scale your practice.' +intro: How do you validate customers, run effective user interviews, and set pricing to build a sustainable data consulting business? In this episode, Aleksander Kruszelnicki — ex-Delivery Hero product manager turned co-founder of leukos, a boutique data analytics agency in Berlin — walks through the practical steps he took shifting from product ideas to a consulting model after early startup failures.

We cover market and technical limits of “data stack as a service,” first-customer stories, customer validation techniques for pre-product ideas, and a repeatable user interview strategy (questions, cadence, roles, and note-taking). Aleksander explains why value often sits in data modeling over infrastructure, the decision to pivot to hands-on consulting, and team composition (PM + engineer). You’ll also hear tactical guidance on client acquisition (network-first outreach), positioning for European customers and VCs, messaging examples for revenue/marketing optimization, marketing mix (networking, content, LinkedIn), pricing frameworks and rate setting, contract models (day rates vs project pricing), and practical legal/admin steps for registering a consultancy in Germany.

Listen to get actionable methods for customer validation, user interviews, pricing strategy, and building a data consulting business that captures real client value +topics: +- consulting +- entrepreneurship +- freelance +- data strategy +- business development +- career growth +- startups +dateadded: 2023-03-19 +date: 2025-11-07 + +duration: PT01H16S + +quotableClips: +- name: Podcast Introduction + startOffset: 70 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=70 + endOffset: 107 +- name: Career Journey & Archaeology Origin Story + startOffset: 107 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=107 + endOffset: 256 +- name: 'Data Stack as a Service: Market and Technical Limits' + startOffset: 256 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=256 + endOffset: 436 +- name: 'Transition to Consulting: Early Projects and First Customer' + startOffset: 436 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=436 + endOffset: 548 +- name: Customer Validation Techniques for Pre-Product Ideas + startOffset: 548 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=548 + endOffset: 773 +- name: 'User Interview Strategy: Questions, Frequency, and Evidence' + startOffset: 773 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=773 + endOffset: 955 +- name: 'Conducting Interviews: Pair Roles and Note-Taking' + startOffset: 955 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=955 + endOffset: 1067 +- name: 'Team Composition: PM + Engineer Partnership' + startOffset: 1067 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1067 + endOffset: 1081 +- name: 'Lessons from a Failed Product: Premature Build and Market Size' + startOffset: 1081 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1081 + endOffset: 1299 +- name: 'Value Realization: Data Modeling vs Infrastructure' + startOffset: 1299 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1299 + endOffset: 1362 +- name: 'Pivot Decision: Choosing Consulting to Capture Value' + startOffset: 1362 + 
url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1362 + endOffset: 1545 +- name: 'Consulting Approach: Hands-On Implementation and Accountability' + startOffset: 1545 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1545 + endOffset: 1679 +- name: 'Client Acquisition: Network-First Outreach' + startOffset: 1679 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1679 + endOffset: 1817 +- name: 'Positioning Services: Target Customers and Timing' + startOffset: 1817 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1817 + endOffset: 1919 +- name: 'Geographic Strategy: Europe Focus and VC Introductions' + startOffset: 1919 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1919 + endOffset: 2223 +- name: 'Messaging Example: Revenue and Marketing Optimization Offers' + startOffset: 2223 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2223 + endOffset: 2450 +- name: 'Marketing Mix: Networking, Content, and LinkedIn' + startOffset: 2450 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2450 + endOffset: 2719 +- name: 'Pricing Framework: Value-Based Benchmarking' + startOffset: 2719 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2719 + endOffset: 2958 +- name: 'Rate Setting: Starting Rates, Maximums and Minimums' + startOffset: 2958 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2958 + endOffset: 3158 +- name: 'Contract Models: Day Rates vs Project Pricing and Incentives' + startOffset: 3158 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3158 + endOffset: 3478 +- name: 'Legal & Administrative: Registering a Consultancy in Germany' + startOffset: 3478 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3478 + endOffset: 3569 +- name: 'Recommended Reading: Decision-Making and Interviewing Books' + startOffset: 3569 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3569 + endOffset: 3686 +- name: Closing Remarks and Episode Wrap-Up + startOffset: 3686 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3686 + endOffset: 3616 + 
transcript: - header: Podcast Introduction - header: Podcast Introduction @@ -1115,120 +1215,6 @@ transcript: sec: 3686 time: '1:01:26' who: Alexey -intro: How do you validate customers, run effective user interviews, and set pricing - to build a sustainable data consulting business? In this episode, Aleksander Kruszelnicki - — ex-Delivery Hero product manager turned co-founder of leukos, a boutique data - analytics agency in Berlin — walks through the practical steps he took shifting - from product ideas to a consulting model after early startup failures.

- We cover market and technical limits of “data stack as a service,” first-customer - stories, customer validation techniques for pre-product ideas, and a repeatable - user interview strategy (questions, cadence, roles, and note-taking). Aleksander - explains why value often sits in data modeling over infrastructure, the decision - to pivot to hands-on consulting, and team composition (PM + engineer). You’ll also - hear tactical guidance on client acquisition (network-first outreach), positioning - for European customers and VCs, messaging examples for revenue/marketing optimization, - marketing mix (networking, content, LinkedIn), pricing frameworks and rate setting, - contract models (day rates vs project pricing), and practical legal/admin steps - for registering a consultancy in Germany.

Listen to get actionable methods - for customer validation, user interviews, pricing strategy, and building a data - consulting business that captures real client value. -description: 'Learn data consulting: customer validation, user interviews and pricing - strategy to validate ideas, win clients, set value-based rates & scale your practice.' -dateadded: '2023-03-19' -duration: PT01H16S -quotableClips: -- name: Podcast Introduction - startOffset: 70 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=70 - endOffset: 107 -- name: Career Journey & Archaeology Origin Story - startOffset: 107 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=107 - endOffset: 256 -- name: 'Data Stack as a Service: Market and Technical Limits' - startOffset: 256 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=256 - endOffset: 436 -- name: 'Transition to Consulting: Early Projects and First Customer' - startOffset: 436 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=436 - endOffset: 548 -- name: Customer Validation Techniques for Pre-Product Ideas - startOffset: 548 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=548 - endOffset: 773 -- name: 'User Interview Strategy: Questions, Frequency, and Evidence' - startOffset: 773 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=773 - endOffset: 955 -- name: 'Conducting Interviews: Pair Roles and Note-Taking' - startOffset: 955 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=955 - endOffset: 1067 -- name: 'Team Composition: PM + Engineer Partnership' - startOffset: 1067 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1067 - endOffset: 1081 -- name: 'Lessons from a Failed Product: Premature Build and Market Size' - startOffset: 1081 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1081 - endOffset: 1299 -- name: 'Value Realization: Data Modeling vs Infrastructure' - startOffset: 1299 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1299 - endOffset: 1362 -- name: 'Pivot Decision: Choosing Consulting to 
Capture Value' - startOffset: 1362 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1362 - endOffset: 1545 -- name: 'Consulting Approach: Hands-On Implementation and Accountability' - startOffset: 1545 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1545 - endOffset: 1679 -- name: 'Client Acquisition: Network-First Outreach' - startOffset: 1679 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1679 - endOffset: 1817 -- name: 'Positioning Services: Target Customers and Timing' - startOffset: 1817 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1817 - endOffset: 1919 -- name: 'Geographic Strategy: Europe Focus and VC Introductions' - startOffset: 1919 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1919 - endOffset: 2223 -- name: 'Messaging Example: Revenue and Marketing Optimization Offers' - startOffset: 2223 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2223 - endOffset: 2450 -- name: 'Marketing Mix: Networking, Content, and LinkedIn' - startOffset: 2450 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2450 - endOffset: 2719 -- name: 'Pricing Framework: Value-Based Benchmarking' - startOffset: 2719 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2719 - endOffset: 2958 -- name: 'Rate Setting: Starting Rates, Maximums and Minimums' - startOffset: 2958 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2958 - endOffset: 3158 -- name: 'Contract Models: Day Rates vs Project Pricing and Incentives' - startOffset: 3158 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3158 - endOffset: 3478 -- name: 'Legal & Administrative: Registering a Consultancy in Germany' - startOffset: 3478 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3478 - endOffset: 3569 -- name: 'Recommended Reading: Decision-Making and Interviewing Books' - startOffset: 3569 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3569 - endOffset: 3686 -- name: Closing Remarks and Episode Wrap-Up - startOffset: 3686 - url: 
https://www.youtube.com/watch?v=rh_pE35m3vE&t=3686 - endOffset: 3616 --- Links: diff --git a/_podcast/s08e08-teaching-data-engineers.md b/_podcast/data-engineering-career-path-and-skills.md similarity index 98% rename from _podcast/s08e08-teaching-data-engineers.md rename to _podcast/data-engineering-career-path-and-skills.md index 0bc9599a..6f26f7e6 100644 --- a/_podcast/s08e08-teaching-data-engineers.md +++ b/_podcast/data-engineering-career-path-and-skills.md @@ -1,40 +1,148 @@ --- +title: 'Build a Data Engineering Career: Bootcamp Curriculum, SQL Mastery & Interview Prep' +short: Teaching Data Engineers +season: 8 episode: 8 guests: - jeffkatz -intro: How do you build a data engineering career from zero — what should you learn, - how do you master SQL, and how do you pass the interviews? In this episode, Jeff - Katz — former lawyer turned developer, founder of Jigsaw Labs, and current ML engineer - at AppFolio — walks through practical paths into data engineering and how to design - bootcamp curriculum that actually leads to hires.

We cover curriculum development - and pedagogy (active learning, conceptual-first lessons, reinforcement cycles), - core skills to prioritize (Python, SQL, cloud fundamentals), and why junior-focused - programs drop Spark/Kafka/Kubernetes early. Jeff details analytics engineering tools - (DBT, Snowflake, Mode, Fivetran), backend and ETL practices (Flask, codebase navigation, - testing), data modeling (OLTP vs OLAP), and SQL mastery (window functions, medium - LeetCode problems). You’ll also hear about admissions and screening, mid-program - internships for real experience, interview stages (screening calls, SQL tests, on-site - expectations), and tactics for transitioning from data analyst to data engineer. -

Listen for actionable guidance on building a bootcamp-ready portfolio, - targeted interview prep, and the concrete curriculum choices that employers value - in data engineering hires. +image: images/podcast/s08e08-teaching-data-engineers.jpg ids: anchor: Teaching-Data-Engineers---Jeff-Katz-e1iaoru youtube: dFo10l8B6Go -image: images/podcast/s08e08-teaching-data-engineers.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Teaching-Data-Engineers---Jeff-Katz-e1iaoru apple: https://podcasts.apple.com/us/podcast/teaching-data-engineers-jeff-katz/id1541710331?i=1000561145955 spotify: https://open.spotify.com/episode/0Fo6Y62xaWPy7C24eZKfJw?si=lnjgqHUiRdGiZNxE76QMYQ youtube: https://www.youtube.com/watch?v=dFo10l8B6Go -season: 8 -short: Teaching Data Engineers -title: 'Build a Data Engineering Career: Bootcamp Curriculum, SQL Mastery & Interview - Prep' + +description: 'Master data engineering and SQL with a bootcamp curriculum: employer-validated projects, cloud basics, SQL window functions & interview prep for junior roles.' +intro: How do you build a data engineering career from zero — what should you learn, how do you master SQL, and how do you pass the interviews? In this episode, Jeff Katz — former lawyer turned developer, founder of Jigsaw Labs, and current ML engineer at AppFolio — walks through practical paths into data engineering and how to design bootcamp curriculum that actually leads to hires.

We cover curriculum development and pedagogy (active learning, conceptual-first lessons, reinforcement cycles), core skills to prioritize (Python, SQL, cloud fundamentals), and why junior-focused programs drop Spark/Kafka/Kubernetes early. Jeff details analytics engineering tools (DBT, Snowflake, Mode, Fivetran), backend and ETL practices (Flask, codebase navigation, testing), data modeling (OLTP vs OLAP), and SQL mastery (window functions, medium LeetCode problems). You’ll also hear about admissions and screening, mid-program internships for real experience, interview stages (screening calls, SQL tests, on-site expectations), and tactics for transitioning from data analyst to data engineer.

Listen for actionable guidance on building a bootcamp-ready portfolio, targeted interview prep, and the concrete curriculum choices that employers value in data engineering hires topics: - data engineering - education - career growth +dateadded: 2022-05-16 + +duration: PT01H31S + +quotableClips: +- name: Episode Overview & Guest Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=0 + endOffset: 80 +- name: 'Guest Background: Lawyer → Developer → Educator' + startOffset: 80 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=80 + endOffset: 236 +- name: Active Learning & Continuous Student Feedback (teaching methods) + startOffset: 236 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=236 + endOffset: 392 +- name: 'Education as Social Impact: Training, Refugees, Last-mile' + startOffset: 392 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=392 + endOffset: 522 +- name: 'Early Bootcamps: General Assembly and Flatiron School Origins' + startOffset: 522 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=522 + endOffset: 598 +- name: 'Curriculum Development: Market Research & Employer Validation' + startOffset: 598 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=598 + endOffset: 704 +- name: 'Lesson Structure: Syllabi, Labs, Reinforcement Cycles' + startOffset: 704 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=704 + endOffset: 870 +- name: 'Pedagogy: Conceptual Understanding Before Implementation' + startOffset: 870 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=870 + endOffset: 924 +- name: 'Market Shift: Why Data Science Moved Toward Data Engineering' + startOffset: 924 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=924 + endOffset: 1018 +- name: 'Building a School: Affordability, Part-time Model, Career Services' + startOffset: 1018 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1018 + endOffset: 1218 +- name: 'Lowering Barriers: Workshops, Part-time Pathways, Admissions' + startOffset: 1218 + 
url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1218 + endOffset: 1415 +- name: 'Data Engineering Core Skills: Python, SQL, Cloud Fundamentals' + startOffset: 1415 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1415 + endOffset: 1600 +- name: 'Ensuring Hires: Admissions Criteria, Curriculum-Employer Fit, Follow-up' + startOffset: 1600 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1600 + endOffset: 1661 +- name: 'Mid-Program Internships: Employer Projects for Real Experience' + startOffset: 1661 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1661 + endOffset: 1832 +- name: 'Applicant Screening: Technical Interview & Learning Agility' + startOffset: 1832 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1832 + endOffset: 1985 +- name: 'Interview Practice: Apply Early, Learn from Rejection' + startOffset: 1985 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1985 + endOffset: 2178 +- name: 'Analytics Engineering Module: DBT, Snowflake, Mode, Fivetran' + startOffset: 2178 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2178 + endOffset: 2261 +- name: 'Backend Engineering Module: Flask, ETL, Codebase Navigation, Testing' + startOffset: 2261 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2261 + endOffset: 2285 +- name: 'Curriculum Prioritization: Dropping Spark/Kafka/Kubernetes for Juniors' + startOffset: 2285 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2285 + endOffset: 2442 +- name: 'Transition Path: Data Analyst → Data Engineer (backend & cloud focus)' + startOffset: 2442 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2442 + endOffset: 2661 +- name: 'SQL Mastery: Window Functions & Medium LeetCode SQL Problems' + startOffset: 2661 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2661 + endOffset: 2714 +- name: 'Data Modeling Practice: OLTP vs OLAP and Sample Databases' + startOffset: 2714 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2714 + endOffset: 2880 +- name: 'Interview Stages: Screening Calls, 
SQL Tests, On-site Expectations' + startOffset: 2880 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2880 + endOffset: 2992 +- name: 'How to Start Teaching: Pick a Beginner Topic & Teach One Person' + startOffset: 2992 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2992 + endOffset: 3116 +- name: 'Delivery Tactics: In-Person vs Online Engagement and Sequencing' + startOffset: 3116 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3116 + endOffset: 3294 +- name: 'Running a Small School: Curriculum Volume and Time Management' + startOffset: 3294 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3294 + endOffset: 3406 +- name: 'Teaching Fundamentals vs Shiny Tech: 85% Python/SQL, 15% tools' + startOffset: 3406 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3406 + endOffset: 3571 +- name: 'Outcomes & Next Cohort: JigsawLabs Results and Start Date' + startOffset: 3571 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3571 + endOffset: 3621 +- name: 'Contact & Follow-up: Jeff Katz, Webinar on Getting Data Engineering Jobs' + startOffset: 3621 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3621 + endOffset: 3631 + transcript: - header: Episode Overview & Guest Introduction - header: 'Guest Background: Lawyer → Developer → Educator' @@ -1298,127 +1406,6 @@ transcript: sec: 3711 time: '1:01:51' who: Jeff -description: 'Master data engineering and SQL with a bootcamp curriculum: employer-validated - projects, cloud basics, SQL window functions & interview prep for junior roles.' 
-dateadded: '2022-05-16' -duration: PT01H31S -quotableClips: -- name: Episode Overview & Guest Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=0 - endOffset: 80 -- name: 'Guest Background: Lawyer → Developer → Educator' - startOffset: 80 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=80 - endOffset: 236 -- name: Active Learning & Continuous Student Feedback (teaching methods) - startOffset: 236 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=236 - endOffset: 392 -- name: 'Education as Social Impact: Training, Refugees, Last-mile' - startOffset: 392 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=392 - endOffset: 522 -- name: 'Early Bootcamps: General Assembly and Flatiron School Origins' - startOffset: 522 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=522 - endOffset: 598 -- name: 'Curriculum Development: Market Research & Employer Validation' - startOffset: 598 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=598 - endOffset: 704 -- name: 'Lesson Structure: Syllabi, Labs, Reinforcement Cycles' - startOffset: 704 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=704 - endOffset: 870 -- name: 'Pedagogy: Conceptual Understanding Before Implementation' - startOffset: 870 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=870 - endOffset: 924 -- name: 'Market Shift: Why Data Science Moved Toward Data Engineering' - startOffset: 924 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=924 - endOffset: 1018 -- name: 'Building a School: Affordability, Part-time Model, Career Services' - startOffset: 1018 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1018 - endOffset: 1218 -- name: 'Lowering Barriers: Workshops, Part-time Pathways, Admissions' - startOffset: 1218 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1218 - endOffset: 1415 -- name: 'Data Engineering Core Skills: Python, SQL, Cloud Fundamentals' - startOffset: 1415 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1415 - endOffset: 
1600 -- name: 'Ensuring Hires: Admissions Criteria, Curriculum-Employer Fit, Follow-up' - startOffset: 1600 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1600 - endOffset: 1661 -- name: 'Mid-Program Internships: Employer Projects for Real Experience' - startOffset: 1661 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1661 - endOffset: 1832 -- name: 'Applicant Screening: Technical Interview & Learning Agility' - startOffset: 1832 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1832 - endOffset: 1985 -- name: 'Interview Practice: Apply Early, Learn from Rejection' - startOffset: 1985 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1985 - endOffset: 2178 -- name: 'Analytics Engineering Module: DBT, Snowflake, Mode, Fivetran' - startOffset: 2178 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2178 - endOffset: 2261 -- name: 'Backend Engineering Module: Flask, ETL, Codebase Navigation, Testing' - startOffset: 2261 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2261 - endOffset: 2285 -- name: 'Curriculum Prioritization: Dropping Spark/Kafka/Kubernetes for Juniors' - startOffset: 2285 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2285 - endOffset: 2442 -- name: 'Transition Path: Data Analyst → Data Engineer (backend & cloud focus)' - startOffset: 2442 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2442 - endOffset: 2661 -- name: 'SQL Mastery: Window Functions & Medium LeetCode SQL Problems' - startOffset: 2661 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2661 - endOffset: 2714 -- name: 'Data Modeling Practice: OLTP vs OLAP and Sample Databases' - startOffset: 2714 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2714 - endOffset: 2880 -- name: 'Interview Stages: Screening Calls, SQL Tests, On-site Expectations' - startOffset: 2880 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2880 - endOffset: 2992 -- name: 'How to Start Teaching: Pick a Beginner Topic & Teach One Person' - startOffset: 2992 - url: 
https://www.youtube.com/watch?v=dFo10l8B6Go&t=2992 - endOffset: 3116 -- name: 'Delivery Tactics: In-Person vs Online Engagement and Sequencing' - startOffset: 3116 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3116 - endOffset: 3294 -- name: 'Running a Small School: Curriculum Volume and Time Management' - startOffset: 3294 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3294 - endOffset: 3406 -- name: 'Teaching Fundamentals vs Shiny Tech: 85% Python/SQL, 15% tools' - startOffset: 3406 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3406 - endOffset: 3571 -- name: 'Outcomes & Next Cohort: JigsawLabs Results and Start Date' - startOffset: 3571 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3571 - endOffset: 3621 -- name: 'Contact & Follow-up: Jeff Katz, Webinar on Getting Data Engineering Jobs' - startOffset: 3621 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3621 - endOffset: 3631 --- Links: diff --git a/_podcast/s07e07-becoming-a-data-engineering-manager.md b/_podcast/data-engineering-leadership-and-modern-data-platforms.md similarity index 97% rename from _podcast/s07e07-becoming-a-data-engineering-manager.md rename to _podcast/data-engineering-leadership-and-modern-data-platforms.md index 7d8db751..4ae9bf24 100644 --- a/_podcast/s07e07-becoming-a-data-engineering-manager.md +++ b/_podcast/data-engineering-leadership-and-modern-data-platforms.md @@ -1,42 +1,133 @@ --- +title: 'Data Engineering Leadership: Scale ETL to ELT, Build Robust Data Platforms & Teams' +short: Becoming a Data Engineering Manager +season: 7 episode: 7 guests: - 16rahuljain -description: Learn to scale ETL to ELT and build resilient data platforms—gain leadership - skills, stakeholder management, data quality metrics and hiring tips. -intro: 'How do you lead a data engineering team to scale ETL into ELT, build a robust - data platform, and maintain data quality as you grow? 
In this episode, Rahul Jain - — a data engineering manager at Siemens with 12+ years in data and three years in - management — walks through that transition from ETL developer to IoT data platform - lead and what leadership looks like in practice.

We cover practical topics - like migrating ETL to ELT architectures, data lake and data lineage design, and - end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul - discusses stakeholder management, prioritization, hands-on technical credibility, - balancing individual contributor work with people management, and onboarding strategies - to build trust and delegate effectively. He shares approaches for measuring success - (data culture, consumers served, data quality), detecting data reconciliation issues, - GDPR tactics like dynamic data masking and role‑based access, and how to evaluate - new tools (example: Prefect). Hiring, interview screening, and essential skills - (SQL, Python, CI/CD, cloud) are also explored.

Listen to gain concrete - leadership and technical guidance for scaling data platforms, improving throughput, - and enabling your team to deliver reliable, compliant data products.' -topics: -- data engineering -- career growth -- career switch +image: images/podcast/s07e07-becoming-a-data-engineering-manager.jpg ids: anchor: Becoming-a-Data-Engineering-Manager---Rahul-Jain-e1f5nvf youtube: FljnbUQ796w -image: images/podcast/s07e07-becoming-a-data-engineering-manager.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-Engineering-Manager---Rahul-Jain-e1f5nvf apple: https://podcasts.apple.com/us/podcast/becoming-a-data-engineering-manager-rahul-jain/id1541710331?i=1000552953646 spotify: https://open.spotify.com/episode/4nWP18woLTt4a7Wm0CQwhM youtube: https://www.youtube.com/watch?v=FljnbUQ796w -season: 7 -short: Becoming a Data Engineering Manager -title: 'Data Engineering Leadership: Scale ETL to ELT, Build Robust Data Platforms - & Teams' + +description: Learn to scale ETL to ELT and build resilient data platforms—gain leadership skills, stakeholder management, data quality metrics and hiring tips +intro: 'How do you lead a data engineering team to scale ETL into ELT, build a robust data platform, and maintain data quality as you grow? In this episode, Rahul Jain — a data engineering manager at Siemens with 12+ years in data and three years in management — walks through that transition from ETL developer to IoT data platform lead and what leadership looks like in practice.

We cover practical topics like migrating ETL to ELT architectures, data lake and data lineage design, and end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul discusses stakeholder management, prioritization, hands-on technical credibility, balancing individual contributor work with people management, and onboarding strategies to build trust and delegate effectively. He shares approaches for measuring success (data culture, consumers served, data quality), detecting data reconciliation issues, GDPR tactics like dynamic data masking and role‑based access, and how to evaluate new tools (example: Prefect). Hiring, interview screening, and essential skills (SQL, Python, CI/CD, cloud) are also explored.

Listen to gain concrete leadership and technical guidance for scaling data platforms, improving throughput, and enabling your team to deliver reliable, compliant data products.' +topics: +- data engineering +- career growth +- career switch +dateadded: 2022-03-06 + +duration: PT00H59M31S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=0 + endOffset: 116 +- name: 'Rahul''s Career Path: From ETL Developer to IoT Data Platform Lead' + startOffset: 116 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=116 + endOffset: 212 +- name: ETL Foundations to Big Data and Open Source Tooling + startOffset: 212 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=212 + endOffset: 292 +- name: 'Data Engineering Leadership: Stakeholder Management & Prioritization' + startOffset: 292 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=292 + endOffset: 447 +- name: 'Technical Credibility: Hands-on Management and Code-Level Involvement' + startOffset: 447 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=447 + endOffset: 534 +- name: 'Time Allocation: Balancing Individual Contributor Work with People Management' + startOffset: 534 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=534 + endOffset: 669 +- name: 'Transition into Management: Business Acumen and Seeing the Bigger Picture' + startOffset: 669 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=669 + endOffset: 795 +- name: 'Core Manager Traits: Empathy, Situational Awareness, and Quality Standards' + startOffset: 795 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=795 + endOffset: 894 +- name: 'Continuous Learning: Evaluating New Tools and Prototypes (example: Prefect)' + startOffset: 894 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=894 + endOffset: 992 +- name: 'Onboarding Challenges: Building Trust, Prioritization, and Delegation' + startOffset: 992 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=992 + endOffset: 1395 +- 
name: 'Expectation Framework: Non‑Negotiable Deliverables vs. Stretch (Aspirational) + Goals' + startOffset: 1395 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1395 + endOffset: 1504 +- name: 'Measuring Success: Data Culture, Consumers Served, and Data Quality Metrics' + startOffset: 1504 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1504 + endOffset: 1684 +- name: 'Data Reconciliation: Detecting Losses Between Sources and Targets' + startOffset: 1684 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1684 + endOffset: 1741 +- name: 'GDPR Strategies: Dynamic Data Masking and Role-Based Access Control' + startOffset: 1741 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1741 + endOffset: 1850 +- name: 'Modeling at Scale: Moving from ETL to ELT, Data Lake, and Data Lineage' + startOffset: 1850 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1850 + endOffset: 2019 +- name: 'Manager Transition Advice: Prioritize Business Impact and Enable Team Growth' + startOffset: 2019 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2019 + endOffset: 2138 +- name: 'Sustaining Relevance: Automate Monotony and Improve Throughput' + startOffset: 2138 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2138 + endOffset: 2316 +- name: 'Essential Data Engineering Skills: SQL, Python, CI/CD, Cloud, and Ownership' + startOffset: 2316 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2316 + endOffset: 2460 +- name: 'Interview Screening: Communicating Projects Clearly in Five Minutes' + startOffset: 2460 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2460 + endOffset: 2688 +- name: 'Hiring Assessment: Hypotheticals, Leadership Traits, and Future Potential' + startOffset: 2688 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2688 + endOffset: 2833 +- name: 'Top Hires: Due Diligence, Cultural Fit, and Assertiveness' + startOffset: 2833 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2833 + endOffset: 2975 +- name: 'Filtering Buzzwords: Ask for 
Context, Alternatives, and Real Use Cases' + startOffset: 2975 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2975 + endOffset: 3274 +- name: 'Advice for Students: Master DBMS, SQL, and Fundamentals Over Specific Tools' + startOffset: 3274 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3274 + endOffset: 3449 +- name: 'End-to-End Data Pipeline Overview: Ingestion, Central Hub, Exposure, Monitoring' + startOffset: 3449 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3449 + endOffset: 3599 +- name: Closing Remarks and Connect with Rahul on LinkedIn + startOffset: 3599 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3599 + endOffset: 3571 + transcript: - header: Podcast Introduction - line: This week, we'll talk about becoming a data engineering manager. We have a @@ -1105,108 +1196,4 @@ transcript: sec: 3650 time: '1:00:50' who: Rahul -dateadded: '2022-03-06' -duration: PT00H59M31S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=0 - endOffset: 116 -- name: 'Rahul''s Career Path: From ETL Developer to IoT Data Platform Lead' - startOffset: 116 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=116 - endOffset: 212 -- name: ETL Foundations to Big Data and Open Source Tooling - startOffset: 212 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=212 - endOffset: 292 -- name: 'Data Engineering Leadership: Stakeholder Management & Prioritization' - startOffset: 292 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=292 - endOffset: 447 -- name: 'Technical Credibility: Hands-on Management and Code-Level Involvement' - startOffset: 447 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=447 - endOffset: 534 -- name: 'Time Allocation: Balancing Individual Contributor Work with People Management' - startOffset: 534 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=534 - endOffset: 669 -- name: 'Transition into Management: Business Acumen and Seeing the Bigger Picture' - 
startOffset: 669 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=669 - endOffset: 795 -- name: 'Core Manager Traits: Empathy, Situational Awareness, and Quality Standards' - startOffset: 795 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=795 - endOffset: 894 -- name: 'Continuous Learning: Evaluating New Tools and Prototypes (example: Prefect)' - startOffset: 894 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=894 - endOffset: 992 -- name: 'Onboarding Challenges: Building Trust, Prioritization, and Delegation' - startOffset: 992 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=992 - endOffset: 1395 -- name: 'Expectation Framework: Non‑Negotiable Deliverables vs. Stretch (Aspirational) - Goals' - startOffset: 1395 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1395 - endOffset: 1504 -- name: 'Measuring Success: Data Culture, Consumers Served, and Data Quality Metrics' - startOffset: 1504 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1504 - endOffset: 1684 -- name: 'Data Reconciliation: Detecting Losses Between Sources and Targets' - startOffset: 1684 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1684 - endOffset: 1741 -- name: 'GDPR Strategies: Dynamic Data Masking and Role-Based Access Control' - startOffset: 1741 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1741 - endOffset: 1850 -- name: 'Modeling at Scale: Moving from ETL to ELT, Data Lake, and Data Lineage' - startOffset: 1850 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1850 - endOffset: 2019 -- name: 'Manager Transition Advice: Prioritize Business Impact and Enable Team Growth' - startOffset: 2019 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2019 - endOffset: 2138 -- name: 'Sustaining Relevance: Automate Monotony and Improve Throughput' - startOffset: 2138 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2138 - endOffset: 2316 -- name: 'Essential Data Engineering Skills: SQL, Python, CI/CD, Cloud, and Ownership' - startOffset: 2316 - url: 
https://www.youtube.com/watch?v=FljnbUQ796w&t=2316 - endOffset: 2460 -- name: 'Interview Screening: Communicating Projects Clearly in Five Minutes' - startOffset: 2460 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2460 - endOffset: 2688 -- name: 'Hiring Assessment: Hypotheticals, Leadership Traits, and Future Potential' - startOffset: 2688 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2688 - endOffset: 2833 -- name: 'Top Hires: Due Diligence, Cultural Fit, and Assertiveness' - startOffset: 2833 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2833 - endOffset: 2975 -- name: 'Filtering Buzzwords: Ask for Context, Alternatives, and Real Use Cases' - startOffset: 2975 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2975 - endOffset: 3274 -- name: 'Advice for Students: Master DBMS, SQL, and Fundamentals Over Specific Tools' - startOffset: 3274 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3274 - endOffset: 3449 -- name: 'End-to-End Data Pipeline Overview: Ingestion, Central Hub, Exposure, Monitoring' - startOffset: 3449 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3449 - endOffset: 3599 -- name: Closing Remarks and Connect with Rahul on LinkedIn - startOffset: 3599 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3599 - endOffset: 3571 --- diff --git a/_podcast/s05e02-data-engineering-acronyms.md b/_podcast/data-engineering-tools-modern-data-stack.md similarity index 98% rename from _podcast/s05e02-data-engineering-acronyms.md rename to _podcast/data-engineering-tools-modern-data-stack.md index c7617bb5..3c729d91 100644 --- a/_podcast/s05e02-data-engineering-acronyms.md +++ b/_podcast/data-engineering-tools-modern-data-stack.md @@ -1,12 +1,11 @@ --- title: 'ETL vs ELT & Data Lake vs Warehouse: Airbyte, dbt, CDC for Modern Data Engineering' short: Making Sense of Data Engineering Acronyms and Buzzwords +season: 5 +episode: 2 guests: - nataliekwong image: images/podcast/s05e02-data-engineering-acronyms.jpg -season: 5 -date: 
2025-11-07 -episode: 2 ids: youtube: t9Z1S3OYnJU anchor: Making-Sense-of-Data-Engineering-Acronyms-and-Buzzwords---Natalie-Kwong-e177303 @@ -15,6 +14,135 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Making-Sense-of-Data-Engineering-Acronyms-and-Buzzwords---Natalie-Kwong-e177303 spotify: https://open.spotify.com/episode/1AvtwdcAXGGjdJ7fl0Hsuw apple: https://podcasts.apple.com/us/podcast/making-sense-of-data-engineering-acronyms-and/id1541710331?i=1000534990760 + +description: Discover ETL vs ELT, data lake vs data warehouse with Airbyte and dbt—learn CDC, orchestration, and governance to design reliable, fast modern data pipelines +intro: How do you decide between ETL and ELT, or when to keep a data lake versus a warehouse—and where do tools like Airbyte, dbt, and CDC fit into a modern data stack? In this episode, Natalie Kwong, Growth Product Manager at Airbyte with prior analytics and ops roles at Harness, KeepTruckin, and AppDynamics, pulls from hands-on experience scaling analytics teams and systems to unpack these trade-offs.

We break down core concepts—ETL (traditional extract-transform-load) vs ELT (load then transform), the rise of the analytics engineer, and why ELT favors analyst autonomy with dbt. Natalie explains Airbyte's role as a connector/ingestion layer, CDC for row-level change syncing, and orchestration with Airflow. We also cover data lake vs data warehouse purposes, preventing data swamps through governance, schema evolution, operational reverse data flows, and when hybrid architectures make sense.

If you're designing a modern data platform or refining pipelines, this episode offers practical guidance on ETL vs ELT decisions, choosing lakes vs warehouses, leveraging Airbyte and dbt, and operational considerations like data quality, orchestration, and cleanup practices +topics: +- data engineering +- tools +dateadded: 2021-09-11 +date: 2025-11-07 + +duration: PT00H59M55S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=0 + endOffset: 94 +- name: 'Episode Overview: Decoding Data Engineering Acronyms' + startOffset: 94 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=94 + endOffset: 118 +- name: 'Guest Career Journey: From Marketing Ops to Analytics & Growth' + startOffset: 118 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=118 + endOffset: 199 +- name: 'Airbyte Overview: ELT Focus and Connector Purpose' + startOffset: 199 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=199 + endOffset: 226 +- name: 'ETL Explained: Extract, Transform, Load (Traditional Model)' + startOffset: 226 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=226 + endOffset: 397 +- name: 'ETL Use Case: Calculating Customer Acquisition Cost' + startOffset: 397 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=397 + endOffset: 477 +- name: 'ELT Advantages: Flexibility, Speed, and Analyst Autonomy' + startOffset: 477 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=477 + endOffset: 600 +- name: 'Transformations in Practice: From Type Casting to Complex SQL Joins' + startOffset: 600 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=600 + endOffset: 759 +- name: 'Analytics Engineer Emergence: Empowering Analysts with DBT & SQL' + startOffset: 759 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=759 + endOffset: 930 +- name: 'Data Marts vs. 
Warehouses: Purpose, Layers, and Consumption' + startOffset: 930 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=930 + endOffset: 1075 +- name: 'Ingestion Layer: Raw Data Storage, Sanity, and Guardrails' + startOffset: 1075 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1075 + endOffset: 1127 +- name: 'Bringing Transforms Into the Warehouse: ELT vs Legacy Workflows' + startOffset: 1127 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1127 + endOffset: 1190 +- name: 'Data Lakes: Unstructured Storage for Files, Logs, and Media' + startOffset: 1190 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1190 + endOffset: 1282 +- name: 'Data Quality: Preventing Data Swamps Through Governance' + startOffset: 1282 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1282 + endOffset: 1464 +- name: 'Warehouse Ingestion vs. Data Lake: Trade-offs and Convergence' + startOffset: 1464 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1464 + endOffset: 1659 +- name: 'Architecture Decision: When to Maintain a Lake, a Warehouse, or Both' + startOffset: 1659 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1659 + endOffset: 1859 +- name: 'Orchestration: Airflow’s Role in Scheduling and Running Pipelines' + startOffset: 1859 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1859 + endOffset: 1891 +- name: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' + startOffset: 1891 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1891 + endOffset: 2025 +- name: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' + startOffset: 2025 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2025 + endOffset: 2142 +- name: 'Operational Reverse Data Flows: Pushing Warehouse Tables Back to Sources' + startOffset: 2142 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2142 + endOffset: 2346 +- name: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing Them' + startOffset: 2346 + url: 
https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2346 + endOffset: 2490 +- name: 'ETL’s Continued Relevance: Large Enterprises and Complex Staging Needs' + startOffset: 2490 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2490 + endOffset: 2582 +- name: 'Managing Unused Data: Team Ownership and Regular Cleanup Practices' + startOffset: 2582 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2582 + endOffset: 2625 +- name: 'Open Source Strategy: Why Airbyte Is Open and the Cloud Offering Model' + startOffset: 2625 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2625 + endOffset: 2759 +- name: 'CDC Explained: Capturing and Syncing Only Row-Level Changes' + startOffset: 2759 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2759 + endOffset: 2906 +- name: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' + startOffset: 2906 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2906 + endOffset: 2938 +- name: 'Schema Evolution: Handling Slowly Changing Attributes' + startOffset: 2938 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2938 + endOffset: 2972 +- name: 'Licensing Considerations: MIT, Cloud Products, and Future Choices' + startOffset: 2972 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2972 + endOffset: 3642 +- name: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' + startOffset: 3642 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=3642 + endOffset: 3595 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Decoding Data Engineering Acronyms' @@ -1607,142 +1735,6 @@ transcript: sec: 3689 time: '1:01:29' who: Alexey -intro: How do you decide between ETL and ELT, or when to keep a data lake versus a - warehouse—and where do tools like Airbyte, dbt, and CDC fit into a modern data stack? 
- In this episode, Natalie Kwong, Growth Product Manager at Airbyte with prior analytics - and ops roles at Harness, KeepTruckin, and AppDynamics, pulls from hands-on experience - scaling analytics teams and systems to unpack these trade-offs.

We break - down core concepts—ETL (traditional extract-transform-load) vs ELT (load then transform), - the rise of the analytics engineer, and why ELT favors analyst autonomy with dbt. - Natalie explains Airbyte's role as a connector/ingestion layer, CDC for row-level - change syncing, and orchestration with Airflow. We also cover data lake vs data - warehouse purposes, preventing data swamps through governance, schema evolution, - operational reverse data flows, and when hybrid architectures make sense.

- If you're designing a modern data platform or refining pipelines, this episode offers - practical guidance on ETL vs ELT decisions, choosing lakes vs warehouses, leveraging - Airbyte and dbt, and operational considerations like data quality, orchestration, - and cleanup practices. -description: Discover ETL vs ELT, data lake vs data warehouse with Airbyte and dbt—learn - CDC, orchestration, and governance to design reliable, fast modern data pipelines. -dateadded: '2021-09-11' -duration: PT00H59M55S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=0 - endOffset: 94 -- name: 'Episode Overview: Decoding Data Engineering Acronyms' - startOffset: 94 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=94 - endOffset: 118 -- name: 'Guest Career Journey: From Marketing Ops to Analytics & Growth' - startOffset: 118 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=118 - endOffset: 199 -- name: 'Airbyte Overview: ELT Focus and Connector Purpose' - startOffset: 199 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=199 - endOffset: 226 -- name: 'ETL Explained: Extract, Transform, Load (Traditional Model)' - startOffset: 226 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=226 - endOffset: 397 -- name: 'ETL Use Case: Calculating Customer Acquisition Cost' - startOffset: 397 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=397 - endOffset: 477 -- name: 'ELT Advantages: Flexibility, Speed, and Analyst Autonomy' - startOffset: 477 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=477 - endOffset: 600 -- name: 'Transformations in Practice: From Type Casting to Complex SQL Joins' - startOffset: 600 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=600 - endOffset: 759 -- name: 'Analytics Engineer Emergence: Empowering Analysts with DBT & SQL' - startOffset: 759 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=759 - endOffset: 930 -- name: 'Data Marts vs. 
Warehouses: Purpose, Layers, and Consumption' - startOffset: 930 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=930 - endOffset: 1075 -- name: 'Ingestion Layer: Raw Data Storage, Sanity, and Guardrails' - startOffset: 1075 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1075 - endOffset: 1127 -- name: 'Bringing Transforms Into the Warehouse: ELT vs Legacy Workflows' - startOffset: 1127 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1127 - endOffset: 1190 -- name: 'Data Lakes: Unstructured Storage for Files, Logs, and Media' - startOffset: 1190 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1190 - endOffset: 1282 -- name: 'Data Quality: Preventing Data Swamps Through Governance' - startOffset: 1282 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1282 - endOffset: 1464 -- name: 'Warehouse Ingestion vs. Data Lake: Trade-offs and Convergence' - startOffset: 1464 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1464 - endOffset: 1659 -- name: 'Architecture Decision: When to Maintain a Lake, a Warehouse, or Both' - startOffset: 1659 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1659 - endOffset: 1859 -- name: 'Orchestration: Airflow’s Role in Scheduling and Running Pipelines' - startOffset: 1859 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1859 - endOffset: 1891 -- name: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' - startOffset: 1891 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1891 - endOffset: 2025 -- name: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' - startOffset: 2025 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2025 - endOffset: 2142 -- name: 'Operational Reverse Data Flows: Pushing Warehouse Tables Back to Sources' - startOffset: 2142 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2142 - endOffset: 2346 -- name: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing Them' - startOffset: 2346 - url: 
https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2346 - endOffset: 2490 -- name: 'ETL’s Continued Relevance: Large Enterprises and Complex Staging Needs' - startOffset: 2490 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2490 - endOffset: 2582 -- name: 'Managing Unused Data: Team Ownership and Regular Cleanup Practices' - startOffset: 2582 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2582 - endOffset: 2625 -- name: 'Open Source Strategy: Why Airbyte Is Open and the Cloud Offering Model' - startOffset: 2625 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2625 - endOffset: 2759 -- name: 'CDC Explained: Capturing and Syncing Only Row-Level Changes' - startOffset: 2759 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2759 - endOffset: 2906 -- name: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' - startOffset: 2906 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2906 - endOffset: 2938 -- name: 'Schema Evolution: Handling Slowly Changing Attributes' - startOffset: 2938 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2938 - endOffset: 2972 -- name: 'Licensing Considerations: MIT, Cloud Products, and Future Choices' - startOffset: 2972 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2972 - endOffset: 3642 -- name: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' - startOffset: 3642 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=3642 - endOffset: 3595 --- Links: diff --git a/_podcast/s14e04-data-access-management.md b/_podcast/data-governance-data-access-management.md similarity index 97% rename from _podcast/s14e04-data-access-management.md rename to _podcast/data-governance-data-access-management.md index cc1c5103..4f82c4da 100644 --- a/_podcast/s14e04-data-access-management.md +++ b/_podcast/data-governance-data-access-management.md @@ -1,19 +1,150 @@ --- +title: 'Data Governance & Data Access Management: Access Controls, Data Catalogs & Access-as-Code' +short: Data Governance & Data 
Access Management +season: 14 episode: 4 guests: - bartvandekerckhove +image: images/podcast/s14e04-data-access-management.jpg ids: anchor: ow/datatalksclub/episodes/Data-Access-Management---Bart-Vandekerckhove-e253r4u youtube: IiPOIiUy5b4 -image: images/podcast/s14e04-data-access-management.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Data-Access-Management---Bart-Vandekerckhove-e253r4u apple: https://podcasts.apple.com/us/podcast/data-access-management-bart-vandekerckhove/id1541710331?i=1000615456026 spotify: https://open.spotify.com/episode/5PDgK1FsGNtKAAyiXOppRs?si=QZDP8k38Q0e4LaZtl4lCMA youtube: https://www.youtube.com/watch?v=IiPOIiUy5b4 -season: 14 -short: Data Access Management -title: 'Data Access Management: Access Controls, Data Catalogs & Access-as-Code' + +description: Master Data Access Management with data catalog, access controls & access-as-code to stop privilege creep, speed investigations and ensure compliance +intro: 'How do you scale data access management—from access controls and data catalogs to access-as-code—without slowing innovation? In this episode, Bart Vandekerckhove, co-founder and CEO at Raito and former PM of Privacy at Collibra, walks through practical approaches born from consulting with banks (BCBS 239) and tackling early data governance pain.

We explore what effective data governance looks like for building trust in data, the differences between data catalogs, dictionaries and lineage, and how cloud consolidation and Chinese walls shape access management. Bart covers ownership models (data teams, governance teams, data mesh), common skill gaps for data engineers, and core processes: access requests, approvals, reviews and revocation. You’ll hear actionable tactics for preventing privilege creep—time-bound access, revocation workflows—and guidance on GDPR, privacy vs security roles, and debugging with temporary access.

Later segments dive into DataOps patterns (active metadata, automated tagging), avoiding role explosion, and the rise of access-as-code with Terraform and IAM. Listeners will gain a clear, incremental strategy for implementing access controls, leveraging data catalogs, and evaluating build vs buy or open source options to scale data access management.' +topics: +- data governance +dateadded: 2023-06-03 + +duration: PT00H55M54S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=0 + endOffset: 87 +- name: 'Episode Overview: Data Access Management & Guest Summary' + startOffset: 87 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=87 + endOffset: 143 +- name: Guest Introduction & Career Path + startOffset: 143 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=143 + endOffset: 206 +- name: 'Consulting Background: Banks, BCBS 239 and data trauma' + startOffset: 206 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=206 + endOffset: 280 +- name: 'Early Data Governance Pain: Manual tools and outdated lineage' + startOffset: 280 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=280 + endOffset: 320 +- name: 'Defining Data Governance: Building trust in data' + startOffset: 320 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=320 + endOffset: 412 +- name: 'Legacy Governance Problems: Top-down models and friction' + startOffset: 412 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=412 + endOffset: 538 +- name: 'Data Catalogs, Dictionaries & Lineage: Purpose and differences' + startOffset: 538 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=538 + endOffset: 680 +- name: 'Data Access Management Defined: Cloud consolidation and Chinese walls' + startOffset: 680 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=680 + endOffset: 814 +- name: 'Ownership Models: Data teams, governance teams, and data mesh' + startOffset: 814 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=814 + 
endOffset: 887 +- name: 'Data Engineers & Access Requests: Skill gaps and role mismatch' + startOffset: 887 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=887 + endOffset: 1038 +- name: 'Governance Skillset: Change management and DMBOK guidance' + startOffset: 1038 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1038 + endOffset: 1188 +- name: 'Maturing Access Management: Incremental improvement and scaling' + startOffset: 1188 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1188 + endOffset: 1310 +- name: 'Learning Resources: Books, Slack communities, and conferences' + startOffset: 1310 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1310 + endOffset: 1383 +- name: 'When to Invest: Size, maturity signals, and trust erosion' + startOffset: 1383 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1383 + endOffset: 1505 +- name: Start with Access Controls Early for Sensitive Data + startOffset: 1505 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1505 + endOffset: 1669 +- name: 'Core Processes: Access requests, approvals, reviews, revocation' + startOffset: 1669 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1669 + endOffset: 1776 +- name: 'Churn Use Case: Catalog discovery, purpose-based access requests' + startOffset: 1776 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1776 + endOffset: 1928 +- name: 'Privilege Creep & Best Practices: Time-bound access and revocation' + startOffset: 1928 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1928 + endOffset: 2002 +- name: 'Regulatory Context: GDPR, privacy awareness, and EU perspective' + startOffset: 2002 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2002 + endOffset: 2135 +- name: 'Debugging in Production: Temporary access and investigation workflows' + startOffset: 2135 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2135 + endOffset: 2239 +- name: 'Privacy vs Security Stakeholders: DPO needs and CISO responsibilities' + startOffset: 2239 + url: 
https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2239 + endOffset: 2540 +- name: 'Data Mesh & Sensitive Data: Federated governance, masking, filtering' + startOffset: 2540 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2540 + endOffset: 2695 +- name: 'Avoiding Role Explosion: Role inheritance, reviews, and alerts' + startOffset: 2695 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2695 + endOffset: 2802 +- name: 'Governance in DataOps: Active metadata, automated tagging, and pipelines' + startOffset: 2802 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2802 + endOffset: 3008 +- name: 'Access-as-Code Beginnings: Terraform, IAM and early patterns' + startOffset: 3008 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3008 + endOffset: 3230 +- name: 'Build vs Buy: Maintenance cost, connector updates, key-person risk' + startOffset: 3230 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3230 + endOffset: 3282 +- name: 'Gradual Adoption Strategy: Visibility-first onboarding and automation' + startOffset: 3282 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3282 + endOffset: 3356 +- name: 'Open Source Options: Raito CLI, Terraform patterns and limitations' + startOffset: 3356 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3356 + endOffset: 3396 +- name: Closing Remarks & Next Steps + startOffset: 3396 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3396 + endOffset: 3354 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Data Access Management & Guest Summary' @@ -1052,147 +1183,6 @@ transcript: sec: 3441 time: '57:21' who: Alexey -description: Master Data Access Management with data catalog, access controls & access-as-code - to stop privilege creep, speed investigations and ensure compliance. -intro: 'How do you scale data access management—from access controls and data catalogs - to access-as-code—without slowing innovation? 
In this episode, Bart Vandekerckhove, - co-founder and CEO at Raito and former PM of Privacy at Collibra, walks through - practical approaches born from consulting with banks (BCBS 239) and tackling early - data governance pain.

We explore what effective data governance looks like - for building trust in data, the differences between data catalogs, dictionaries - and lineage, and how cloud consolidation and Chinese walls shape access management. - Bart covers ownership models (data teams, governance teams, data mesh), common skill - gaps for data engineers, and core processes: access requests, approvals, reviews - and revocation. You’ll hear actionable tactics for preventing privilege creep—time-bound - access, revocation workflows—and guidance on GDPR, privacy vs security roles, and - debugging with temporary access.

Later segments dive into DataOps patterns - (active metadata, automated tagging), avoiding role explosion, and the rise of access-as-code - with Terraform and IAM. Listeners will gain a clear, incremental strategy for implementing - access controls, leveraging data catalogs, and evaluating build vs buy or open source - options to scale data access management.' -dateadded: '2023-06-03' -duration: PT00H55M54S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=0 - endOffset: 87 -- name: 'Episode Overview: Data Access Management & Guest Summary' - startOffset: 87 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=87 - endOffset: 143 -- name: Guest Introduction & Career Path - startOffset: 143 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=143 - endOffset: 206 -- name: 'Consulting Background: Banks, BCBS 239 and data trauma' - startOffset: 206 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=206 - endOffset: 280 -- name: 'Early Data Governance Pain: Manual tools and outdated lineage' - startOffset: 280 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=280 - endOffset: 320 -- name: 'Defining Data Governance: Building trust in data' - startOffset: 320 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=320 - endOffset: 412 -- name: 'Legacy Governance Problems: Top-down models and friction' - startOffset: 412 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=412 - endOffset: 538 -- name: 'Data Catalogs, Dictionaries & Lineage: Purpose and differences' - startOffset: 538 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=538 - endOffset: 680 -- name: 'Data Access Management Defined: Cloud consolidation and Chinese walls' - startOffset: 680 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=680 - endOffset: 814 -- name: 'Ownership Models: Data teams, governance teams, and data mesh' - startOffset: 814 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=814 - endOffset: 887 -- name: 'Data 
Engineers & Access Requests: Skill gaps and role mismatch' - startOffset: 887 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=887 - endOffset: 1038 -- name: 'Governance Skillset: Change management and DMBOK guidance' - startOffset: 1038 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1038 - endOffset: 1188 -- name: 'Maturing Access Management: Incremental improvement and scaling' - startOffset: 1188 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1188 - endOffset: 1310 -- name: 'Learning Resources: Books, Slack communities, and conferences' - startOffset: 1310 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1310 - endOffset: 1383 -- name: 'When to Invest: Size, maturity signals, and trust erosion' - startOffset: 1383 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1383 - endOffset: 1505 -- name: Start with Access Controls Early for Sensitive Data - startOffset: 1505 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1505 - endOffset: 1669 -- name: 'Core Processes: Access requests, approvals, reviews, revocation' - startOffset: 1669 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1669 - endOffset: 1776 -- name: 'Churn Use Case: Catalog discovery, purpose-based access requests' - startOffset: 1776 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1776 - endOffset: 1928 -- name: 'Privilege Creep & Best Practices: Time-bound access and revocation' - startOffset: 1928 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1928 - endOffset: 2002 -- name: 'Regulatory Context: GDPR, privacy awareness, and EU perspective' - startOffset: 2002 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2002 - endOffset: 2135 -- name: 'Debugging in Production: Temporary access and investigation workflows' - startOffset: 2135 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2135 - endOffset: 2239 -- name: 'Privacy vs Security Stakeholders: DPO needs and CISO responsibilities' - startOffset: 2239 - url: 
https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2239 - endOffset: 2540 -- name: 'Data Mesh & Sensitive Data: Federated governance, masking, filtering' - startOffset: 2540 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2540 - endOffset: 2695 -- name: 'Avoiding Role Explosion: Role inheritance, reviews, and alerts' - startOffset: 2695 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2695 - endOffset: 2802 -- name: 'Governance in DataOps: Active metadata, automated tagging, and pipelines' - startOffset: 2802 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2802 - endOffset: 3008 -- name: 'Access-as-Code Beginnings: Terraform, IAM and early patterns' - startOffset: 3008 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3008 - endOffset: 3230 -- name: 'Build vs Buy: Maintenance cost, connector updates, key-person risk' - startOffset: 3230 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3230 - endOffset: 3282 -- name: 'Gradual Adoption Strategy: Visibility-first onboarding and automation' - startOffset: 3282 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3282 - endOffset: 3356 -- name: 'Open Source Options: Raito CLI, Terraform patterns and limitations' - startOffset: 3356 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3356 - endOffset: 3396 -- name: Closing Remarks & Next Steps - startOffset: 3396 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3396 - endOffset: 3354 --- Links: diff --git a/_podcast/s06e02-non-technical-interviews.md b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md similarity index 99% rename from _podcast/s06e02-non-technical-interviews.md rename to _podcast/data-interview-behavioral-and-portfolio-prep-guide.md index c27d8f07..48a845e6 100644 --- a/_podcast/s06e02-non-technical-interviews.md +++ b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md @@ -1,13 +1,11 @@ --- title: 'Ace Data Interviews: Behavioral STARs, Case Strategy, Portfolios & Cold Emails' short: Ace Non-Technical Data 
Science Interviews +season: 6 +episode: 2 guests: - nicksingh image: images/podcast/s06e02-non-technical-interviews.jpg -description: 'Master behavioral interviews & prep to break into data roles: build - an impact portfolio, use STAR stories, nail case interviews and cold emails.' -season: 6 -episode: 2 ids: youtube: tRdLVUKU7Bo anchor: Ace-Non-Technical-Data-Science-Interviews---Nick-Singh-e1a5qtd @@ -16,6 +14,118 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Ace-Non-Technical-Data-Science-Interviews---Nick-Singh-e1a5qtd spotify: https://open.spotify.com/episode/7tO8GmqAcFUUk4fLqxEXy1 apple: https://podcasts.apple.com/us/podcast/ace-non-technical-data-science-interviews-nick-singh/id1541710331?i=1000541631687 + +description: 'Master behavioral interviews & prep to break into data roles: build an impact portfolio, use STAR stories, nail case interviews and cold emails.' +dateadded: 2021-11-12 + +duration: PT01H01M38S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=0 + endOffset: 118 +- name: 'Guest Overview: Nick Singh’s career and book' + startOffset: 118 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=118 + endOffset: 257 +- name: 'Career Coaching Focus: Helping candidates break into data roles' + startOffset: 257 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=257 + endOffset: 311 +- name: 'Hiring Process Breakdown: Screens, assessments, and panel interviews' + startOffset: 311 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=311 + endOffset: 417 +- name: 'Industry Trends: Why multiple interview rounds are common' + startOffset: 417 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=417 + endOffset: 538 +- name: 'Behavioral Interviews: Purpose and what interviewers seek' + startOffset: 538 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=538 + endOffset: 800 +- name: 'Behavioral Prep Method: Grid planning and STAR storytelling' + startOffset: 800 + url: 
https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=800 + endOffset: 1127 +- name: 'Practiced Delivery: Preparation without sounding scripted' + startOffset: 1127 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1127 + endOffset: 1185 +- name: 'Handling Tricky Prompts: Common pitfalls and recoveries' + startOffset: 1185 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1185 + endOffset: 1513 +- name: 'Project Walkthroughs: Detailing work while showing ownership' + startOffset: 1513 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1513 + endOffset: 1670 +- name: 'Lead with Impact: Pyramid principle for concise results-first stories' + startOffset: 1670 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1670 + endOffset: 1866 +- name: 'Business Context: Translating technical work into product value' + startOffset: 1866 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1866 + endOffset: 2039 +- name: 'Controlling Pacing: Avoiding rambling and burying the lead' + startOffset: 2039 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2039 + endOffset: 2238 +- name: 'Technical Claims: Only present models you can defend' + startOffset: 2238 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2238 + endOffset: 2297 +- name: 'Favorite-Model Strategy: Choose familiar, project-backed techniques' + startOffset: 2297 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2297 + endOffset: 2382 +- name: 'Portfolio Impact: Quantifying non-enterprise projects' + startOffset: 2382 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2382 + endOffset: 2471 +- name: 'Senior vs. 
Junior: Differences in behavioral and case expectations' + startOffset: 2471 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2471 + endOffset: 2667 +- name: 'Case Interview Approach: Clarify goals before proposing solutions' + startOffset: 2667 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2667 + endOffset: 2730 +- name: 'Product-Sense Interviews: Metrics, assumptions, and brainstorming' + startOffset: 2730 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2730 + endOffset: 2974 +- name: 'Metric Identification: Researching unfamiliar domains effectively' + startOffset: 2974 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2974 + endOffset: 3191 +- name: 'Company Context: Using reports and product knowledge to prepare' + startOffset: 3191 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3191 + endOffset: 3295 +- name: 'Read Tech Blogs: Learning production and architecture from case studies' + startOffset: 3295 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3295 + endOffset: 3506 +- name: 'Outreach Strategy: Cold emailing hiring managers and recruiters' + startOffset: 3506 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3506 + endOffset: 3659 +- name: 'Cold Email Examples: Showcasing projects with links and visuals' + startOffset: 3659 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3659 + endOffset: 3736 +- name: 'Final Tips and Resources: Book recommendations and next steps' + startOffset: 3736 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3736 + endOffset: 3767 +- name: 'Contact & Follow-up: NickSingh.com and LinkedIn' + startOffset: 3767 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3767 + endOffset: 3698 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Nick Singh’s career and book' @@ -1904,111 +2014,4 @@ transcript: sec: 3816 time: '1:03:36' who: Nick -dateadded: '2021-11-12' -duration: PT01H01M38S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: 
https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=0 - endOffset: 118 -- name: 'Guest Overview: Nick Singh’s career and book' - startOffset: 118 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=118 - endOffset: 257 -- name: 'Career Coaching Focus: Helping candidates break into data roles' - startOffset: 257 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=257 - endOffset: 311 -- name: 'Hiring Process Breakdown: Screens, assessments, and panel interviews' - startOffset: 311 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=311 - endOffset: 417 -- name: 'Industry Trends: Why multiple interview rounds are common' - startOffset: 417 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=417 - endOffset: 538 -- name: 'Behavioral Interviews: Purpose and what interviewers seek' - startOffset: 538 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=538 - endOffset: 800 -- name: 'Behavioral Prep Method: Grid planning and STAR storytelling' - startOffset: 800 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=800 - endOffset: 1127 -- name: 'Practiced Delivery: Preparation without sounding scripted' - startOffset: 1127 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1127 - endOffset: 1185 -- name: 'Handling Tricky Prompts: Common pitfalls and recoveries' - startOffset: 1185 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1185 - endOffset: 1513 -- name: 'Project Walkthroughs: Detailing work while showing ownership' - startOffset: 1513 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1513 - endOffset: 1670 -- name: 'Lead with Impact: Pyramid principle for concise results-first stories' - startOffset: 1670 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1670 - endOffset: 1866 -- name: 'Business Context: Translating technical work into product value' - startOffset: 1866 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1866 - endOffset: 2039 -- name: 'Controlling Pacing: Avoiding rambling and burying the lead' - startOffset: 2039 - url: 
https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2039 - endOffset: 2238 -- name: 'Technical Claims: Only present models you can defend' - startOffset: 2238 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2238 - endOffset: 2297 -- name: 'Favorite-Model Strategy: Choose familiar, project-backed techniques' - startOffset: 2297 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2297 - endOffset: 2382 -- name: 'Portfolio Impact: Quantifying non-enterprise projects' - startOffset: 2382 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2382 - endOffset: 2471 -- name: 'Senior vs. Junior: Differences in behavioral and case expectations' - startOffset: 2471 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2471 - endOffset: 2667 -- name: 'Case Interview Approach: Clarify goals before proposing solutions' - startOffset: 2667 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2667 - endOffset: 2730 -- name: 'Product-Sense Interviews: Metrics, assumptions, and brainstorming' - startOffset: 2730 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2730 - endOffset: 2974 -- name: 'Metric Identification: Researching unfamiliar domains effectively' - startOffset: 2974 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2974 - endOffset: 3191 -- name: 'Company Context: Using reports and product knowledge to prepare' - startOffset: 3191 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3191 - endOffset: 3295 -- name: 'Read Tech Blogs: Learning production and architecture from case studies' - startOffset: 3295 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3295 - endOffset: 3506 -- name: 'Outreach Strategy: Cold emailing hiring managers and recruiters' - startOffset: 3506 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3506 - endOffset: 3659 -- name: 'Cold Email Examples: Showcasing projects with links and visuals' - startOffset: 3659 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3659 - endOffset: 3736 -- name: 'Final Tips and Resources: Book 
recommendations and next steps' - startOffset: 3736 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3736 - endOffset: 3767 -- name: 'Contact & Follow-up: NickSingh.com and LinkedIn' - startOffset: 3767 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3767 - endOffset: 3698 --- diff --git a/_podcast/s11e08-technical-writing-and-data-journalism.md b/_podcast/data-journalism-python-visualization-storytelling.md similarity index 96% rename from _podcast/s11e08-technical-writing-and-data-journalism.md rename to _podcast/data-journalism-python-visualization-storytelling.md index 21204a5f..d2104536 100644 --- a/_podcast/s11e08-technical-writing-and-data-journalism.md +++ b/_podcast/data-journalism-python-visualization-storytelling.md @@ -1,20 +1,141 @@ --- +title: 'Practical Data Journalism: Sourcing, Storytelling, Visualization & Tools (Python, Tableau)' +short: Technical Writing and Data Journalism +season: 11 episode: 8 guests: - angelicaloduca +image: images/podcast/s11e08-technical-writing-and-data-journalism.jpg ids: anchor: Technical-Writing-and-Data-Journalism---Angelica-Lo-Duca-e1r7j8k youtube: uO_lk12q02A -image: images/podcast/s11e08-technical-writing-and-data-journalism.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Technical-Writing-and-Data-Journalism---Angelica-Lo-Duca-e1r7j8k apple: https://podcasts.apple.com/us/podcast/technical-writing-and-data-journalism-angelica-lo-duca/id1541710331?i=1000587507530 spotify: https://open.spotify.com/episode/38b2Y9KgxSFlIHPZ3jqheK?si=SPiURO1bTamVKrKV_laVDQ youtube: https://www.youtube.com/watch?v=uO_lk12q02A -season: 11 -short: Technical Writing and Data Journalism -title: 'Practical Data Journalism: Sourcing, Storytelling, Visualization & Tools (Python, - Tableau)' + +description: 'Discover data journalism: sourcing, storytelling & visualization with Python and Tableau—learn tools, workflows and publishing tips to craft compelling stories.' 
+intro: "How do you transform raw data into compelling, trustworthy journalism that readers can understand and act upon? In this episode, Angelica Lo Duca—researcher at the Institute of Informatics and Telematics (CNR) and Data Journalism professor at the University of Pisa—shares practical frameworks for data journalism covering sourcing, storytelling, visualization, and essential tools like Python and Tableau.

Drawing from her journey through cryptography, web development, and data science, Angelica tackles real-world challenges: finding reliable small datasets on the web, working with query engines like Presto and Trino, and learning from investigative work like Washington Post projects. She distinguishes data journalism from data science, reveals teaching approaches for digital humanities students, and breaks down an effective writer's workflow: problem identification → solution development → clear results presentation, complete with code repositories and step-by-step clarity.

You'll gain concrete strategies for converting dense reports and survey PDFs into engaging narratives, visualization best practices (one concept per chart, choosing tables over confusing pie charts), tool selection guidance between Python scripting and Tableau, plus curated learning resources. Whether you're a journalist exploring data tools, a data professional interested in storytelling, or an educator teaching interdisciplinary skills, this episode delivers actionable methods for reliable sourcing, effective narrative construction, and clear data visualization that makes complex information accessible and impactful." +topics: +- data journalism +- data science +- data visualization +- tools +dateadded: 2022-11-26 + +duration: PT01H01M37S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=0 + endOffset: 113 +- name: 'Guest Introduction: Angelica Lo Duca, researcher & professor' + startOffset: 113 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=113 + endOffset: 149 +- name: 'Career Journey: Cryptography to Web Applications and Data Science' + startOffset: 149 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=149 + endOffset: 281 +- name: 'Data Engineering Research Interests: security and data integrity' + startOffset: 281 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=281 + endOffset: 357 +- name: 'Writing Portfolio: novels, technical articles, and Comet for Data Science' + startOffset: 357 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=357 + endOffset: 404 +- name: 'Query Engines: Presto, Trino, and real-world migrations' + startOffset: 404 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=404 + endOffset: 463 +- name: 'Defining Data Journalism: data-driven news vs. 
storytelling' + startOffset: 463 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=463 + endOffset: 481 +- name: 'Data Journalism vs Data Science: accuracy, methods, and scope' + startOffset: 481 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=481 + endOffset: 671 +- name: 'Investigative Examples: Washington Post and international projects' + startOffset: 671 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=671 + endOffset: 911 +- name: 'Data Sourcing Challenges: finding small, accurate datasets on the web' + startOffset: 911 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=911 + endOffset: 973 +- name: 'Teaching Shift: how Angelica started teaching data journalism' + startOffset: 973 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=973 + endOffset: 1129 +- name: 'Course Audience: digital humanities students and interdisciplinary skills' + startOffset: 1129 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1129 + endOffset: 1213 +- name: 'Tool Choices: Python scripting vs. 
Tableau for data journalism' + startOffset: 1213 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1213 + endOffset: 1283 +- name: 'Learning Resources: Coursera and recommended readings' + startOffset: 1283 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1283 + endOffset: 1475 +- name: 'Defining Technical Writing: how-to guides, clarity, and audience focus' + startOffset: 1475 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1475 + endOffset: 1759 +- name: 'From Reports to Stories: converting survey PDFs into narratives' + startOffset: 1759 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1759 + endOffset: 1945 +- name: 'Adding Context & Wisdom: framing data with meaning and calls to action' + startOffset: 1945 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1945 + endOffset: 2180 +- name: 'Visualization Guidelines: one concept per chart; tables when clearer' + startOffset: 2180 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2180 + endOffset: 2306 +- name: 'Visualization Pitfalls: why to avoid pie charts and confusing graphics' + startOffset: 2306 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2306 + endOffset: 2352 +- name: 'Article Length & Formats: short Medium posts and the Syntax Error publication' + startOffset: 2352 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2352 + endOffset: 2447 +- name: 'Article Workflow: problem → solution → result, with code repos' + startOffset: 2447 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2447 + endOffset: 2620 +- name: 'Topic Sourcing: personal problems, social media, and community signals' + startOffset: 2620 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2620 + endOffset: 2735 +- name: 'Path to a Book: publisher outreach and acquisition editor contact' + startOffset: 2735 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2735 + endOffset: 3019 +- name: 'Book Contract & Schedule: chapter timelines, pacing, and holidays' + startOffset: 3019 + url: 
https://www.youtube.com/watch?v=uO_lk12q02A&t=3019 + endOffset: 3257 +- name: 'Market Research & Audience: proposal, state-of-the-art, and level targeting' + startOffset: 3257 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3257 + endOffset: 3609 +- name: 'Editing & Reviews: reviewer feedback, overlapping revisions, and organization' + startOffset: 3609 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3609 + endOffset: 3743 +- name: 'Episode Wrap-Up: final questions and closing remarks' + startOffset: 3743 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3743 + endOffset: 3697 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Angelica Lo Duca, researcher & professor' @@ -984,136 +1105,6 @@ transcript: sec: 3810 time: '1:03:30' who: Alexey -description: 'Discover data journalism: sourcing, storytelling & visualization with - Python and Tableau—learn tools, workflows and publishing tips to craft compelling - stories.' -intro: 'How do you turn messy, hard-to-find data into clear, accountable journalism? - In this episode, Angelica Lo Duca — researcher at the Institute of Informatics and - Telematics (CNR) and Data Journalism professor at the University of Pisa — walks - through practical approaches to data journalism: sourcing, storytelling, visualization, - and tools like Python and Tableau. Drawing on a career from cryptography to web - apps and data science, Angelica covers data sourcing challenges (including finding - small, accurate web datasets), query engines and migrations (Presto, Trino), and - examples of investigative projects such as work referenced from the Washington Post. - She contrasts data journalism with data science, explains teaching strategies for - digital humanities students, and outlines a writer’s workflow: problem → solution - → result, with code repositories and how-to clarity. 
Expect concrete guidance on - converting reports and survey PDFs into narratives, visualization rules (one concept - per chart; prefer tables when clearer; avoid confusing pie charts), tool choices - between Python scripting and Tableau, and curated learning resources. Listen to - learn actionable methods for reliable data sourcing, effective data storytelling, - and clean data visualization you can apply to reporting projects.' -dateadded: '2022-11-26' -duration: PT01H01M37S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=0 - endOffset: 113 -- name: 'Guest Introduction: Angelica Lo Duca, researcher & professor' - startOffset: 113 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=113 - endOffset: 149 -- name: 'Career Journey: Cryptography to Web Applications and Data Science' - startOffset: 149 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=149 - endOffset: 281 -- name: 'Data Engineering Research Interests: security and data integrity' - startOffset: 281 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=281 - endOffset: 357 -- name: 'Writing Portfolio: novels, technical articles, and Comet for Data Science' - startOffset: 357 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=357 - endOffset: 404 -- name: 'Query Engines: Presto, Trino, and real-world migrations' - startOffset: 404 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=404 - endOffset: 463 -- name: 'Defining Data Journalism: data-driven news vs. 
storytelling' - startOffset: 463 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=463 - endOffset: 481 -- name: 'Data Journalism vs Data Science: accuracy, methods, and scope' - startOffset: 481 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=481 - endOffset: 671 -- name: 'Investigative Examples: Washington Post and international projects' - startOffset: 671 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=671 - endOffset: 911 -- name: 'Data Sourcing Challenges: finding small, accurate datasets on the web' - startOffset: 911 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=911 - endOffset: 973 -- name: 'Teaching Shift: how Angelica started teaching data journalism' - startOffset: 973 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=973 - endOffset: 1129 -- name: 'Course Audience: digital humanities students and interdisciplinary skills' - startOffset: 1129 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1129 - endOffset: 1213 -- name: 'Tool Choices: Python scripting vs. 
Tableau for data journalism' - startOffset: 1213 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1213 - endOffset: 1283 -- name: 'Learning Resources: Coursera and recommended readings' - startOffset: 1283 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1283 - endOffset: 1475 -- name: 'Defining Technical Writing: how-to guides, clarity, and audience focus' - startOffset: 1475 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1475 - endOffset: 1759 -- name: 'From Reports to Stories: converting survey PDFs into narratives' - startOffset: 1759 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1759 - endOffset: 1945 -- name: 'Adding Context & Wisdom: framing data with meaning and calls to action' - startOffset: 1945 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1945 - endOffset: 2180 -- name: 'Visualization Guidelines: one concept per chart; tables when clearer' - startOffset: 2180 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2180 - endOffset: 2306 -- name: 'Visualization Pitfalls: why to avoid pie charts and confusing graphics' - startOffset: 2306 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2306 - endOffset: 2352 -- name: 'Article Length & Formats: short Medium posts and the Syntax Error publication' - startOffset: 2352 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2352 - endOffset: 2447 -- name: 'Article Workflow: problem → solution → result, with code repos' - startOffset: 2447 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2447 - endOffset: 2620 -- name: 'Topic Sourcing: personal problems, social media, and community signals' - startOffset: 2620 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2620 - endOffset: 2735 -- name: 'Path to a Book: publisher outreach and acquisition editor contact' - startOffset: 2735 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2735 - endOffset: 3019 -- name: 'Book Contract & Schedule: chapter timelines, pacing, and holidays' - startOffset: 3019 - url: 
https://www.youtube.com/watch?v=uO_lk12q02A&t=3019 - endOffset: 3257 -- name: 'Market Research & Audience: proposal, state-of-the-art, and level targeting' - startOffset: 3257 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3257 - endOffset: 3609 -- name: 'Editing & Reviews: reviewer feedback, overlapping revisions, and organization' - startOffset: 3609 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3609 - endOffset: 3743 -- name: 'Episode Wrap-Up: final questions and closing remarks' - startOffset: 3743 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3743 - endOffset: 3697 --- Links: diff --git a/_podcast/s03e08-data-led-professional.md b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md similarity index 97% rename from _podcast/s03e08-data-led-professional.md rename to _podcast/data-led-growth-event-tracking-and-reverse-etl.md index 87d93ec0..ef835b8a 100644 --- a/_podcast/s03e08-data-led-professional.md +++ b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md @@ -1,12 +1,11 @@ --- -title: 'How to Build a Data-Led Growth Stack: Event Tracking, Tracking Plans & Reverse - ETL' +title: 'How to Build a Data-Led Growth Stack: Event Tracking, Tracking Plans & Reverse ETL' short: Becoming a Data-led Professional +season: 3 +episode: 8 guests: - arpitchoudhury image: images/podcast/s03e08-data-led-professional.jpg -season: 3 -episode: 8 ids: youtube: 8v5KpHWgyYw anchor: Becoming-a-Data-led-Professional---Arpit-Choudhury-e11mkgq @@ -15,6 +14,124 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-led-Professional---Arpit-Choudhury-e11mkgq spotify: https://open.spotify.com/episode/2hg3Gi3h5OfdedXENwZwnU apple: https://podcasts.apple.com/us/podcast/becoming-a-data-led-professional-arpit-choudhury/id1541710331?i=1000523422699 + +description: Build a data-led growth stack with event tracking, tracking plans & Reverse ETL to activate product data for precise experimentation, personalization, and ops +intro: 'How do you 
design a data-led growth stack that reliably powers personalization, activation, and operational workflows? In this episode, Arpit Choudhury, founder of Data-led Academy, walks through the practical steps of building a data-led growth stack focused on event tracking, documented tracking plans, and reverse ETL.

Arpit — who runs Data-led Academy to teach data skills for non-technical and technical teams alike — breaks down the full data flow: collection (client- vs server-side events), storage (warehouses like Snowflake, BigQuery, Redshift), transformation (DBT), analysis (product analytics), and activation (reverse ETL to support, sales, and engagement tools). He covers how to create tracking plans and instrument events (signup, project created, invite, invoice), common tooling (Segment, RudderStack, MetaRouter, Freshpaint, AVO, Iteratively, TrackPlan), and reverse ETL platforms (Census, Hightouch, Grouparoo). You’ll also hear trade-offs around CDPs versus warehouse-centric approaches, buy vs build decisions, and the team roles and documentation practices needed to democratize data.

Listen to learn concrete patterns for event tracking, tracking-plan ownership, anomaly investigation, and activating product data to drive growth without sacrificing data quality.' +topics: +- data engineering +- tools +dateadded: 2021-05-29 + +duration: PT01H21S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=0 + endOffset: 141 +- name: 'DataLed Academy: free learning, repository & podcast' + startOffset: 141 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=141 + endOffset: 306 +- name: 'Career trajectory: integrations, Integromat & community growth' + startOffset: 306 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=306 + endOffset: 441 +- name: 'Growth marketing: A/B testing, personalization & product data' + startOffset: 441 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=441 + endOffset: 586 +- name: 'Marketer tooling: visual queries and self-serve data access' + startOffset: 586 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=586 + endOffset: 645 +- name: 'Definition: data-led professional — source awareness & data skepticism' + startOffset: 645 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=645 + endOffset: 693 +- name: 'Data-led vs. 
data-driven: balancing data, intuition & bias' + startOffset: 693 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=693 + endOffset: 814 +- name: 'Tracking plan & instrumentation: documenting events, properties & ownership' + startOffset: 814 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=814 + endOffset: 1107 +- name: 'Anomaly investigation: tracing event origins and fake signups' + startOffset: 1107 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1107 + endOffset: 1247 +- name: 'Collaborative tracking tools: AVO, Iteratively, TrackPlan' + startOffset: 1247 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1247 + endOffset: 1370 +- name: 'Data flow overview: collection, storage, analysis and activation' + startOffset: 1370 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1370 + endOffset: 1483 +- name: 'Event examples for SaaS: signup, project created, invite, invoice' + startOffset: 1483 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1483 + endOffset: 1620 +- name: 'Client-side vs. 
server-side events: timing, accuracy and use cases' + startOffset: 1620 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1620 + endOffset: 1732 +- name: 'Data warehousing & transformation: warehouses, DBT and BI analysis' + startOffset: 1732 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1732 + endOffset: 1803 +- name: 'Data activation: sending event data to support, sales and engagement tools' + startOffset: 1803 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1803 + endOffset: 2021 +- name: 'Data collection platforms: Segment, RudderStack, MetaRouter, Freshpaint' + startOffset: 2021 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2021 + endOffset: 2127 +- name: 'Warehouse-centric analytics: Snowflake, BigQuery, Redshift & warehouse-first + tools' + startOffset: 2127 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2127 + endOffset: 2245 +- name: 'Reverse ETL / operational analytics: Census, HighTouch, Grouparoo' + startOffset: 2245 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2245 + endOffset: 2300 +- name: 'Customer Data Platforms (CDP): all-in-one trade-offs for marketers' + startOffset: 2300 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2300 + endOffset: 2490 +- name: 'Modern data stack for growth: CDI, product analytics, warehouse & reverse + ETL' + startOffset: 2490 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2490 + endOffset: 2630 +- name: 'Buy vs. 
build: cost, maintenance and open-source alternatives' + startOffset: 2630 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2630 + endOffset: 2773 +- name: 'Team composition: data engineer, analyst, analytics engineer & product ops' + startOffset: 2773 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2773 + endOffset: 3100 +- name: 'Data democratization: data literacy, documentation & self-serve analytics' + startOffset: 3100 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3100 + endOffset: 3228 +- name: 'Motivating documentation: culture, early habits & catalog tools' + startOffset: 3228 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3228 + endOffset: 3368 +- name: 'Product-led vs. data-led: activation events and personalized onboarding' + startOffset: 3368 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3368 + endOffset: 3629 +- name: 'Closing & resources: dataled.academy, newsletter and podcast episodes' + startOffset: 3629 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3629 + endOffset: 3621 + transcript: - header: Podcast Introduction - header: 'DataLed Academy: free learning, repository & podcast' @@ -897,131 +1014,4 @@ transcript: sec: 3762 time: '1:02:42' who: Alexey -description: Build a data-led growth stack with event tracking, tracking plans & Reverse - ETL to activate product data for precise experimentation, personalization, and ops. -intro: 'How do you design a data-led growth stack that reliably powers personalization, - activation, and operational workflows? In this episode, Arpit Choudhury, founder - of Data-led Academy, walks through the practical steps of building a data-led growth - stack focused on event tracking, documented tracking plans, and reverse ETL.

- Arpit — who runs Data-led Academy to teach data skills for non-technical and technical - teams alike — breaks down the full data flow: collection (client- vs server-side - events), storage (warehouses like Snowflake, BigQuery, Redshift), transformation - (DBT), analysis (product analytics), and activation (reverse ETL to support, sales, - and engagement tools). He covers how to create tracking plans and instrument events - (signup, project created, invite, invoice), common tooling (Segment, RudderStack, - MetaRouter, Freshpaint, AVO, Iteratively, TrackPlan), and reverse ETL platforms - (Census, Hightouch, Grouparoo). You’ll also hear trade-offs around CDPs versus warehouse-centric - approaches, buy vs build decisions, and the team roles and documentation practices - needed to democratize data.

Listen to learn concrete patterns for event - tracking, tracking-plan ownership, anomaly investigation, and activating product - data to drive growth without sacrificing data quality.' -dateadded: '2021-05-29' -duration: PT01H21S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=0 - endOffset: 141 -- name: 'DataLed Academy: free learning, repository & podcast' - startOffset: 141 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=141 - endOffset: 306 -- name: 'Career trajectory: integrations, Integromat & community growth' - startOffset: 306 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=306 - endOffset: 441 -- name: 'Growth marketing: A/B testing, personalization & product data' - startOffset: 441 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=441 - endOffset: 586 -- name: 'Marketer tooling: visual queries and self-serve data access' - startOffset: 586 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=586 - endOffset: 645 -- name: 'Definition: data-led professional — source awareness & data skepticism' - startOffset: 645 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=645 - endOffset: 693 -- name: 'Data-led vs. 
data-driven: balancing data, intuition & bias' - startOffset: 693 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=693 - endOffset: 814 -- name: 'Tracking plan & instrumentation: documenting events, properties & ownership' - startOffset: 814 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=814 - endOffset: 1107 -- name: 'Anomaly investigation: tracing event origins and fake signups' - startOffset: 1107 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1107 - endOffset: 1247 -- name: 'Collaborative tracking tools: AVO, Iteratively, TrackPlan' - startOffset: 1247 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1247 - endOffset: 1370 -- name: 'Data flow overview: collection, storage, analysis and activation' - startOffset: 1370 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1370 - endOffset: 1483 -- name: 'Event examples for SaaS: signup, project created, invite, invoice' - startOffset: 1483 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1483 - endOffset: 1620 -- name: 'Client-side vs. 
server-side events: timing, accuracy and use cases' - startOffset: 1620 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1620 - endOffset: 1732 -- name: 'Data warehousing & transformation: warehouses, DBT and BI analysis' - startOffset: 1732 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1732 - endOffset: 1803 -- name: 'Data activation: sending event data to support, sales and engagement tools' - startOffset: 1803 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1803 - endOffset: 2021 -- name: 'Data collection platforms: Segment, RudderStack, MetaRouter, Freshpaint' - startOffset: 2021 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2021 - endOffset: 2127 -- name: 'Warehouse-centric analytics: Snowflake, BigQuery, Redshift & warehouse-first - tools' - startOffset: 2127 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2127 - endOffset: 2245 -- name: 'Reverse ETL / operational analytics: Census, HighTouch, Grouparoo' - startOffset: 2245 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2245 - endOffset: 2300 -- name: 'Customer Data Platforms (CDP): all-in-one trade-offs for marketers' - startOffset: 2300 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2300 - endOffset: 2490 -- name: 'Modern data stack for growth: CDI, product analytics, warehouse & reverse - ETL' - startOffset: 2490 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2490 - endOffset: 2630 -- name: 'Buy vs. 
build: cost, maintenance and open-source alternatives' - startOffset: 2630 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2630 - endOffset: 2773 -- name: 'Team composition: data engineer, analyst, analytics engineer & product ops' - startOffset: 2773 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2773 - endOffset: 3100 -- name: 'Data democratization: data literacy, documentation & self-serve analytics' - startOffset: 3100 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3100 - endOffset: 3228 -- name: 'Motivating documentation: culture, early habits & catalog tools' - startOffset: 3228 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3228 - endOffset: 3368 -- name: 'Product-led vs. data-led: activation events and personalized onboarding' - startOffset: 3368 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3368 - endOffset: 3629 -- name: 'Closing & resources: dataled.academy, newsletter and podcast episodes' - startOffset: 3629 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3629 - endOffset: 3621 --- diff --git a/_podcast/s10e06-data-mesh-101.md b/_podcast/data-mesh-architecture-decentralized-data-products.md similarity index 97% rename from _podcast/s10e06-data-mesh-101.md rename to _podcast/data-mesh-architecture-decentralized-data-products.md index f5c58553..27c7095e 100644 --- a/_podcast/s10e06-data-mesh-101.md +++ b/_podcast/data-mesh-architecture-decentralized-data-products.md @@ -1,20 +1,131 @@ --- +title: 'Data Mesh Implementation: Build Decentralized Data Products, Contracts & Federated Governance' +short: Data Mesh 101 +season: 10 episode: 6 guests: - zhamakdehghani +image: images/podcast/s10e06-data-mesh-101.jpg ids: anchor: Data-Mesh-101---Zhamak-Dehghani-e1n7vlk youtube: 346N_pCtYZU -image: images/podcast/s10e06-data-mesh-101.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Mesh-101---Zhamak-Dehghani-e1n7vlk apple: 
https://podcasts.apple.com/us/podcast/data-mesh-101-zhamak-dehghani/id1541710331?i=1000578193372 spotify: https://open.spotify.com/episode/5uX5sfRPvC9WAXOM9fRCup?si=FQYB7cpuSOyzq7022xU3Tg youtube: https://www.youtube.com/watch?v=346N_pCtYZU -season: 10 -short: Data Mesh 101 -title: 'Data Mesh Implementation: Build Decentralized Data Products, Contracts & Federated - Governance' + +description: Discover Data Mesh strategies, data contracts and federated governance to build decentralized data products, improve data quality, and scale adoption +intro: 'How do you scale data architecture so teams deliver value without centralized bottlenecks? In this episode, Zhamak Dehghani — director of technology at Thoughtworks and founder of the Data Mesh concept — walks through practical steps for Data Mesh implementation: building decentralized data products, defining data contracts, and establishing federated governance.

We cover why enterprises face long pipelines to value and how a socio-technical, domain-oriented approach decouples pipelines with clear contracts and ownership. Zhamak explains the mesh-as-graph view, streaming examples of domain producers/consumers and schemas, and the maturity spectrum from tight warehouse schemas to loose coupling. You’ll hear about minimal guarantees and metadata for discoverability, decentralized interoperability (identity and auth), and how to define data product contracts (quality, SLAs, ownership).

The episode also digs into self-serve data platforms, platform federation with shared standards, governance primitives such as retention and automated validation, and an adoption roadmap including assessment, pilots, and executive buy-in. Listeners will gain concrete guidance on applying Data Mesh principles, designing data products and contracts, and operationalizing federated governance in their organizations.' +topics: +- data mesh +- data engineering +dateadded: 2022-09-02 + +duration: PT00H59M55S + +quotableClips: +- name: Podcast Introduction + startOffset: 144 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=144 + endOffset: 159 +- name: Guest background & career path + startOffset: 159 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=159 + endOffset: 197 +- name: 'From firmware to distributed systems: career highlights' + startOffset: 197 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=197 + endOffset: 382 +- name: 'Consulting practice: building data platforms and products' + startOffset: 382 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=382 + endOffset: 455 +- name: 'Enterprise data friction: long pipelines to value' + startOffset: 455 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=455 + endOffset: 589 +- name: Data Mesh concept and core motivation + startOffset: 589 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=589 + endOffset: 596 +- name: 'Decentralized socio-technical approach: autonomy and interoperability' + startOffset: 596 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=596 + endOffset: 800 +- name: 'Architectural shift: decoupling pipelines and data contracts' + startOffset: 800 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=800 + endOffset: 895 +- name: 'Mesh as a graph: interconnectivity and value exchange' + startOffset: 895 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=895 + endOffset: 994 +- name: Domain-oriented ownership and team alignment + startOffset: 994 + url: 
https://www.youtube.com/watch?v=346N_pCtYZU&t=994 + endOffset: 1030 +- name: 'Streaming example: domain producers, consumers, and schemas' + startOffset: 1030 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1030 + endOffset: 1345 +- name: 'Maturity spectrum: warehouse schemas versus loose coupling' + startOffset: 1345 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1345 + endOffset: 1578 +- name: Optimizing for humans vs machines; federated queries and compute + startOffset: 1578 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1578 + endOffset: 1865 +- name: Minimal guarantees and metadata for discoverability + startOffset: 1865 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1865 + endOffset: 1924 +- name: 'Decentralized interoperability: standard seams, identity, and auth' + startOffset: 1924 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1924 + endOffset: 2076 +- name: 'Data as a product: consumer-first guarantees and KPIs' + startOffset: 2076 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2076 + endOffset: 2376 +- name: 'Data product contracts: quality, SLAs, and ownership decisions' + startOffset: 2376 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2376 + endOffset: 2518 +- name: 'Self-serve data platform: developer experience and abstractions' + startOffset: 2518 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2518 + endOffset: 2855 +- name: 'Platform federation: multiple platforms with shared standards' + startOffset: 2855 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2855 + endOffset: 2965 +- name: 'Federated governance: policies, automation, and enforcement' + startOffset: 2965 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2965 + endOffset: 3182 +- name: 'Governance primitives: retention, metadata, and automated validation' + startOffset: 3182 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3182 + endOffset: 3288 +- name: 'Core understanding: why, what, and how of Data Mesh' + startOffset: 3288 
+ url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3288 + endOffset: 3447 +- name: 'Adoption roadmap: assessment, pilots, and executive buy-in' + startOffset: 3447 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3447 + endOffset: 3603 +- name: 'Implementations and case studies: community resources' + startOffset: 3603 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3603 + endOffset: 3717 +- name: Episode wrap-up and closing remarks + startOffset: 3717 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3717 + endOffset: 3595 + transcript: - header: Podcast Introduction - line: This week we will talk about Data Mesh. We have a special guest today, Zhamak. @@ -1009,127 +1120,6 @@ transcript: sec: 3739 time: '1:02:19' who: Zhamak -description: Discover Data Mesh strategies, data contracts and federated governance - to build decentralized data products, improve data quality, and scale adoption. -intro: 'How do you scale data architecture so teams deliver value without centralized - bottlenecks? In this episode, Zhamak Dehghani — director of technology at Thoughtworks - and founder of the Data Mesh concept — walks through practical steps for Data Mesh - implementation: building decentralized data products, defining data contracts, and - establishing federated governance.

We cover why enterprises face long pipelines - to value and how a socio-technical, domain-oriented approach decouples pipelines - with clear contracts and ownership. Zhamak explains the mesh-as-graph view, streaming - examples of domain producers/consumers and schemas, and the maturity spectrum from - tight warehouse schemas to loose coupling. You’ll hear about minimal guarantees - and metadata for discoverability, decentralized interoperability (identity and auth), - and how to define data product contracts (quality, SLAs, ownership).

The - episode also digs into self-serve data platforms, platform federation with shared - standards, governance primitives such as retention and automated validation, and - an adoption roadmap including assessment, pilots, and executive buy-in. Listeners - will gain concrete guidance on applying Data Mesh principles, designing data products - and contracts, and operationalizing federated governance in their organizations.' -dateadded: '2022-09-02' -duration: PT00H59M55S -quotableClips: -- name: Podcast Introduction - startOffset: 144 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=144 - endOffset: 159 -- name: Guest background & career path - startOffset: 159 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=159 - endOffset: 197 -- name: 'From firmware to distributed systems: career highlights' - startOffset: 197 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=197 - endOffset: 382 -- name: 'Consulting practice: building data platforms and products' - startOffset: 382 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=382 - endOffset: 455 -- name: 'Enterprise data friction: long pipelines to value' - startOffset: 455 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=455 - endOffset: 589 -- name: Data Mesh concept and core motivation - startOffset: 589 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=589 - endOffset: 596 -- name: 'Decentralized socio-technical approach: autonomy and interoperability' - startOffset: 596 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=596 - endOffset: 800 -- name: 'Architectural shift: decoupling pipelines and data contracts' - startOffset: 800 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=800 - endOffset: 895 -- name: 'Mesh as a graph: interconnectivity and value exchange' - startOffset: 895 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=895 - endOffset: 994 -- name: Domain-oriented ownership and team alignment - startOffset: 994 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=994 - 
endOffset: 1030 -- name: 'Streaming example: domain producers, consumers, and schemas' - startOffset: 1030 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1030 - endOffset: 1345 -- name: 'Maturity spectrum: warehouse schemas versus loose coupling' - startOffset: 1345 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1345 - endOffset: 1578 -- name: Optimizing for humans vs machines; federated queries and compute - startOffset: 1578 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1578 - endOffset: 1865 -- name: Minimal guarantees and metadata for discoverability - startOffset: 1865 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1865 - endOffset: 1924 -- name: 'Decentralized interoperability: standard seams, identity, and auth' - startOffset: 1924 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1924 - endOffset: 2076 -- name: 'Data as a product: consumer-first guarantees and KPIs' - startOffset: 2076 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2076 - endOffset: 2376 -- name: 'Data product contracts: quality, SLAs, and ownership decisions' - startOffset: 2376 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2376 - endOffset: 2518 -- name: 'Self-serve data platform: developer experience and abstractions' - startOffset: 2518 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2518 - endOffset: 2855 -- name: 'Platform federation: multiple platforms with shared standards' - startOffset: 2855 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2855 - endOffset: 2965 -- name: 'Federated governance: policies, automation, and enforcement' - startOffset: 2965 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2965 - endOffset: 3182 -- name: 'Governance primitives: retention, metadata, and automated validation' - startOffset: 3182 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3182 - endOffset: 3288 -- name: 'Core understanding: why, what, and how of Data Mesh' - startOffset: 3288 - url: 
https://www.youtube.com/watch?v=346N_pCtYZU&t=3288 - endOffset: 3447 -- name: 'Adoption roadmap: assessment, pilots, and executive buy-in' - startOffset: 3447 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3447 - endOffset: 3603 -- name: 'Implementations and case studies: community resources' - startOffset: 3603 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3603 - endOffset: 3717 -- name: Episode wrap-up and closing remarks - startOffset: 3717 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3717 - endOffset: 3595 --- Links: diff --git a/_podcast/s14e02-practical-data-privacy.md b/_podcast/data-privacy-engineering-gdpr-machine-learning.md similarity index 97% rename from _podcast/s14e02-practical-data-privacy.md rename to _podcast/data-privacy-engineering-gdpr-machine-learning.md index c1fe9696..420c7a80 100644 --- a/_podcast/s14e02-practical-data-privacy.md +++ b/_podcast/data-privacy-engineering-gdpr-machine-learning.md @@ -1,20 +1,118 @@ --- +title: 'Data Privacy Playbook: Differential Privacy, Federated Learning, PETs & Consent UX' +short: Practical Data Privacy +season: 14 episode: 2 guests: - katharinejarmul +image: images/podcast/s14e02-practical-data-privacy.jpg ids: anchor: ow/datatalksclub/episodes/Practical-Data-Privacy---Katharine-Jarmul-e23u551 youtube: gbjoFfrm4iw -image: images/podcast/s14e02-practical-data-privacy.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Practical-Data-Privacy---Katharine-Jarmul-e23u551 apple: https://podcasts.apple.com/us/podcast/practical-data-privacy-katharine-jarmul/id1541710331?i=1000613701646 spotify: https://open.spotify.com/episode/137H2M9qU5lFqb4hLyMBvg?si=b0KXeubVSpa3bfsuZaS6pQ youtube: https://www.youtube.com/watch?v=gbjoFfrm4iw -season: 14 -short: Practical Data Privacy -title: 'Data Privacy Playbook: Differential Privacy, Federated Learning, PETs & Consent - UX' + +description: Discover differential privacy, federated learning and PETs - privacy engineering, 
consent UX fixes and compliance to reduce re-identification risk +intro: 'How can teams build useful machine learning while respecting user privacy, compliance, and re‑identification risk? In this episode, Katharine Jarmul — privacy activist and Principal Data Scientist at ThoughtWorks Germany — walks through a practical Data Privacy Playbook focused on differential privacy, federated learning, privacy‑enhancing technologies (PETs) and consent UX.

Katharine draws on a career from data journalism and NLP to startup work at KI Protect and enterprise ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies for pseudonymisation, encrypted ML and federated architectures. We cover consent and opt‑out UX, legal vs technical definitions of privacy, profiling and fingerprinting risks, and privacy‑friendly personalization like session‑based intent and ephemeral inference.

You’ll get concrete takeaways: why differential privacy matters (formal definition, use cases, Tumult and other libraries), common anonymization pitfalls (hashing, k‑anonymity, Netflix lessons), how PETs fit into system design, and generative AI privacy considerations including retention and localized model deployment. Listeners leave with actionable guidance on privacy engineering, data minimization, consent design, and resources to continue learning.' +topics: +- data governance +- data privacy +- machine learning +- federated learning +dateadded: 2023-05-20 + +duration: PT01H01M28S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=0 + endOffset: 100 +- name: 'Guest Introduction: Katharine Jarmul — privacy activist, ML engineer, ThoughtWorks, + book' + startOffset: 100 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=100 + endOffset: 152 +- name: 'Career Journey: data journalism, NLP, consulting, and machine learning' + startOffset: 152 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=152 + endOffset: 548 +- name: 'Startup Focus: KI Protect, pseudonymisation, encrypted & federated ML' + startOffset: 548 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=548 + endOffset: 693 +- name: 'Privacy Regulation Overview: GDPR, CCPA, CPRA and cookie consent defaults' + startOffset: 693 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=693 + endOffset: 875 +- name: 'Cookie Consent & Opt‑Out UX: one‑click rejects and user behavior' + startOffset: 875 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=875 + endOffset: 984 +- name: 'Defining Data Privacy: legal, social, and technical perspectives' + startOffset: 984 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=984 + endOffset: 1295 +- name: 'Practical Data Privacy (book): availability, previews, and giveaways' + startOffset: 1295 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1295 + endOffset: 1358 +- name: 'Bridging Legal & 
Technical Views: privacy risk, translation, and collaboration' + startOffset: 1358 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1358 + endOffset: 1512 +- name: 'User Profiling & Fingerprinting: browser history, apps, and re‑identification + risks' + startOffset: 1512 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1512 + endOffset: 1815 +- name: 'Privacy‑Friendly Personalization: session‑based intent and ephemeral inference' + startOffset: 1815 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1815 + endOffset: 1988 +- name: 'Privacy Engineering & PETs: encrypted ML, federated learning, and architecture' + startOffset: 1988 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1988 + endOffset: 2109 +- name: 'Business Case for Privacy: risk management, regulation, and customer trust' + startOffset: 2109 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2109 + endOffset: 2450 +- name: 'Differential Privacy Explained: formal definition, use cases, and libraries + (Tumult)' + startOffset: 2450 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2450 + endOffset: 2708 +- name: 'Anonymization Pitfalls: hashing, k‑anonymity, Netflix de‑anonymization lessons' + startOffset: 2708 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2708 + endOffset: 2820 +- name: 'Designing for Privacy: consent, data minimization, and workflow practices' + startOffset: 2820 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2820 + endOffset: 3155 +- name: 'Generative AI & Privacy: ChatGPT incidents, consent, retention, and enterprise + options' + startOffset: 3155 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3155 + endOffset: 3569 +- name: 'Deploying Localized Models: Azure localization, fine‑tuning, and ownership' + startOffset: 3569 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3569 + endOffset: 3675 +- name: 'Further Learning: Probably Private newsletter, notebooks, and differential + privacy resources' + startOffset: 3675 + url: 
https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3675 + endOffset: 3764 +- name: 'Episode Close: final notes, social links, and next steps' + startOffset: 3764 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3764 + endOffset: 3688 + transcript: - header: Episode Introduction - header: 'Guest Introduction: Katharine Jarmul — privacy activist, ML engineer, ThoughtWorks, @@ -1268,112 +1366,6 @@ transcript: sec: 3788 time: '1:03:08' who: Katharine -description: Discover differential privacy, federated learning and PETs - privacy - engineering, consent UX fixes and compliance to reduce re-identification risk. -intro: 'How can teams build useful machine learning while respecting user privacy, - compliance, and re‑identification risk? In this episode, Katharine Jarmul — privacy - activist and Principal Data Scientist at ThoughtWorks Germany — walks through a - practical Data Privacy Playbook focused on differential privacy, federated learning, - privacy‑enhancing technologies (PETs) and consent UX.

Katharine draws on - a career from data journalism and NLP to startup work at KI Protect and enterprise - ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies - for pseudonymisation, encrypted ML and federated architectures. We cover consent - and opt‑out UX, legal vs technical definitions of privacy, profiling and fingerprinting - risks, and privacy‑friendly personalization like session‑based intent and ephemeral - inference.

You’ll get concrete takeaways: why differential privacy matters - (formal definition, use cases, Tumult and other libraries), common anonymization - pitfalls (hashing, k‑anonymity, Netflix lessons), how PETs fit into system design, - and generative AI privacy considerations including retention and localized model - deployment. Listeners leave with actionable guidance on privacy engineering, data - minimization, consent design, and resources to continue learning.' -dateadded: '2023-05-20' -duration: PT01H01M28S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=0 - endOffset: 100 -- name: 'Guest Introduction: Katharine Jarmul — privacy activist, ML engineer, ThoughtWorks, - book' - startOffset: 100 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=100 - endOffset: 152 -- name: 'Career Journey: data journalism, NLP, consulting, and machine learning' - startOffset: 152 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=152 - endOffset: 548 -- name: 'Startup Focus: KI Protect, pseudonymisation, encrypted & federated ML' - startOffset: 548 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=548 - endOffset: 693 -- name: 'Privacy Regulation Overview: GDPR, CCPA, CPRA and cookie consent defaults' - startOffset: 693 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=693 - endOffset: 875 -- name: 'Cookie Consent & Opt‑Out UX: one‑click rejects and user behavior' - startOffset: 875 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=875 - endOffset: 984 -- name: 'Defining Data Privacy: legal, social, and technical perspectives' - startOffset: 984 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=984 - endOffset: 1295 -- name: 'Practical Data Privacy (book): availability, previews, and giveaways' - startOffset: 1295 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1295 - endOffset: 1358 -- name: 'Bridging Legal & Technical Views: privacy risk, translation, and collaboration' - startOffset: 1358 - 
url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1358 - endOffset: 1512 -- name: 'User Profiling & Fingerprinting: browser history, apps, and re‑identification - risks' - startOffset: 1512 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1512 - endOffset: 1815 -- name: 'Privacy‑Friendly Personalization: session‑based intent and ephemeral inference' - startOffset: 1815 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1815 - endOffset: 1988 -- name: 'Privacy Engineering & PETs: encrypted ML, federated learning, and architecture' - startOffset: 1988 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1988 - endOffset: 2109 -- name: 'Business Case for Privacy: risk management, regulation, and customer trust' - startOffset: 2109 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2109 - endOffset: 2450 -- name: 'Differential Privacy Explained: formal definition, use cases, and libraries - (Tumult)' - startOffset: 2450 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2450 - endOffset: 2708 -- name: 'Anonymization Pitfalls: hashing, k‑anonymity, Netflix de‑anonymization lessons' - startOffset: 2708 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2708 - endOffset: 2820 -- name: 'Designing for Privacy: consent, data minimization, and workflow practices' - startOffset: 2820 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2820 - endOffset: 3155 -- name: 'Generative AI & Privacy: ChatGPT incidents, consent, retention, and enterprise - options' - startOffset: 3155 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3155 - endOffset: 3569 -- name: 'Deploying Localized Models: Azure localization, fine‑tuning, and ownership' - startOffset: 3569 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3569 - endOffset: 3675 -- name: 'Further Learning: Probably Private newsletter, notebooks, and differential - privacy resources' - startOffset: 3675 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3675 - endOffset: 3764 -- name: 'Episode Close: final notes, 
social links, and next steps' - startOffset: 3764 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3764 - endOffset: 3688 --- Links: diff --git a/_podcast/s12e02-business-skills-for-data-professionals.md b/_podcast/data-professionals-business-skills-in-saas.md similarity index 97% rename from _podcast/s12e02-business-skills-for-data-professionals.md rename to _podcast/data-professionals-business-skills-in-saas.md index 9a30f296..34b10f8e 100644 --- a/_podcast/s12e02-business-skills-for-data-professionals.md +++ b/_podcast/data-professionals-business-skills-in-saas.md @@ -1,20 +1,124 @@ --- +title: 'Practical Skills for Data Professionals in SaaS: Bridging the Gap between Data and Business' +short: Practical Skills for Data Professionals in SaaS +season: 12 episode: 2 guests: - lorismarini +image: images/podcast/s12e02-business-skills-for-data-professionals.jpg ids: anchor: Business-Skills-for-Data-Professionals---Loris-Marini-e1s89hu youtube: xMYRUiTu960 -image: images/podcast/s12e02-business-skills-for-data-professionals.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Business-Skills-for-Data-Professionals---Loris-Marini-e1s89hu apple: https://podcasts.apple.com/us/podcast/business-skills-for-data-professionals-loris-marini/id1541710331?i=1000590422440 spotify: https://open.spotify.com/episode/5tw3qs1XHETDPYrxdEaVbK?si=QIclWOT_QhKhIGrcl-KQXg youtube: https://www.youtube.com/watch?v=xMYRUiTu960 -season: 12 -short: Business Skills for Data Professionals -title: 'Practical Data Science for SaaS: Deployments, Marketing Automation, Metrics - & Storytelling' + +description: 'Discover practical data science for SaaS: deploy ML, build marketing automation, define metrics and reduce churn—stakeholder tactics, tooling, and storytelling insights.' +intro: 'How do you move data science from experiments to measurable impact in a SaaS business? 
In this episode, Loris Marini — CEO and founder of Discovering Data and host of the Discovering Data podcast — walks through practical approaches to deploying models, building marketing automation, and turning metrics into persuasive stories.

Loris covers production challenges for model deployment in SaaS, a marketing automation use case (recommendations and reporting), and how applied research like reinforcement learning maps to real problems. We dig into semantic alignment — defining "customer" and core metrics — plus lead indicators, stickiness, churn, and causal thinking for product metrics. Loris also shares tactics for onboarding stakeholders: stakeholder mapping, CRM-style context capture, meeting immersion, and Notion-based note systems. He emphasizes pragmatic tools (Excel, pivots), prioritizing high-connectivity opportunities, and a conversation-first diagnostic before ML. Finally, learn data storytelling techniques, building trust through active listening and business literacy, and where to find further resources and community.

Listen to gain concrete strategies for model deployment, marketing automation, measurement, and communicating data-driven outcomes in SaaS.' +dateadded: 2022-12-17 + +duration: PT01H15S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=0 + endOffset: 102 +- name: 'Guest Background: From Physics to Data Science' + startOffset: 102 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=102 + endOffset: 165 +- name: 'Early Data Role: Research Skills Applied in a Startup' + startOffset: 165 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=165 + endOffset: 291 +- name: 'Production Challenges: Deploying Models in a SaaS' + startOffset: 291 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=291 + endOffset: 378 +- name: 'Marketing Automation Use Case: Recommendations & Reporting' + startOffset: 378 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=378 + endOffset: 510 +- name: 'Applied Research: Reinforcement Learning to Practical Problems' + startOffset: 510 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=510 + endOffset: 739 +- name: 'Semantic Alignment: Defining "Customer" and Core Metrics' + startOffset: 739 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=739 + endOffset: 946 +- name: 'Lead Indicators & Stickiness: Churn and Causal Thinking' + startOffset: 946 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=946 + endOffset: 1080 +- name: 'Context & Semantics: Cross-Functional Meaning in Data' + startOffset: 1080 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1080 + endOffset: 1306 +- name: 'Data Storytelling: Marketing Techniques for Memorable Communication' + startOffset: 1306 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1306 + endOffset: 1553 +- name: 'Building Trust: Active Listening and Business Literacy' + startOffset: 1553 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1553 + endOffset: 1675 +- name: 'Onboarding Strategy: Stakeholder Mapping and 
Prioritization' + startOffset: 1675 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1675 + endOffset: 2120 +- name: 'Stakeholder CRM: Capturing Names, Roles, and Context' + startOffset: 2120 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2120 + endOffset: 2271 +- name: 'Meeting Immersion: Learning Business Language by Attendance' + startOffset: 2271 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2271 + endOffset: 2493 +- name: 'Note Systems: Using Notion to Track Meetings and Key Activities' + startOffset: 2493 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2493 + endOffset: 2622 +- name: 'Tooling & IP Considerations: Personal Knowledge vs Company Systems' + startOffset: 2622 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2622 + endOffset: 2713 +- name: 'Prioritization: Choosing Projects by Stakeholder Impact' + startOffset: 2713 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2713 + endOffset: 2830 +- name: 'Opportunity Selection: Finding High-Connectivity Data Projects' + startOffset: 2830 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2830 + endOffset: 3061 +- name: 'Pragmatism in Tools: Excel, Pivot Tables, and Rapid Experiments' + startOffset: 3061 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3061 + endOffset: 3188 +- name: 'Conversation First: Description and Diagnostic Before ML' + startOffset: 3188 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3188 + endOffset: 3373 +- name: 'Presenting Online: Podcasting, Pauses, and Audio Practices' + startOffset: 3373 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3373 + endOffset: 3515 +- name: 'Resources: Discovering Data Podcast for Business Skills' + startOffset: 3515 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3515 + endOffset: 3633 +- name: 'Community Building: Joining the Discovering Data Discord' + startOffset: 3633 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3633 + endOffset: 3683 +- name: Episode Wrap-Up and Contact Links + 
startOffset: 3683 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3683 + endOffset: 3615 + transcript: - header: Podcast Introduction - header: 'Guest Background: From Physics to Data Science' @@ -1122,125 +1226,6 @@ transcript: sec: 3717 time: '1:01:57' who: Loris -description: 'Discover practical data science for SaaS: deploy ML, build marketing - automation, define metrics and reduce churn—stakeholder tactics, tooling, and storytelling - insights.' -intro: 'How do you move data science from experiments to measurable impact in a SaaS - business? In this episode, Loris Marini — CEO and founder of Discovering Data and - host of the Discovering Data podcast — walks through practical approaches to deploying - models, building marketing automation, and turning metrics into persuasive stories. -

Drawing on a journey "from physics to data science," Loris covers production - challenges for model deployment in SaaS, a marketing automation use case (recommendations - and reporting), and how applied research like reinforcement learning maps to real - problems. We dig into semantic alignment — defining "customer" and core metrics - — plus lead indicators, stickiness, churn, and causal thinking for product metrics. - Loris also shares tactics for onboarding stakeholders: stakeholder mapping, CRM-style - context capture, meeting immersion, and Notion-based note systems. He emphasizes - pragmatic tools (Excel, pivots), prioritizing high-connectivity opportunities, and - a conversation-first diagnostic before ML. Finally, learn data storytelling techniques, - building trust through active listening and business literacy, and where to find - further resources and community.

Listen to gain concrete strategies for - model deployment, marketing automation, measurement, and communicating data-driven - outcomes in SaaS.' -dateadded: '2022-12-17' -duration: PT01H15S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=0 - endOffset: 102 -- name: 'Guest Background: From Physics to Data Science' - startOffset: 102 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=102 - endOffset: 165 -- name: 'Early Data Role: Research Skills Applied in a Startup' - startOffset: 165 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=165 - endOffset: 291 -- name: 'Production Challenges: Deploying Models in a SaaS' - startOffset: 291 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=291 - endOffset: 378 -- name: 'Marketing Automation Use Case: Recommendations & Reporting' - startOffset: 378 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=378 - endOffset: 510 -- name: 'Applied Research: Reinforcement Learning to Practical Problems' - startOffset: 510 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=510 - endOffset: 739 -- name: 'Semantic Alignment: Defining "Customer" and Core Metrics' - startOffset: 739 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=739 - endOffset: 946 -- name: 'Lead Indicators & Stickiness: Churn and Causal Thinking' - startOffset: 946 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=946 - endOffset: 1080 -- name: 'Context & Semantics: Cross-Functional Meaning in Data' - startOffset: 1080 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1080 - endOffset: 1306 -- name: 'Data Storytelling: Marketing Techniques for Memorable Communication' - startOffset: 1306 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1306 - endOffset: 1553 -- name: 'Building Trust: Active Listening and Business Literacy' - startOffset: 1553 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1553 - endOffset: 1675 -- name: 'Onboarding Strategy: Stakeholder Mapping and 
Prioritization' - startOffset: 1675 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1675 - endOffset: 2120 -- name: 'Stakeholder CRM: Capturing Names, Roles, and Context' - startOffset: 2120 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2120 - endOffset: 2271 -- name: 'Meeting Immersion: Learning Business Language by Attendance' - startOffset: 2271 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2271 - endOffset: 2493 -- name: 'Note Systems: Using Notion to Track Meetings and Key Activities' - startOffset: 2493 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2493 - endOffset: 2622 -- name: 'Tooling & IP Considerations: Personal Knowledge vs Company Systems' - startOffset: 2622 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2622 - endOffset: 2713 -- name: 'Prioritization: Choosing Projects by Stakeholder Impact' - startOffset: 2713 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2713 - endOffset: 2830 -- name: 'Opportunity Selection: Finding High-Connectivity Data Projects' - startOffset: 2830 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2830 - endOffset: 3061 -- name: 'Pragmatism in Tools: Excel, Pivot Tables, and Rapid Experiments' - startOffset: 3061 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3061 - endOffset: 3188 -- name: 'Conversation First: Description and Diagnostic Before ML' - startOffset: 3188 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3188 - endOffset: 3373 -- name: 'Presenting Online: Podcasting, Pauses, and Audio Practices' - startOffset: 3373 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3373 - endOffset: 3515 -- name: 'Resources: Discovering Data Podcast for Business Skills' - startOffset: 3515 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3515 - endOffset: 3633 -- name: 'Community Building: Joining the Discovering Data Discord' - startOffset: 3633 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3633 - endOffset: 3683 -- name: Episode Wrap-Up and Contact Links - 
startOffset: 3683 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3683 - endOffset: 3615 --- Links: diff --git a/_podcast/s03e03-data-observability.md b/_podcast/data-quality-data-observability-data-reliability.md similarity index 97% rename from _podcast/s03e03-data-observability.md rename to _podcast/data-quality-data-observability-data-reliability.md index ce57ed06..5632852c 100644 --- a/_podcast/s03e03-data-observability.md +++ b/_podcast/data-quality-data-observability-data-reliability.md @@ -1,12 +1,11 @@ --- -title: 'Data Observability Explained: 5 Pillars to Prevent Downtime, Drift & False - Positives' +title: 'Data Observability Explained: 5 Pillars to Prevent Downtime, Drift & False Positives' short: 'Data Observability: The Next Frontier of Data Engineering' +season: 3 +episode: 3 guests: - barrmoses image: images/podcast/s03e03-data-observability.jpg -season: 3 -episode: 3 ids: youtube: TrMG1SOqZkQ anchor: Data-Observability---Barr-Moses-evghmh @@ -15,6 +14,115 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Observability---Barr-Moses-evghmh spotify: https://open.spotify.com/episode/48QcLAw2I1apC1jeo8e1sd apple: https://podcasts.apple.com/us/podcast/data-observability-barr-moses/id1541710331?i=1000518351217 + +description: Discover data observability, freshness, lineage and schema detection to prevent downtime, stop model drift and cut false positives in pipelines +intro: How do you prevent data downtime, drift, and false positives before they break analytics and models? In this episode, Barr Moses, CEO and co‑founder of Monte Carlo and former VP of Customer Operations at Gainsight, walks through a practical framework for data observability grounded in real-world incidents and DevOps principles.

Barr explains why batch data needs different approaches than app monitoring and outlines the Five Pillars of Data Observability—freshness, volume, distribution, schema, and lineage. You’ll hear a schema‑change case study, learn how silent failures and model drift occur, and how to move from monitoring to true observability for faster root cause analysis using correlation, logs, and lineage. The conversation covers accountability models (RACI), defining and automating data SLAs, operational runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria for end‑to‑end platforms versus point tools.

Listeners will get actionable guidance on reducing false positives, prioritizing pipeline fixes, implementing auto lineage, and applying anomaly detection with contextual alerts—practical steps to improve data quality, reliability, and observability across cloud‑agnostic environments +topics: +- MLOps +- data observability +dateadded: 2021-04-24 + +duration: PT01H01M50S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=0 + endOffset: 108 +- name: 'Guest Profile: Barr Moses — career, GainSight, Monte Carlo' + startOffset: 108 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=108 + endOffset: 275 +- name: 'Market Gap: Data downtime impact on analytics teams' + startOffset: 275 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=275 + endOffset: 416 +- name: 'Observability Origins: DevOps pillars (metrics, logs, traces)' + startOffset: 416 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=416 + endOffset: 589 +- name: 'Batch Data Challenges: Why data observability differs from app monitoring' + startOffset: 589 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=589 + endOffset: 820 +- name: 'Silent Failures: Invisible data quality incidents and model drift' + startOffset: 820 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=820 + endOffset: 998 +- name: 'Five Pillars of Data Observability: Freshness, Volume, Distribution, Schema, + Lineage' + startOffset: 998 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=998 + endOffset: 1150 +- name: 'Schema Change Case Study: Downstream breakage and missed notifications' + startOffset: 1150 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1150 + endOffset: 1317 +- name: 'Good Pipelines, Bad Data: Need for engineering and data observability' + startOffset: 1317 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1317 + endOffset: 1471 +- name: 'Monitoring vs Observability: Detection versus diagnosis' + startOffset: 1471 + url: 
https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1471 + endOffset: 1564 +- name: 'Root Cause Analysis: Correlation, logs, lineage for triage' + startOffset: 1564 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1564 + endOffset: 1740 +- name: 'Accountability Models: RACI for data ownership and communication' + startOffset: 1740 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1740 + endOffset: 2124 +- name: 'Data SLAs: Defining timeliness and prioritizing pipeline fixes' + startOffset: 2124 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2124 + endOffset: 2294 +- name: 'SLA Automation: Inferring thresholds from historical data' + startOffset: 2294 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2294 + endOffset: 2463 +- name: 'Operational Runbooks: Playbooks and remediation workflows' + startOffset: 2463 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2463 + endOffset: 2580 +- name: 'Maturity Curve: Reactive → Proactive → Automated → Scalable' + startOffset: 2580 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2580 + endOffset: 2820 +- name: 'Platform Criteria: End-to-end integration and reducing false positives' + startOffset: 2820 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2820 + endOffset: 2992 +- name: 'Open Source Landscape: Point tools versus holistic observability' + startOffset: 2992 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2992 + endOffset: 3052 +- name: 'Test-Driven Data Development: Tests, DBT checks, and limitations' + startOffset: 3052 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3052 + endOffset: 3263 +- name: 'Cloud Agnosticism: Integrations across AWS, GCP, Snowflake' + startOffset: 3263 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3263 + endOffset: 3417 +- name: 'Centralized Governance: Observability across distributed environments' + startOffset: 3417 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3417 + endOffset: 3531 +- name: 'Auto Lineage: Detecting upstream and downstream 
data impact' + startOffset: 3531 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3531 + endOffset: 3627 +- name: 'Anomalies vs Bad Data: Contextual alerts and reducing false positives' + startOffset: 3627 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3627 + endOffset: 3770 +- name: Closing Remarks & Contact Resources (Monte Carlo, links, Slack) + startOffset: 3770 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3770 + endOffset: 3710 + transcript: - header: Podcast Introduction - header: 'Guest Profile: Barr Moses — career, GainSight, Monte Carlo' @@ -976,123 +1084,6 @@ transcript: sec: 3818 time: '1:03:38' who: Barr -description: Discover data observability, freshness, lineage and schema detection - to prevent downtime, stop model drift and cut false positives in pipelines. -intro: How do you prevent data downtime, drift, and false positives before they break - analytics and models? In this episode, Barr Moses, CEO and co‑founder of Monte Carlo - and former VP of Customer Operations at Gainsight, walks through a practical framework - for data observability grounded in real-world incidents and DevOps principles.

- Barr explains why batch data needs different approaches than app monitoring and - outlines the Five Pillars of Data Observability—freshness, volume, distribution, - schema, and lineage. You’ll hear a schema‑change case study, learn how silent failures - and model drift occur, and how to move from monitoring to true observability for - faster root cause analysis using correlation, logs, and lineage. The conversation - covers accountability models (RACI), defining and automating data SLAs, operational - runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria - for end‑to‑end platforms versus point tools.

Listeners will get actionable - guidance on reducing false positives, prioritizing pipeline fixes, implementing - auto lineage, and applying anomaly detection with contextual alerts—practical steps - to improve data quality, reliability, and observability across cloud‑agnostic environments. -dateadded: '2021-04-24' -duration: PT01H01M50S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=0 - endOffset: 108 -- name: 'Guest Profile: Barr Moses — career, GainSight, Monte Carlo' - startOffset: 108 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=108 - endOffset: 275 -- name: 'Market Gap: Data downtime impact on analytics teams' - startOffset: 275 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=275 - endOffset: 416 -- name: 'Observability Origins: DevOps pillars (metrics, logs, traces)' - startOffset: 416 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=416 - endOffset: 589 -- name: 'Batch Data Challenges: Why data observability differs from app monitoring' - startOffset: 589 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=589 - endOffset: 820 -- name: 'Silent Failures: Invisible data quality incidents and model drift' - startOffset: 820 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=820 - endOffset: 998 -- name: 'Five Pillars of Data Observability: Freshness, Volume, Distribution, Schema, - Lineage' - startOffset: 998 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=998 - endOffset: 1150 -- name: 'Schema Change Case Study: Downstream breakage and missed notifications' - startOffset: 1150 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1150 - endOffset: 1317 -- name: 'Good Pipelines, Bad Data: Need for engineering and data observability' - startOffset: 1317 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1317 - endOffset: 1471 -- name: 'Monitoring vs Observability: Detection versus diagnosis' - startOffset: 1471 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1471 
- endOffset: 1564 -- name: 'Root Cause Analysis: Correlation, logs, lineage for triage' - startOffset: 1564 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1564 - endOffset: 1740 -- name: 'Accountability Models: RACI for data ownership and communication' - startOffset: 1740 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1740 - endOffset: 2124 -- name: 'Data SLAs: Defining timeliness and prioritizing pipeline fixes' - startOffset: 2124 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2124 - endOffset: 2294 -- name: 'SLA Automation: Inferring thresholds from historical data' - startOffset: 2294 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2294 - endOffset: 2463 -- name: 'Operational Runbooks: Playbooks and remediation workflows' - startOffset: 2463 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2463 - endOffset: 2580 -- name: 'Maturity Curve: Reactive → Proactive → Automated → Scalable' - startOffset: 2580 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2580 - endOffset: 2820 -- name: 'Platform Criteria: End-to-end integration and reducing false positives' - startOffset: 2820 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2820 - endOffset: 2992 -- name: 'Open Source Landscape: Point tools versus holistic observability' - startOffset: 2992 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2992 - endOffset: 3052 -- name: 'Test-Driven Data Development: Tests, DBT checks, and limitations' - startOffset: 3052 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3052 - endOffset: 3263 -- name: 'Cloud Agnosticism: Integrations across AWS, GCP, Snowflake' - startOffset: 3263 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3263 - endOffset: 3417 -- name: 'Centralized Governance: Observability across distributed environments' - startOffset: 3417 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3417 - endOffset: 3531 -- name: 'Auto Lineage: Detecting upstream and downstream data impact' - startOffset: 3531 - url: 
https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3531 - endOffset: 3627 -- name: 'Anomalies vs Bad Data: Contextual alerts and reducing false positives' - startOffset: 3627 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3627 - endOffset: 3770 -- name: Closing Remarks & Contact Resources (Monte Carlo, links, Slack) - startOffset: 3770 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3770 - endOffset: 3710 --- Links: diff --git a/_podcast/s13e02-analytics-for-better-world.md b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md similarity index 97% rename from _podcast/s13e02-analytics-for-better-world.md rename to _podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md index 104cd6ee..6f734daa 100644 --- a/_podcast/s13e02-analytics-for-better-world.md +++ b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md @@ -1,20 +1,142 @@ --- +title: 'Analytics for Nonprofits: Build Data Maturity, Teams, Tools & Optimization Strategies' +short: Analytics for a Better World +season: 13 episode: 2 guests: - parvathykrishnan +image: images/podcast/s13e02-analytics-for-better-world.jpg ids: anchor: Analytics-for-a-Better-World---Parvathy-Krishnan-e1vo27h youtube: b6x5zZ3C6sQ -image: images/podcast/s13e02-analytics-for-better-world.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Analytics-for-a-Better-World---Parvathy-Krishnan-e1vo27h apple: https://podcasts.apple.com/us/podcast/analytics-for-a-better-world-parvathy-krishnan/id1541710331?i=1000602678901 spotify: https://open.spotify.com/episode/5Xiuu4jMBCMuwkokXbwhE2?si=nGRQrMUaRNa5EINbtJadBA youtube: https://www.youtube.com/watch?v=b6x5zZ3C6sQ -season: 13 -short: Analytics for a Better World -title: 'Analytics for Nonprofits: Build Data Maturity, Teams, Tools & Optimization - Strategies' + +description: Learn nonprofit analytics and data maturity strategies to build teams, choose tools and optimize programs-practical roadmaps, case studies, and open resources +intro: 
How can nonprofits move from basic reporting to optimization using analytics while building the right teams, tools, and governance? In this episode, Parvathy Krishnan, CTO at Analytics for a Better World and professional doctorate in data science, walks through practical steps for building data maturity in the social sector. Drawing on discovery workshops, fellowship pilots (including a waste-collection optimization project in Nairobi), and partnerships with academic and industry groups, Parvathy explains how to assess needs, design maturity roadmaps, and prioritize short- and long-term goals.

Listen to learn how to structure nonprofit data teams (analysts, data scientists, engineers, and blended roles), select technology (KoboToolbox, PostgreSQL, dashboards, Python/R, cloud deployment), and implement process and governance practices including privacy, SOPs, and version control. The episode also covers curriculum progression—from descriptive to diagnostic, predictive, and optimization—academy programs for practitioners and executives, open resources on YouTube and GitHub, and real-world optimization use cases like healthcare access and COVID testing lab placement. Ideal for nonprofit leaders, data practitioners, and funders seeking actionable guidance on analytics for nonprofits, data maturity, and optimization strategies +topics: +- nonprofit +- data maturity +- data science +- analytics +dateadded: 2023-03-04 + +duration: PT00H59M21S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=0 + endOffset: 70 +- name: 'Overview: Analytics for a Better World mission and guest intro' + startOffset: 70 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=70 + endOffset: 114 +- name: 'Career Path: From renewable energy to data science and CTO role' + startOffset: 114 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=114 + endOffset: 278 +- name: 'CTO Responsibilities: Connecting nonprofits with research and tech capacity' + startOffset: 278 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=278 + endOffset: 380 +- name: 'Discovery Workshops: Assessing nonprofit needs and data maturity' + startOffset: 380 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=380 + endOffset: 569 +- name: 'Fellowship Case Study: Waste-collection optimization pilot in Nairobi' + startOffset: 569 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=569 + endOffset: 753 +- name: 'Data Maturity Comparison: Nonprofit vs. 
private-sector analytics' + startOffset: 753 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=753 + endOffset: 923 +- name: 'Talent & Purpose: Motivating data professionals to join the public sector' + startOffset: 923 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=923 + endOffset: 1073 +- name: 'Academy Structure: Programs for practitioners, analytics translators, executives' + startOffset: 1073 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1073 + endOffset: 1214 +- name: 'Open Resources: YouTube lectures, GitHub, and open-source deliverables' + startOffset: 1214 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1214 + endOffset: 1346 +- name: 'Curriculum Focus: Descriptive → diagnostic → predictive → optimization' + startOffset: 1346 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1346 + endOffset: 1536 +- name: 'Audience Profile: MBA, business analytics, and technical students' + startOffset: 1536 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1536 + endOffset: 1699 +- name: 'Student Engagement: Thesis collaborations and researcher pathways' + startOffset: 1699 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1699 + endOffset: 1847 +- name: 'Maturity Roadmaps: Scans, short/long-term goals, and cost optimization' + startOffset: 1847 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1847 + endOffset: 2046 +- name: 'People Dimension: Roles for data collection, analysis, and app development' + startOffset: 2046 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2046 + endOffset: 2194 +- name: 'Process Dimension: Data governance, privacy, SOPs, and workflows' + startOffset: 2194 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2194 + endOffset: 2302 +- name: 'Technology Dimension: Centralized data, version control, and tech selection' + startOffset: 2302 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2302 + endOffset: 2368 +- name: 'Tool Recommendations: Dashboards, Python/R, and cloud deployment options' + 
startOffset: 2368 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2368 + endOffset: 2658 +- name: 'Data Platforms: KoboToolbox, PostgreSQL, and Digital Public Goods guidance' + startOffset: 2658 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2658 + endOffset: 2751 +- name: 'Team Profiles: Analysts, data scientists, engineers, and blended roles' + startOffset: 2751 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2751 + endOffset: 2955 +- name: 'Data Engineering Needs: Moving from research to deployed applications' + startOffset: 2955 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2955 + endOffset: 3006 +- name: 'Optimization Use Cases: Healthcare access and COVID testing lab placement' + startOffset: 3006 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3006 + endOffset: 3170 +- name: 'Partnerships & Staffing: Ortec, academic partners, and on-demand talent network' + startOffset: 3170 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3170 + endOffset: 3247 +- name: 'Organizational Model: Small core team and large extended research network' + startOffset: 3247 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3247 + endOffset: 3338 +- name: 'Becoming Data-Driven: Strategy plus investments in people, processes, technology' + startOffset: 3338 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3338 + endOffset: 3502 +- name: 'Recommended Reading & Daily Resources: Culture Map, 7 Habits, Towards Data + Science' + startOffset: 3502 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3502 + endOffset: 3600 +- name: Closing Remarks and links to Academy resources and contact info + startOffset: 3600 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3600 + endOffset: 3561 + transcript: - header: Podcast Introduction - header: 'Overview: Analytics for a Better World mission and guest intro' @@ -1061,137 +1183,6 @@ transcript: sec: 3631 time: '1:00:31' who: Alexey -description: Learn nonprofit analytics and data maturity strategies to 
build teams, - choose tools and optimize programs-practical roadmaps, case studies, and open resources. -intro: How can nonprofits move from basic reporting to optimization using analytics - while building the right teams, tools, and governance? In this episode, Parvathy - Krishnan, CTO at Analytics for a Better World and professional doctorate in data - science, walks through practical steps for building data maturity in the social - sector. Drawing on discovery workshops, fellowship pilots (including a waste-collection - optimization project in Nairobi), and partnerships with academic and industry groups, - Parvathy explains how to assess needs, design maturity roadmaps, and prioritize - short- and long-term goals.

Listen to learn how to structure nonprofit - data teams (analysts, data scientists, engineers, and blended roles), select technology - (KoboToolbox, PostgreSQL, dashboards, Python/R, cloud deployment), and implement - process and governance practices including privacy, SOPs, and version control. The - episode also covers curriculum progression—from descriptive to diagnostic, predictive, - and optimization—academy programs for practitioners and executives, open resources - on YouTube and GitHub, and real-world optimization use cases like healthcare access - and COVID testing lab placement. Ideal for nonprofit leaders, data practitioners, - and funders seeking actionable guidance on analytics for nonprofits, data maturity, - and optimization strategies. -dateadded: '2023-03-04' -duration: PT00H59M21S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=0 - endOffset: 70 -- name: 'Overview: Analytics for a Better World mission and guest intro' - startOffset: 70 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=70 - endOffset: 114 -- name: 'Career Path: From renewable energy to data science and CTO role' - startOffset: 114 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=114 - endOffset: 278 -- name: 'CTO Responsibilities: Connecting nonprofits with research and tech capacity' - startOffset: 278 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=278 - endOffset: 380 -- name: 'Discovery Workshops: Assessing nonprofit needs and data maturity' - startOffset: 380 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=380 - endOffset: 569 -- name: 'Fellowship Case Study: Waste-collection optimization pilot in Nairobi' - startOffset: 569 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=569 - endOffset: 753 -- name: 'Data Maturity Comparison: Nonprofit vs. 
private-sector analytics' - startOffset: 753 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=753 - endOffset: 923 -- name: 'Talent & Purpose: Motivating data professionals to join the public sector' - startOffset: 923 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=923 - endOffset: 1073 -- name: 'Academy Structure: Programs for practitioners, analytics translators, executives' - startOffset: 1073 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1073 - endOffset: 1214 -- name: 'Open Resources: YouTube lectures, GitHub, and open-source deliverables' - startOffset: 1214 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1214 - endOffset: 1346 -- name: 'Curriculum Focus: Descriptive → diagnostic → predictive → optimization' - startOffset: 1346 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1346 - endOffset: 1536 -- name: 'Audience Profile: MBA, business analytics, and technical students' - startOffset: 1536 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1536 - endOffset: 1699 -- name: 'Student Engagement: Thesis collaborations and researcher pathways' - startOffset: 1699 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1699 - endOffset: 1847 -- name: 'Maturity Roadmaps: Scans, short/long-term goals, and cost optimization' - startOffset: 1847 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1847 - endOffset: 2046 -- name: 'People Dimension: Roles for data collection, analysis, and app development' - startOffset: 2046 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2046 - endOffset: 2194 -- name: 'Process Dimension: Data governance, privacy, SOPs, and workflows' - startOffset: 2194 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2194 - endOffset: 2302 -- name: 'Technology Dimension: Centralized data, version control, and tech selection' - startOffset: 2302 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2302 - endOffset: 2368 -- name: 'Tool Recommendations: Dashboards, Python/R, and cloud deployment options' - 
startOffset: 2368 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2368 - endOffset: 2658 -- name: 'Data Platforms: KoboToolbox, PostgreSQL, and Digital Public Goods guidance' - startOffset: 2658 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2658 - endOffset: 2751 -- name: 'Team Profiles: Analysts, data scientists, engineers, and blended roles' - startOffset: 2751 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2751 - endOffset: 2955 -- name: 'Data Engineering Needs: Moving from research to deployed applications' - startOffset: 2955 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2955 - endOffset: 3006 -- name: 'Optimization Use Cases: Healthcare access and COVID testing lab placement' - startOffset: 3006 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3006 - endOffset: 3170 -- name: 'Partnerships & Staffing: Ortec, academic partners, and on-demand talent network' - startOffset: 3170 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3170 - endOffset: 3247 -- name: 'Organizational Model: Small core team and large extended research network' - startOffset: 3247 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3247 - endOffset: 3338 -- name: 'Becoming Data-Driven: Strategy plus investments in people, processes, technology' - startOffset: 3338 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3338 - endOffset: 3502 -- name: 'Recommended Reading & Daily Resources: Culture Map, 7 Habits, Towards Data - Science' - startOffset: 3502 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3502 - endOffset: 3600 -- name: Closing Remarks and links to Academy resources and contact info - startOffset: 3600 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3600 - endOffset: 3561 --- Links: diff --git a/_podcast/s02e07-abc-data-science.md b/_podcast/data-science-career-abc-framework.md similarity index 98% rename from _podcast/s02e07-abc-data-science.md rename to _podcast/data-science-career-abc-framework.md index f2580cce..189e5132 100644 --- 
a/_podcast/s02e07-abc-data-science.md +++ b/_podcast/data-science-career-abc-framework.md @@ -1,12 +1,11 @@ --- -title: 'Data Science Career Guide: ABC Framework (Analyst, Builder, Consultant) & - Transition Tips' +title: 'Data Science Career Guide: ABC Framework (Analyst, Builder, Consultant) & Transition Tips' short: The ABC’s of Data Science +season: 2 +episode: 7 guests: - dannyma image: images/podcast/s02e07-abc-data-science.jpg -season: 2 -episode: 7 ids: youtube: HVQ0DZOQcts anchor: The-ABCs-of-Data-Science---Danny-Ma-er33oa @@ -15,6 +14,140 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-ABCs-of-Data-Science---Danny-Ma-er33oa spotify: https://open.spotify.com/episode/5T1Nm3HvrS9oIMH6C2AWcf apple: https://podcasts.apple.com/us/podcast/the-abcs-of-data-science-danny-ma/id1541710331?i=1000510794953 + +description: 'Master the Data Science ABC Framework: Analyst, Builder, Consultant. Get SQL, Python, MLOps career tips, project roadmap, transition strategies to land roles.' +intro: 'How do you pick the right data science path—and actually make the transition? In this episode, Danny Ma, a recovering data scientist now focused on ML and data engineering, walks through his ABC Framework (Analyst, Builder, Consultant) and pragmatic steps for career moves. Danny, who runs the #DataWithDanny community (4,500+ members) and specializes in analytics, supervised ML, data architecture and digital customer experiments, traces his own shift from SQL/SAS/Excel workflows to Python, Kaggle projects and production systems.

We cover the ABC Framework origins and definitions: Type A (Analyst) — data exploration, visualization and storytelling; Type B (Builder) — ML engineering, MLOps and production mindset; Type C (Consultant/Leader) — stakeholder persuasion and strategy. Danny shares transition tactics: build projects first, learn theory as needed, core tools (Git, Docker, cloud), practicing engineering via mini-projects and mentorship, portfolio and referral strategies, and when advanced degrees matter. Tune in to get concrete guidance on skills to prioritize, how to gain production experience, and a clear roadmap from SQL → visualization → ML → deep learning to advance your data science career.' +topics: +- career transition +- data science +- machine learning +- data analysis +dateadded: 2021-02-26 + +duration: PT01H24M57S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=0 + endOffset: 49 +- name: LinkedIn Memes & Creative Editing for Data Audiences + startOffset: 49 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=49 + endOffset: 213 +- name: 'Career Journey: Analytics to Data Science' + startOffset: 213 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=213 + endOffset: 296 +- name: Transition to Python, Kaggle & Self-Directed Learning + startOffset: 296 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=296 + endOffset: 392 +- name: 'Early Tools: SQL, SAS and Excel Workflows' + startOffset: 392 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=392 + endOffset: 499 +- name: 'Moving into Data Science: Team Integration at a Bank' + startOffset: 499 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=499 + endOffset: 546 +- name: 'Machine Learning Projects: Propensity Models & Experimentation' + startOffset: 546 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=546 + endOffset: 689 +- name: Origins of the ABC Framework for Data Science Roles + startOffset: 689 + url: 
https://www.youtube.com/watch?v=HVQ0DZOQcts&t=689 + endOffset: 738 +- name: 'Defining the Three Profiles: Analyst, Builder, Consultant' + startOffset: 738 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=738 + endOffset: 797 +- name: 'Type A (Analyst): Data Exploration, Visualization & Storytelling' + startOffset: 797 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=797 + endOffset: 961 +- name: 'Type A Backgrounds: Research, Statistics & Analyst Pathways' + startOffset: 961 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=961 + endOffset: 1100 +- name: 'Type A Skillset: Programming, Theory, Experiment Design' + startOffset: 1100 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1100 + endOffset: 1201 +- name: 'Learning Strategy: Build Projects First, Learn Theory When Needed' + startOffset: 1201 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1201 + endOffset: 1314 +- name: 'Curiosity Spectrum: Depth of Inquiry & Learning Motivation' + startOffset: 1314 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1314 + endOffset: 1553 +- name: 'Type B (Builder): ML Engineering, MLOps & Production Systems' + startOffset: 1553 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1553 + endOffset: 1706 +- name: Technical Debt, Production Mindset & Systemic Risk + startOffset: 1706 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1706 + endOffset: 1826 +- name: 'Pathway A→B: Gaining Production Experience & On-the-Job Pressure' + startOffset: 1826 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1826 + endOffset: 1992 +- name: 'Core Tools for Transition: Git, Docker, Cloud Platforms' + startOffset: 1992 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1992 + endOffset: 2206 +- name: 'Practicing Engineering Skills Outside Work: Mentors & Mini-Projects' + startOffset: 2206 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2206 + endOffset: 2558 +- name: 'Type C (Consultant/Leader): Stakeholder Persuasion & Strategy' + startOffset: 2558 + url: 
https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2558 + endOffset: 2929 +- name: 'Testing Leadership: Shifting from Hands-On to People Management' + startOffset: 2929 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2929 + endOffset: 3288 +- name: 'Building a Lean Data Science Team: Roles, Tech Lead & Data Lead' + startOffset: 3288 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3288 + endOffset: 3716 +- name: Domain Expertise vs Technical Specialization for Career Mobility + startOffset: 3716 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3716 + endOffset: 3851 +- name: 'Breaking In: Project Portfolios, Referrals & Application Strategy' + startOffset: 3851 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3851 + endOffset: 4042 +- name: 'Entry Choice: Analyst vs Builder — Trade-offs & Competitive Edge' + startOffset: 4042 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4042 + endOffset: 4346 +- name: 'Bootcamps & Intensives: Benefits, Limits & Realistic Expectations' + startOffset: 4346 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4346 + endOffset: 4477 +- name: 'Serious SQL Course: Curriculum, Case Studies & Apprenticeship Model' + startOffset: 4477 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4477 + endOffset: 4745 +- name: 'Data Science Roadmap: SQL → Visualization → ML → Deep Learning' + startOffset: 4745 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4745 + endOffset: 4984 +- name: 'Advanced Degrees: When Master''s/PhD Matter in Data Science Roles' + startOffset: 4984 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4984 + endOffset: 5121 +- name: Episode Wrap-up, Resources & Next Steps + startOffset: 5121 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=5121 + endOffset: 5097 + transcript: - header: Podcast Introduction - header: LinkedIn Memes & Creative Editing for Data Audiences @@ -1251,147 +1384,6 @@ transcript: sec: 5146 time: '1:25:46' who: Danny -description: 'Master the Data Science ABC Framework: 
Analyst, Builder, Consultant. - Get SQL, Python, MLOps career tips, project roadmap, transition strategies to land - roles.' -intro: 'How do you pick the right data science path—and actually make the transition? - In this episode, Danny Ma, a recovering data scientist now focused on ML and data - engineering, walks through his ABC Framework (Analyst, Builder, Consultant) and - pragmatic steps for career moves. Danny, who runs the #DataWithDanny community (4,500+ - members) and specializes in analytics, supervised ML, data architecture and digital - customer experiments, traces his own shift from SQL/SAS/Excel workflows to Python, - Kaggle projects and production systems.

We cover the ABC Framework origins - and definitions: Type A (Analyst) — data exploration, visualization and storytelling; - Type B (Builder) — ML engineering, MLOps and production mindset; Type C (Consultant/Leader) - — stakeholder persuasion and strategy. Danny shares transition tactics: build projects - first, learn theory as needed, core tools (Git, Docker, cloud), practicing engineering - via mini-projects and mentorship, portfolio and referral strategies, and when advanced - degrees matter. Tune in to get concrete guidance on skills to prioritize, how to - gain production experience, and a clear roadmap from SQL → visualization → ML → - deep learning to advance your data science career.' -dateadded: '2021-02-26' -duration: PT01H24M57S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=0 - endOffset: 49 -- name: LinkedIn Memes & Creative Editing for Data Audiences - startOffset: 49 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=49 - endOffset: 213 -- name: 'Career Journey: Analytics to Data Science' - startOffset: 213 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=213 - endOffset: 296 -- name: Transition to Python, Kaggle & Self-Directed Learning - startOffset: 296 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=296 - endOffset: 392 -- name: 'Early Tools: SQL, SAS and Excel Workflows' - startOffset: 392 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=392 - endOffset: 499 -- name: 'Moving into Data Science: Team Integration at a Bank' - startOffset: 499 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=499 - endOffset: 546 -- name: 'Machine Learning Projects: Propensity Models & Experimentation' - startOffset: 546 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=546 - endOffset: 689 -- name: Origins of the ABC Framework for Data Science Roles - startOffset: 689 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=689 - endOffset: 738 -- name: 'Defining the Three 
Profiles: Analyst, Builder, Consultant' - startOffset: 738 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=738 - endOffset: 797 -- name: 'Type A (Analyst): Data Exploration, Visualization & Storytelling' - startOffset: 797 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=797 - endOffset: 961 -- name: 'Type A Backgrounds: Research, Statistics & Analyst Pathways' - startOffset: 961 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=961 - endOffset: 1100 -- name: 'Type A Skillset: Programming, Theory, Experiment Design' - startOffset: 1100 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1100 - endOffset: 1201 -- name: 'Learning Strategy: Build Projects First, Learn Theory When Needed' - startOffset: 1201 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1201 - endOffset: 1314 -- name: 'Curiosity Spectrum: Depth of Inquiry & Learning Motivation' - startOffset: 1314 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1314 - endOffset: 1553 -- name: 'Type B (Builder): ML Engineering, MLOps & Production Systems' - startOffset: 1553 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1553 - endOffset: 1706 -- name: Technical Debt, Production Mindset & Systemic Risk - startOffset: 1706 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1706 - endOffset: 1826 -- name: 'Pathway A→B: Gaining Production Experience & On-the-Job Pressure' - startOffset: 1826 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1826 - endOffset: 1992 -- name: 'Core Tools for Transition: Git, Docker, Cloud Platforms' - startOffset: 1992 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1992 - endOffset: 2206 -- name: 'Practicing Engineering Skills Outside Work: Mentors & Mini-Projects' - startOffset: 2206 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2206 - endOffset: 2558 -- name: 'Type C (Consultant/Leader): Stakeholder Persuasion & Strategy' - startOffset: 2558 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2558 - endOffset: 2929 -- name: 'Testing 
Leadership: Shifting from Hands-On to People Management' - startOffset: 2929 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2929 - endOffset: 3288 -- name: 'Building a Lean Data Science Team: Roles, Tech Lead & Data Lead' - startOffset: 3288 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3288 - endOffset: 3716 -- name: Domain Expertise vs Technical Specialization for Career Mobility - startOffset: 3716 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3716 - endOffset: 3851 -- name: 'Breaking In: Project Portfolios, Referrals & Application Strategy' - startOffset: 3851 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3851 - endOffset: 4042 -- name: 'Entry Choice: Analyst vs Builder — Trade-offs & Competitive Edge' - startOffset: 4042 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4042 - endOffset: 4346 -- name: 'Bootcamps & Intensives: Benefits, Limits & Realistic Expectations' - startOffset: 4346 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4346 - endOffset: 4477 -- name: 'Serious SQL Course: Curriculum, Case Studies & Apprenticeship Model' - startOffset: 4477 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4477 - endOffset: 4745 -- name: 'Data Science Roadmap: SQL → Visualization → ML → Deep Learning' - startOffset: 4745 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4745 - endOffset: 4984 -- name: 'Advanced Degrees: When Master''s/PhD Matter in Data Science Roles' - startOffset: 4984 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4984 - endOffset: 5121 -- name: Episode Wrap-up, Resources & Next Steps - startOffset: 5121 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=5121 - endOffset: 5097 --- Links: diff --git a/_podcast/s03e09-what-data-scientists-dont-mention.md b/_podcast/data-science-failures-and-mlops-lessons.md similarity index 97% rename from _podcast/s03e09-what-data-scientists-dont-mention.md rename to _podcast/data-science-failures-and-mlops-lessons.md index 327b25da..a59121d5 100644 --- 
a/_podcast/s03e09-what-data-scientists-dont-mention.md +++ b/_podcast/data-science-failures-and-mlops-lessons.md @@ -1,12 +1,11 @@ --- -title: 'Turn Data Science Project Failures into Career Wins: Production Lessons, MLOps - Fixes & Framing Failures on LinkedIn' +title: 'Turn Data Science Project Failures into Career Wins: Production Lessons, MLOps Fixes & Framing Failures on LinkedIn' short: What Data Scientists Don’t Mention in Their LinkedIn Profiles +season: 3 +episode: 9 guests: - yurykashnitsky image: images/podcast/s03e09-what-data-scientists-dont-mention.jpg -season: 3 -episode: 9 ids: youtube: c6dK1LWpv4g anchor: What-Data-Scientists-Dont-Mention-in-Their-LinkedIn-Profiles---Yury-Kashnitsky-e125jjl @@ -15,6 +14,136 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/What-Data-Scientists-Dont-Mention-in-Their-LinkedIn-Profiles---Yury-Kashnitsky-e125jjl spotify: https://open.spotify.com/episode/3KR6zErxqeDuQ2jo8NDvNx apple: https://podcasts.apple.com/us/podcast/what-data-scientists-dont-mention-in-their-linkedin/id1541710331?i=1000524260842 + +description: 'Discover how to turn data science project failures into career wins: practical MLOps fixes, production lessons, LinkedIn framing tips to boost hiring outcomes.' +intro: 'How do you turn data science project failures into tangible career wins — and how should you talk about them on LinkedIn? In this episode, Yury Kashnitsky, Ph.D. in applied math, Kaggle Master and Senior ML Scientist at Elsevier who also leads the open course mlcourse.ai, walks through real production ML lessons and MLOps fixes learned across academia, startups and industry.

We dig into common data science pitfalls and a concrete case study (a BERT-based proofreading regression stopped early), stakeholder communication for when to kill a project, and the missing role of a data product manager. Yury breaks down engineering vs research trade-offs in deployment, production fixes like reducing re-ranking scope to meet latency, when gradient boosting beats CTR heuristics, and DevOps anti-patterns such as SSH deploys and no CI/CD. We also cover practical topics: data labeling cost/quality, going from notebooks to production, multilingual telco NLP, resume choices, interview questions about revenue-producing ML, and how to frame failed projects on LinkedIn with honesty and lessons learned.

Listen to get actionable MLOps and production-ML strategies, communication tactics for stakeholders, and guidance on reframing failures into career momentum.' +topics: +- machine learning +- MLOps +- career growth +- communication +dateadded: 2021-06-06 + +duration: PT00H59M57S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=0 + endOffset: 90 +- name: 'Episode Theme: Failures and LinkedIn Omissions' + startOffset: 90 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=90 + endOffset: 152 +- name: 'Guest Opening: Background Snapshot' + startOffset: 152 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=152 + endOffset: 185 +- name: 'Career Journey: Aviation, Academia, and Transition to NLP' + startOffset: 185 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=185 + endOffset: 298 +- name: 'CV Choices: Omitting Hobbies and Personal Details' + startOffset: 298 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=298 + endOffset: 335 +- name: 'Project Failures Overview: Common Data Science Pitfalls' + startOffset: 335 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=335 + endOffset: 382 +- name: 'Case Study — Proofreading AI: BERT Regression and Early Termination' + startOffset: 382 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=382 + endOffset: 666 +- name: 'Stakeholder Communication: Making the Call to Stop a Project' + startOffset: 666 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=666 + endOffset: 691 +- name: 'Product Management Gap: Value of a Data Product Manager' + startOffset: 691 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=691 + endOffset: 1006 +- name: 'Customer Development: Rapid Validation vs Building ML First' + startOffset: 1006 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1006 + endOffset: 1080 +- name: 'Engineering vs Research: Deployment and Serving Constraints' + startOffset: 1080 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1080 + 
endOffset: 1144 +- name: 'Production Lesson: Gradient Boosting vs CTR Heuristic Baseline' + startOffset: 1144 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1144 + endOffset: 1525 +- name: 'Performance Fix: Re-ranking Scope Reduction to Meet Latency' + startOffset: 1525 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1525 + endOffset: 1556 +- name: 'DevOps Anti-patterns: SSH Deploys, No CI/CD and Technical Debt' + startOffset: 1556 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1556 + endOffset: 1691 +- name: 'From Notebooks to Production: BI, LTV Predictions, and MLOps Needs' + startOffset: 1691 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1691 + endOffset: 1844 +- name: 'Startup Anecdote: GPU Overstock, Bitcoin, and Sentiment Analysis' + startOffset: 1844 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1844 + endOffset: 2076 +- name: 'Data Labeling Reality: Cost, Quality, and Mechanical Turk' + startOffset: 2076 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2076 + endOffset: 2118 +- name: 'Resume Strategy: Omitting Short or Sensitive Startup Stints' + startOffset: 2118 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2118 + endOffset: 2172 +- name: 'Telco NLP: Multilingual Complaint Classification & Transfer Learning' + startOffset: 2172 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2172 + endOffset: 2394 +- name: 'Too Much Freedom: Research Time vs Impactful Production Work' + startOffset: 2394 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2394 + endOffset: 2467 +- name: 'Interview Tip: Ask About Active Revenue-Producing ML in Production' + startOffset: 2467 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2467 + endOffset: 2600 +- name: 'Digital Presence: GitHub, Open Courses, Talks and Hiring Impact' + startOffset: 2600 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2600 + endOffset: 2735 +- name: 'Work-Life Balance Hacks: Focus Time and Side Projects' + startOffset: 2735 + url: 
https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2735 + endOffset: 2907 +- name: 'Public Activity ROI: A/B Tests, Talks, and Career Opportunities' + startOffset: 2907 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2907 + endOffset: 2970 +- name: 'Framing Failed Projects on LinkedIn: Honesty and Lessons Learned' + startOffset: 2970 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2970 + endOffset: 3161 +- name: 'Business-Travel Boundaries: Perm Trips and Weekend Work Limits' + startOffset: 3161 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3161 + endOffset: 3516 +- name: 'Closing Thoughts: Embracing Failures and Building Resilience' + startOffset: 3516 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3516 + endOffset: 3624 +- name: 'Contact & Resources: Open Course and Social Links' + startOffset: 3624 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3624 + endOffset: 3687 +- name: Episode Outro + startOffset: 3687 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3687 + endOffset: 3597 + transcript: - header: Episode Introduction - header: 'Episode Theme: Failures and LinkedIn Omissions' @@ -807,144 +936,6 @@ transcript: sec: 3687 time: '1:01:27' who: Yury -description: 'Discover how to turn data science project failures into career wins: - practical MLOps fixes, production lessons, LinkedIn framing tips to boost hiring - outcomes.' -intro: 'How do you turn data science project failures into tangible career wins — - and how should you talk about them on LinkedIn? In this episode, Yury Kashnitsky, - Ph.D. in applied math, Kaggle Master and Senior ML Scientist at Elsevier who also - leads the open course mlcourse.ai, walks through real production ML lessons and - MLOps fixes learned across academia, startups and industry.

We dig into - common data science pitfalls and a concrete case study (a BERT-based proofreading - regression stopped early), stakeholder communication for when to kill a project, - and the missing role of a data product manager. Yury breaks down engineering vs - research trade-offs in deployment, production fixes like reducing re-ranking scope - to meet latency, when gradient boosting beats CTR heuristics, and DevOps anti-patterns - such as SSH deploys and no CI/CD. We also cover practical topics: data labeling - cost/quality, going from notebooks to production, multilingual telco NLP, resume - choices, interview questions about revenue-producing ML, and how to frame failed - projects on LinkedIn with honesty and lessons learned.

Listen to get actionable - MLOps and production-ML strategies, communication tactics for stakeholders, and - guidance on reframing failures into career momentum.' -dateadded: '2021-06-06' -duration: PT00H59M57S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=0 - endOffset: 90 -- name: 'Episode Theme: Failures and LinkedIn Omissions' - startOffset: 90 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=90 - endOffset: 152 -- name: 'Guest Opening: Background Snapshot' - startOffset: 152 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=152 - endOffset: 185 -- name: 'Career Journey: Aviation, Academia, and Transition to NLP' - startOffset: 185 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=185 - endOffset: 298 -- name: 'CV Choices: Omitting Hobbies and Personal Details' - startOffset: 298 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=298 - endOffset: 335 -- name: 'Project Failures Overview: Common Data Science Pitfalls' - startOffset: 335 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=335 - endOffset: 382 -- name: 'Case Study — Proofreading AI: BERT Regression and Early Termination' - startOffset: 382 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=382 - endOffset: 666 -- name: 'Stakeholder Communication: Making the Call to Stop a Project' - startOffset: 666 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=666 - endOffset: 691 -- name: 'Product Management Gap: Value of a Data Product Manager' - startOffset: 691 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=691 - endOffset: 1006 -- name: 'Customer Development: Rapid Validation vs Building ML First' - startOffset: 1006 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1006 - endOffset: 1080 -- name: 'Engineering vs Research: Deployment and Serving Constraints' - startOffset: 1080 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1080 - endOffset: 1144 -- name: 'Production Lesson: Gradient Boosting vs CTR 
Heuristic Baseline' - startOffset: 1144 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1144 - endOffset: 1525 -- name: 'Performance Fix: Re-ranking Scope Reduction to Meet Latency' - startOffset: 1525 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1525 - endOffset: 1556 -- name: 'DevOps Anti-patterns: SSH Deploys, No CI/CD and Technical Debt' - startOffset: 1556 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1556 - endOffset: 1691 -- name: 'From Notebooks to Production: BI, LTV Predictions, and MLOps Needs' - startOffset: 1691 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1691 - endOffset: 1844 -- name: 'Startup Anecdote: GPU Overstock, Bitcoin, and Sentiment Analysis' - startOffset: 1844 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1844 - endOffset: 2076 -- name: 'Data Labeling Reality: Cost, Quality, and Mechanical Turk' - startOffset: 2076 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2076 - endOffset: 2118 -- name: 'Resume Strategy: Omitting Short or Sensitive Startup Stints' - startOffset: 2118 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2118 - endOffset: 2172 -- name: 'Telco NLP: Multilingual Complaint Classification & Transfer Learning' - startOffset: 2172 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2172 - endOffset: 2394 -- name: 'Too Much Freedom: Research Time vs Impactful Production Work' - startOffset: 2394 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2394 - endOffset: 2467 -- name: 'Interview Tip: Ask About Active Revenue-Producing ML in Production' - startOffset: 2467 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2467 - endOffset: 2600 -- name: 'Digital Presence: GitHub, Open Courses, Talks and Hiring Impact' - startOffset: 2600 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2600 - endOffset: 2735 -- name: 'Work-Life Balance Hacks: Focus Time and Side Projects' - startOffset: 2735 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2735 - endOffset: 2907 -- name: 'Public 
Activity ROI: A/B Tests, Talks, and Career Opportunities' - startOffset: 2907 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2907 - endOffset: 2970 -- name: 'Framing Failed Projects on LinkedIn: Honesty and Lessons Learned' - startOffset: 2970 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2970 - endOffset: 3161 -- name: 'Business-Travel Boundaries: Perm Trips and Weekend Work Limits' - startOffset: 3161 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3161 - endOffset: 3516 -- name: 'Closing Thoughts: Embracing Failures and Building Resilience' - startOffset: 3516 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3516 - endOffset: 3624 -- name: 'Contact & Resources: Open Course and Social Links' - startOffset: 3624 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3624 - endOffset: 3687 -- name: Episode Outro - startOffset: 3687 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3687 - endOffset: 3597 --- diff --git a/_podcast/s10e01-data-science-for-social-impact.md b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md similarity index 97% rename from _podcast/s10e01-data-science-for-social-impact.md rename to _podcast/data-science-for-public-policy-ethical-ai-social-impact.md index d1fda942..3dcbacb3 100644 --- a/_podcast/s10e01-data-science-for-social-impact.md +++ b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md @@ -1,19 +1,130 @@ --- +title: Data Science for Public Policy — Ethical AI, Climate Justice & Impact Projects +short: Data Science for Social Impact +season: 10 episode: 1 guests: - christinecepelak +image: images/podcast/s10e01-data-science-for-social-impact.jpg ids: anchor: Data-Science-for-Social-Impact---Christine-Cepelak-e1li47e youtube: xWC1HAfekRk -image: images/podcast/s10e01-data-science-for-social-impact.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Science-for-Social-Impact---Christine-Cepelak-e1li47e apple: 
https://podcasts.apple.com/us/podcast/data-science-for-social-impact-christine-cepelak/id1541710331?i=1000571591865 spotify: https://open.spotify.com/episode/7fzBhDrfVfylnBLCJGwUHC?si=3b03d59083804346 youtube: https://www.youtube.com/watch?v=xWC1HAfekRk -season: 10 -short: Data Science for Social Impact -title: Data Science for Public Policy — Ethical AI, Climate Justice & Impact Projects + +description: 'Learn data science for public policy: ethical AI, climate justice & impact project strategies, career tips and actionable project design for social good.' +intro: How can data science meaningfully shape public policy without becoming a tech-first solution or creating new ethical harms? In this episode, Christine Cepelak, a writer and researcher of tech and social issues who’s studying Data Science for Public Policy and has years of experience managing social programs, walks through the practical realities of data science for public policy. We cover career paths and sector differences, a community organizing case study on electronics recycling, and real-world use cases like drone computer vision for refugee aid and rooftop sustainability. Christine digs into ethical AI concerns — including the EU AI Act and social scoring risks — plus project design for long-term impact, stakeholder collaboration with NGOs, and building data pipelines amid limited IT infrastructure. Listeners will also hear about public data gaps (recycling programs, corporate transparency), research applications such as satellite imagery for poverty estimation, and future priorities like climate justice and gender equality. 
Tune in to get concrete guidance on starting volunteer impact projects, where demand for impact data scientists lies, and how to design responsible, policy-driven data work. +topics: +- data science +- public policy +- ethical AI +- social impact +dateadded: 2022-07-30 + +duration: PT00H58M44S + +quotableClips: +- name: Episode Intro & Guest Christine Cepelak + startOffset: 0 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=0 + endOffset: 93 +- name: 'Career Journey: Program Management to Data Science for Public Policy' + startOffset: 93 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=93 + endOffset: 247 +- name: 'Private vs Public Sector: Differences for Data Work' + startOffset: 247 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=247 + endOffset: 325 +- name: 'Public Policy Defined: Laws, Governance & Social Impact' + startOffset: 325 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=325 + endOffset: 507 +- name: 'Community Organizing Case Study: Electronics Recycling Campaign' + startOffset: 507 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=507 + endOffset: 614 +- name: 'Policy vs Political Science: Theory, Practice & Implementation' + startOffset: 614 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=614 + endOffset: 730 +- name: 'Education Landscape: Data Science for Public Policy Programs & DSSG' + startOffset: 730 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=730 + endOffset: 862 +- name: 'Domain Nuances: Data Science for Social Impact vs Typical Industry Work' + startOffset: 862 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=862 + endOffset: 1032 +- name: 'Use Cases: Drone Computer Vision for Refugee Aid & Rooftop Sustainability' + startOffset: 1032 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1032 + endOffset: 1266 +- name: 'Ethics & Regulation: Ethical AI, EU AI Act and Social Scoring Risks' + startOffset: 1266 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1266 + endOffset: 1581 +- name: 'Project 
Design: Long-term Impact, Iteration & Avoiding Tech-First Solutions' + startOffset: 1581 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1581 + endOffset: 1686 +- name: 'Stakeholder Collaboration: NGOs, HR Use Cases & Mining Domain Knowledge' + startOffset: 1686 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1686 + endOffset: 1832 +- name: 'Data Challenges: Building Pipelines with Limited IT Infrastructure' + startOffset: 1832 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1832 + endOffset: 2209 +- name: 'Public Data Gaps: Recycling Programs, Corporate Transparency & Access Issues' + startOffset: 2209 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2209 + endOffset: 2367 +- name: 'Future Focus Areas: Climate Justice, Gender Equality & Responsible Tech' + startOffset: 2367 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2367 + endOffset: 2419 +- name: 'Gender Inequality Solutions: Salary Transparency & Inclusive Hiring Policies' + startOffset: 2419 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2419 + endOffset: 2618 +- name: 'Corporate Responsibility: CSR Data Needs & Demand for Impact Data Scientists' + startOffset: 2618 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2618 + endOffset: 2785 +- name: 'Public Sector Roles: Chief Data Scientist and Government Data Strategy' + startOffset: 2785 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2785 + endOffset: 2899 +- name: 'Getting Started: Volunteer Projects, SDGs & Finding a Cause' + startOffset: 2899 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2899 + endOffset: 3083 +- name: 'Project Marketplaces & Career Advice: DSSG Projects and 80,000 Hours' + startOffset: 3083 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3083 + endOffset: 3256 +- name: 'Research Applications: Satellite Imagery for Poverty Estimation & Census + Gaps' + startOffset: 3256 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3256 + endOffset: 3382 +- name: 'Community Discussion: Ethics in 
AI Coffee Chats and Emerging Debates' + startOffset: 3382 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3382 + endOffset: 3486 +- name: 'Connect with Christine: Website, LinkedIn & Twitter' + startOffset: 3486 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3486 + endOffset: 3503 +- name: Episode Wrap-up & Resource Links + startOffset: 3503 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3503 + endOffset: 3524 + transcript: - header: Episode Intro & Guest Christine Cepelak - line: This week, we'll talk about data science for social impact. We have a special @@ -1154,124 +1265,6 @@ transcript: sec: 3524 time: '58:44' who: Christine -description: 'Learn data science for public policy: ethical AI, climate justice & - impact project strategies, career tips and actionable project design for social - good.' -intro: How can data science meaningfully shape public policy without becoming a tech-first - solution or creating new ethical harms? In this episode, Christine Cepelak, a writer - and researcher of tech and social issues who’s studying Data Science for Public - Policy and has years of experience managing social programs, walks through the practical - realities of data science for public policy. We cover career paths and sector differences, - a community organizing case study on electronics recycling, and real-world use cases - like drone computer vision for refugee aid and rooftop sustainability. Christine - digs into ethical AI concerns — including the EU AI Act and social scoring risks - — plus project design for long-term impact, stakeholder collaboration with NGOs, - and building data pipelines amid limited IT infrastructure. Listeners will also - hear about public data gaps (recycling programs, corporate transparency), research - applications such as satellite imagery for poverty estimation, and future priorities - like climate justice and gender equality. 
Tune in to get concrete guidance on starting - volunteer impact projects, where demand for impact data scientists lies, and how - to design responsible, policy-driven data work. -dateadded: '2022-07-30' -duration: PT00H58M44S -quotableClips: -- name: Episode Intro & Guest Christine Cepelak - startOffset: 0 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=0 - endOffset: 93 -- name: 'Career Journey: Program Management to Data Science for Public Policy' - startOffset: 93 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=93 - endOffset: 247 -- name: 'Private vs Public Sector: Differences for Data Work' - startOffset: 247 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=247 - endOffset: 325 -- name: 'Public Policy Defined: Laws, Governance & Social Impact' - startOffset: 325 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=325 - endOffset: 507 -- name: 'Community Organizing Case Study: Electronics Recycling Campaign' - startOffset: 507 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=507 - endOffset: 614 -- name: 'Policy vs Political Science: Theory, Practice & Implementation' - startOffset: 614 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=614 - endOffset: 730 -- name: 'Education Landscape: Data Science for Public Policy Programs & DSSG' - startOffset: 730 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=730 - endOffset: 862 -- name: 'Domain Nuances: Data Science for Social Impact vs Typical Industry Work' - startOffset: 862 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=862 - endOffset: 1032 -- name: 'Use Cases: Drone Computer Vision for Refugee Aid & Rooftop Sustainability' - startOffset: 1032 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1032 - endOffset: 1266 -- name: 'Ethics & Regulation: Ethical AI, EU AI Act and Social Scoring Risks' - startOffset: 1266 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1266 - endOffset: 1581 -- name: 'Project Design: Long-term Impact, Iteration & Avoiding Tech-First Solutions' - 
startOffset: 1581 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1581 - endOffset: 1686 -- name: 'Stakeholder Collaboration: NGOs, HR Use Cases & Mining Domain Knowledge' - startOffset: 1686 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1686 - endOffset: 1832 -- name: 'Data Challenges: Building Pipelines with Limited IT Infrastructure' - startOffset: 1832 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1832 - endOffset: 2209 -- name: 'Public Data Gaps: Recycling Programs, Corporate Transparency & Access Issues' - startOffset: 2209 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2209 - endOffset: 2367 -- name: 'Future Focus Areas: Climate Justice, Gender Equality & Responsible Tech' - startOffset: 2367 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2367 - endOffset: 2419 -- name: 'Gender Inequality Solutions: Salary Transparency & Inclusive Hiring Policies' - startOffset: 2419 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2419 - endOffset: 2618 -- name: 'Corporate Responsibility: CSR Data Needs & Demand for Impact Data Scientists' - startOffset: 2618 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2618 - endOffset: 2785 -- name: 'Public Sector Roles: Chief Data Scientist and Government Data Strategy' - startOffset: 2785 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2785 - endOffset: 2899 -- name: 'Getting Started: Volunteer Projects, SDGs & Finding a Cause' - startOffset: 2899 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2899 - endOffset: 3083 -- name: 'Project Marketplaces & Career Advice: DSSG Projects and 80,000 Hours' - startOffset: 3083 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3083 - endOffset: 3256 -- name: 'Research Applications: Satellite Imagery for Poverty Estimation & Census - Gaps' - startOffset: 3256 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3256 - endOffset: 3382 -- name: 'Community Discussion: Ethics in AI Coffee Chats and Emerging Debates' - startOffset: 3382 - url: 
https://www.youtube.com/watch?v=xWC1HAfekRk&t=3382 - endOffset: 3486 -- name: 'Connect with Christine: Website, LinkedIn & Twitter' - startOffset: 3486 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3486 - endOffset: 3503 -- name: Episode Wrap-up & Resource Links - startOffset: 3503 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3503 - endOffset: 3524 --- Links: diff --git a/_podcast/s03e04-interviewing-300-data-scientists.md b/_podcast/data-science-interview-and-cv-guide.md similarity index 96% rename from _podcast/s03e04-interviewing-300-data-scientists.md rename to _podcast/data-science-interview-and-cv-guide.md index 45fc9a2d..b425ca5a 100644 --- a/_podcast/s03e04-interviewing-300-data-scientists.md +++ b/_podcast/data-science-interview-and-cv-guide.md @@ -1,14 +1,11 @@ --- -title: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews - & Negotiation' -short: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews - & Negotiation' +title: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation' +short: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation' +season: 3 +episode: 4 guests: - olegnovikov image: images/podcast/s03e04-interviewing-300-data-scientists.jpg -season: 3 -episode: 4 -date: 2025-11-07 ids: youtube: AYi7b-8GPm4 anchor: What-I-Learned-After-Interviewing-300-Data-Scientists---Oleg-Novikov-e10ctbs @@ -17,15 +14,133 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/What-I-Learned-After-Interviewing-300-Data-Scientists---Oleg-Novikov-e10ctbs spotify: https://open.spotify.com/episode/406wN6xDkYPyLS8i9fUJL5 apple: https://podcasts.apple.com/us/podcast/what-i-learned-after-interviewing-300-data-scientists/id1541710331?i=1000520681105 + +description: Master CV optimization, take-home projects and mock interviews to land data science offers—learn SQL/ML prep, negotiation tactics and 
measurable project impact. +intro: How do you make your data science application stand out, ace take-home projects, and negotiate an offer without leaving money on the table? In this episode, Oleg Novikov — creator of NextRound and former data science manager at Uber with a background in data and software engineering — walks through a practical data science interview guide covering CV optimization, take-home projects, mock interviews, and negotiation.

We dig into career trajectory from engineering to product data science, building projects that differentiate your application, and concrete product work like forecasting and LTV. Oleg demonstrates NextRound's mock-interview chatbot and personalized feedback, explains common hiring funnels (recruiter screen → take-home → interviews), and contrasts product data scientist vs. machine learning engineer expectations. You'll hear specific advice on treating your CV as a landing page, highlighting personal contributions, crafting case-study narratives from business goals to evaluation metrics, and preparing for technical assessments (ML fundamentals, SQL window functions, coding). We also cover handling rejection, replying graciously, evaluating offers, negotiation tactics when your current salary is low, and practical steps for PhDs breaking into industry.

Listen for actionable steps to refine your data science resume, prioritize take-home ROI, and use mock interviews to iterate faster. +topics: +- data science +- software engineering +- machine learning +- career growth +- career transition +- job search +dateadded: 2021-05-07 +date: 2025-11-07 + +duration: PT01H08M38S + +quotableClips: +- name: Introduction & Episode Overview + startOffset: 76 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=76 + endOffset: 120 +- name: 'Career Path: Engineer → Recommenders → Data Science Management' + startOffset: 120 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=120 + endOffset: 162 +- name: 'Differentiating Application: Building a Project to Showcase Skills' + startOffset: 162 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=162 + endOffset: 319 +- name: 'Product Data Science at Uber: Forecasting & LTV Work' + startOffset: 319 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=319 + endOffset: 398 +- name: 'NextRound: Mock Interview Chatbot with Personalized Feedback' + startOffset: 398 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=398 + endOffset: 509 +- name: Why Companies Provide Generic Rejection Messages + startOffset: 509 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=509 + endOffset: 655 +- name: 'Designing Interview Scenarios: Common On-the-Job Dilemmas' + startOffset: 655 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=655 + endOffset: 804 +- name: 'Typical Hiring Funnel: Recruiter Screen → Take-Home → Interview Rounds' + startOffset: 804 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=804 + endOffset: 929 +- name: 'Role Spectrum: Product Data Scientist vs. 
Machine Learning Engineer' + startOffset: 929 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=929 + endOffset: 1033 +- name: 'Job Description Focus: Tailoring Your Application to the Role' + startOffset: 1033 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1033 + endOffset: 1108 +- name: 'CV Optimization: Treat Your CV as a Landing Page' + startOffset: 1108 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1108 + endOffset: 1551 +- name: 'CV Details: Highlight Personal Contribution and Remove Noise' + startOffset: 1551 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1551 + endOffset: 1671 +- name: 'Take-Home Projects: Time Investment and ROI Considerations' + startOffset: 1671 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1671 + endOffset: 1772 +- name: 'Behavioral Stories: Preparing Impactful Past-Project Narratives' + startOffset: 1772 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1772 + endOffset: 1923 +- name: 'Case Study Strategy: From Business Goals to Evaluation Metrics' + startOffset: 1923 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1923 + endOffset: 2198 +- name: 'Technical Assessments: ML Knowledge, SQL (Window Functions), and Coding' + startOffset: 2198 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2198 + endOffset: 2350 +- name: 'Handling Rejection: Ask for Feedback and Reapply Strategically' + startOffset: 2350 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2350 + endOffset: 2522 +- name: 'Offer Evaluation: Components, Market Comparison, and Negotiation' + startOffset: 2522 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2522 + endOffset: 2678 +- name: 'Personal Data on CV: Avoid Age, Photo, and Irrelevant Details' + startOffset: 2678 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2678 + endOffset: 2746 +- name: 'PhD to Industry: Cold-Start Projects, Synthetic Data, and Blogging' + startOffset: 2746 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2746 + endOffset: 2950 +- name: 
'Replying to Rejections: Be Gracious and Preserve Relationships' + startOffset: 2950 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2950 + endOffset: 3017 +- name: Negotiation Tactics When Current Salary Is Low + startOffset: 3017 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3017 + endOffset: 3189 +- name: 'Applying Despite Experience Gaps: When It Makes Sense to Try' + startOffset: 3189 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3189 + endOffset: 3317 +- name: 'ATS Reality: Parsing Myths vs. Human Screening' + startOffset: 3317 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3317 + endOffset: 3494 +- name: 'Key Lessons from Hundreds of Interviews: Avoid Bias & Iterate' + startOffset: 3494 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3494 + endOffset: 3864 +- name: 'Rethinking CV Format: Historical Constraints and Modern Design' + startOffset: 3864 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3864 + endOffset: 4166 +- name: Closing Remarks and NextRound Resources + startOffset: 4166 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=4166 + endOffset: 4118 + transcript: - header: Introduction & Episode Overview -- line: This week we will talk about the interview process, getting hired as a data - scientist — and not only data scientists. We have a special guest today — Oleg. - Oleg worked as a data science manager at Uber, where he built data science teams. - He also has experience building several startups in Europe. Recently he created - NextRound which is a free service for practicing interviews, receiving personalized - feedback, and learning materials. Welcome! -- header: Introduction & Episode Overview - line: This week we will talk about the interview process, getting hired as a data scientist — and not only data scientists. We have a special guest today — Oleg. Oleg worked as a data science manager at Uber, where he built data science teams. 
@@ -927,138 +1042,6 @@ transcript: sec: 4194 time: '1:09:54' who: Alexey -intro: How do you make your data science application stand out, ace take-home projects, - and negotiate an offer without leaving money on the table? In this episode, Oleg - Novikov — creator of NextRound and former data science manager at Uber with a background - in data and software engineering — walks through a practical data science interview - guide covering CV optimization, take-home projects, mock interviews, and negotiation. -

We dig into career trajectory from engineering to product data science, - building projects that differentiate your application, and concrete product work - like forecasting and LTV. Oleg demonstrates NextRound's mock-interview chatbot and - personalized feedback, explains common hiring funnels (recruiter screen → take-home - → interviews), and contrasts product data scientist vs. machine learning engineer - expectations. You'll hear specific advice on treating your CV as a landing page, - highlighting personal contributions, crafting case-study narratives from business - goals to evaluation metrics, and preparing for technical assessments (ML fundamentals, - SQL window functions, coding). We also cover handling rejection, replying graciously, - evaluating offers, negotiation tactics when your current salary is low, and practical - steps for PhDs breaking into industry.

Listen for actionable steps to refine - your data science resume, prioritize take-home ROI, and use mock interviews to iterate - faster. -description: Master CV optimization, take-home projects and mock interviews to land - data science offers—learn SQL/ML prep, negotiation tactics and measurable project - impact. -dateadded: '2021-05-07' -duration: PT01H08M38S -quotableClips: -- name: Introduction & Episode Overview - startOffset: 76 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=76 - endOffset: 120 -- name: 'Career Path: Engineer → Recommenders → Data Science Management' - startOffset: 120 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=120 - endOffset: 162 -- name: 'Differentiating Application: Building a Project to Showcase Skills' - startOffset: 162 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=162 - endOffset: 319 -- name: 'Product Data Science at Uber: Forecasting & LTV Work' - startOffset: 319 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=319 - endOffset: 398 -- name: 'NextRound: Mock Interview Chatbot with Personalized Feedback' - startOffset: 398 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=398 - endOffset: 509 -- name: Why Companies Provide Generic Rejection Messages - startOffset: 509 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=509 - endOffset: 655 -- name: 'Designing Interview Scenarios: Common On-the-Job Dilemmas' - startOffset: 655 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=655 - endOffset: 804 -- name: 'Typical Hiring Funnel: Recruiter Screen → Take-Home → Interview Rounds' - startOffset: 804 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=804 - endOffset: 929 -- name: 'Role Spectrum: Product Data Scientist vs. 
Machine Learning Engineer' - startOffset: 929 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=929 - endOffset: 1033 -- name: 'Job Description Focus: Tailoring Your Application to the Role' - startOffset: 1033 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1033 - endOffset: 1108 -- name: 'CV Optimization: Treat Your CV as a Landing Page' - startOffset: 1108 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1108 - endOffset: 1551 -- name: 'CV Details: Highlight Personal Contribution and Remove Noise' - startOffset: 1551 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1551 - endOffset: 1671 -- name: 'Take-Home Projects: Time Investment and ROI Considerations' - startOffset: 1671 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1671 - endOffset: 1772 -- name: 'Behavioral Stories: Preparing Impactful Past-Project Narratives' - startOffset: 1772 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1772 - endOffset: 1923 -- name: 'Case Study Strategy: From Business Goals to Evaluation Metrics' - startOffset: 1923 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1923 - endOffset: 2198 -- name: 'Technical Assessments: ML Knowledge, SQL (Window Functions), and Coding' - startOffset: 2198 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2198 - endOffset: 2350 -- name: 'Handling Rejection: Ask for Feedback and Reapply Strategically' - startOffset: 2350 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2350 - endOffset: 2522 -- name: 'Offer Evaluation: Components, Market Comparison, and Negotiation' - startOffset: 2522 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2522 - endOffset: 2678 -- name: 'Personal Data on CV: Avoid Age, Photo, and Irrelevant Details' - startOffset: 2678 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2678 - endOffset: 2746 -- name: 'PhD to Industry: Cold-Start Projects, Synthetic Data, and Blogging' - startOffset: 2746 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2746 - endOffset: 2950 -- name: 
'Replying to Rejections: Be Gracious and Preserve Relationships' - startOffset: 2950 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2950 - endOffset: 3017 -- name: Negotiation Tactics When Current Salary Is Low - startOffset: 3017 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3017 - endOffset: 3189 -- name: 'Applying Despite Experience Gaps: When It Makes Sense to Try' - startOffset: 3189 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3189 - endOffset: 3317 -- name: 'ATS Reality: Parsing Myths vs. Human Screening' - startOffset: 3317 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3317 - endOffset: 3494 -- name: 'Key Lessons from Hundreds of Interviews: Avoid Bias & Iterate' - startOffset: 3494 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3494 - endOffset: 3864 -- name: 'Rethinking CV Format: Historical Constraints and Modern Design' - startOffset: 3864 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3864 - endOffset: 4166 -- name: Closing Remarks and NextRound Resources - startOffset: 4166 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=4166 - endOffset: 4118 --- Links: diff --git a/_podcast/s10e02-decoding-data-science-job-descriptions.md b/_podcast/data-science-job-red-flags-and-mismatched-roles.md similarity index 97% rename from _podcast/s10e02-decoding-data-science-job-descriptions.md rename to _podcast/data-science-job-red-flags-and-mismatched-roles.md index ec386eb5..1a9ec781 100644 --- a/_podcast/s10e02-decoding-data-science-job-descriptions.md +++ b/_podcast/data-science-job-red-flags-and-mismatched-roles.md @@ -1,20 +1,153 @@ --- +title: "Data Science Jobs: How to Spot Misleading Job Titles, Hiring Red Flags & Build Better Data Teams" +short: "How to Spot Misleading Job Titles, Hiring Red Flags & Build Better Data Teams" +season: 10 episode: 2 guests: - terezaiofciu -date: 2025-11-07 +image: images/podcast/s10e02-decoding-data-science-job-descriptions.jpg ids: anchor: 
Decoding-Data-Science-Job-Descriptions---Tereza-Iofciu-e1m079l youtube: bqxBiIwtmX4 -image: images/podcast/s10e02-decoding-data-science-job-descriptions.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Decoding-Data-Science-Job-Descriptions---Tereza-Iofciu-e1m079l apple: https://podcasts.apple.com/us/podcast/decoding-data-science-job-descriptions-tereza-iofciu/id1541710331?i=1000575150345 spotify: https://open.spotify.com/episode/4v6h48B0c0Je8xLMo5zMs5?si=hcMUqpPPQYm2vrdi2py1UQ youtube: https://www.youtube.com/watch?v=bqxBiIwtmX4 -season: 10 -short: Decoding Data Science Job Descriptions -title: Spot Misleading Data Job Titles, Hiring Red Flags & Build Better Data Teams + +description: Discover how to spot misleading job titles, hiring red flags and build stronger data teams-assess tech stacks, interview rigor, salary ranges and career fit +intro: 'How can you tell if a "data scientist" job is really a data engineering role — or a mismatched hire waiting to happen? In this episode, Tereza Iofciu, PhD and seasoned data practitioner, walks through practical ways to spot misleading data job titles, hiring red flags, and how to build clearer, healthier data teams. Tereza brings experience across data science manager, data scientist, data engineer and product manager roles, plus teaching and community leadership (neuefische, PyLadies Hamburg, PSF community award), grounding her advice in real hiring and team-building work.

We cover why companies rename roles, examples from Scala, Elasticsearch, ETL and Airflow stacks, and the costs of vague job descriptions. You’ll get a role-clarity checklist (team structure, objectives, responsibilities vs. tech lists), signals of data maturity, interview pitfalls (time-consuming take-home tasks, syntax-focused tests), red flags in descriptions (long tech lists, “rockstar” language), and tactics for researching employers (LinkedIn, team pages, conference talks). Also discussed: salary transparency, remote-work fit, retention and career ladders.

Listen to learn concrete signals and questions to evaluate job descriptions, interviews, and shape better data hiring and team design.' +topics: +- data science +- data engineering +- career growth +- hiring +- data teams +- team building +- job search +dateadded: 2022-08-06 +date: 2025-11-07 + +duration: PT00H58M45S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=0 + endOffset: 101 +- name: 'Guest Bio: Tereza’s multidisciplinary data roles & community work' + startOffset: 101 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=101 + endOffset: 160 +- name: 'Academic Background: PhD, information retrieval, recommender systems' + startOffset: 160 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=160 + endOffset: 232 +- name: 'Industry Transition: XING to mytaxi/FREE NOW and evolving responsibilities' + startOffset: 232 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=232 + endOffset: 369 +- name: 'Technical Practices at XING: Scala, Elasticsearch, product-driven engineering' + startOffset: 369 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=369 + endOffset: 487 +- name: 'Building Data Infrastructure at mytaxi: ETL, Airflow and platform challenges' + startOffset: 487 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=487 + endOffset: 615 +- name: 'Job Titles vs. 
Reality: Renaming roles and shaping career narratives' + startOffset: 615 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=615 + endOffset: 667 +- name: 'Coaching Role: Neuefische bootcamp focus on product, teamwork and coaching' + startOffset: 667 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=667 + endOffset: 802 +- name: 'Teaching Challenges: PhDs, collaboration and professional skills' + startOffset: 802 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=802 + endOffset: 833 +- name: 'Hiring Misalignment: Company expectations versus candidate reality' + startOffset: 833 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=833 + endOffset: 985 +- name: 'Interview Practices: Take-home tasks and candidate time burden' + startOffset: 985 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=985 + endOffset: 1094 +- name: 'Candidate Preparedness: Defining goals and asking the right questions' + startOffset: 1094 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1094 + endOffset: 1206 +- name: 'Interpreting Job Titles: Spotting mislabeled data roles' + startOffset: 1206 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1206 + endOffset: 1310 +- name: 'Career-Stage Fit: Junior versus experienced candidate needs' + startOffset: 1310 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1310 + endOffset: 1381 +- name: 'Role Clarity Checklist: Team, objectives, responsibilities vs. 
tech lists' + startOffset: 1381 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1381 + endOffset: 1638 +- name: 'Data Team Signals: Presence of data engineering and analytics functions' + startOffset: 1638 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1638 + endOffset: 1820 +- name: 'Red Flags in Descriptions: Long tech lists and vague responsibilities' + startOffset: 1820 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1820 + endOffset: 1863 +- name: 'Language & Culture Signals: “Rockstar”, “ninja” and inclusivity cues' + startOffset: 1863 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1863 + endOffset: 2013 +- name: 'Interview Rigor Indicator: Bullet-point overload and syntax-focused tests' + startOffset: 2013 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2013 + endOffset: 2228 +- name: 'Salary Transparency: German norms and benefits of publishing ranges' + startOffset: 2228 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2228 + endOffset: 2331 +- name: 'Company Research Tactics: LinkedIn, team pages and conference presence' + startOffset: 2331 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2331 + endOffset: 2358 +- name: 'Colleagues & Role Models: Finding inspiring teammates and mentors' + startOffset: 2358 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2358 + endOffset: 2464 +- name: 'Retention & Career Ladders: Using LinkedIn to gauge internal mobility' + startOffset: 2464 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2464 + endOffset: 2625 +- name: 'Remote Work Fit: Assessing WFH policies and support structures' + startOffset: 2625 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2625 + endOffset: 2912 +- name: 'Data Maturity Model: Before, during, after data and hiring implications' + startOffset: 2912 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2912 + endOffset: 3024 +- name: 'Day-to-Day Expectations: Time allocation across maturity stages' + startOffset: 3024 + url: 
https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3024 + endOffset: 3381 +- name: 'Tech Stack Signals: Modern vs legacy tools and what they reveal' + startOffset: 3381 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3381 + endOffset: 3499 +- name: 'Community Visibility: Talks and knowledge sharing as healthy-team signals' + startOffset: 3499 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3499 + endOffset: 3607 +- name: 'Closing & Resources: Slides, talk links and final advice' + startOffset: 3607 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3607 + endOffset: 3525 + transcript: - header: Podcast Introduction - header: 'Guest Bio: Tereza’s multidisciplinary data roles & community work' @@ -1134,144 +1267,6 @@ transcript: sec: 3626 time: '1:00:26' who: Tereza -intro: 'How can you tell if a "data scientist" job is really a data engineering role - — or a mismatched hire waiting to happen? In this episode, Tereza Iofciu, PhD and - seasoned data practitioner, walks through practical ways to spot misleading data - job titles, hiring red flags, and how to build clearer, healthier data teams. Tereza - brings experience across data science manager, data scientist, data engineer and - product manager roles, plus teaching and community leadership (neuefische, PyLadies - Hamburg, PSF community award), grounding her advice in real hiring and team-building - work.

We cover why companies rename roles, examples from Scala, Elasticsearch, - ETL and Airflow stacks, and the costs of vague job descriptions. You’ll get a role-clarity - checklist (team structure, objectives, responsibilities vs. tech lists), signals - of data maturity, interview pitfalls (time-consuming take-home tasks, syntax-focused - tests), red flags in descriptions (long tech lists, “rockstar” language), and tactics - for researching employers (LinkedIn, team pages, conference talks). Also discussed: - salary transparency, remote-work fit, retention and career ladders.

Listen - to learn concrete signals and questions to evaluate job descriptions, interviews, - and shape better data hiring and team design.' -description: Discover how to spot misleading job titles, hiring red flags and build - stronger data teams-assess tech stacks, interview rigor, salary ranges and career - fit. -dateadded: '2022-08-06' -duration: PT00H58M45S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=0 - endOffset: 101 -- name: 'Guest Bio: Tereza’s multidisciplinary data roles & community work' - startOffset: 101 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=101 - endOffset: 160 -- name: 'Academic Background: PhD, information retrieval, recommender systems' - startOffset: 160 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=160 - endOffset: 232 -- name: 'Industry Transition: XING to mytaxi/FREE NOW and evolving responsibilities' - startOffset: 232 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=232 - endOffset: 369 -- name: 'Technical Practices at XING: Scala, Elasticsearch, product-driven engineering' - startOffset: 369 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=369 - endOffset: 487 -- name: 'Building Data Infrastructure at mytaxi: ETL, Airflow and platform challenges' - startOffset: 487 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=487 - endOffset: 615 -- name: 'Job Titles vs. 
Reality: Renaming roles and shaping career narratives' - startOffset: 615 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=615 - endOffset: 667 -- name: 'Coaching Role: Neuefische bootcamp focus on product, teamwork and coaching' - startOffset: 667 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=667 - endOffset: 802 -- name: 'Teaching Challenges: PhDs, collaboration and professional skills' - startOffset: 802 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=802 - endOffset: 833 -- name: 'Hiring Misalignment: Company expectations versus candidate reality' - startOffset: 833 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=833 - endOffset: 985 -- name: 'Interview Practices: Take-home tasks and candidate time burden' - startOffset: 985 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=985 - endOffset: 1094 -- name: 'Candidate Preparedness: Defining goals and asking the right questions' - startOffset: 1094 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1094 - endOffset: 1206 -- name: 'Interpreting Job Titles: Spotting mislabeled data roles' - startOffset: 1206 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1206 - endOffset: 1310 -- name: 'Career-Stage Fit: Junior versus experienced candidate needs' - startOffset: 1310 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1310 - endOffset: 1381 -- name: 'Role Clarity Checklist: Team, objectives, responsibilities vs. 
tech lists' - startOffset: 1381 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1381 - endOffset: 1638 -- name: 'Data Team Signals: Presence of data engineering and analytics functions' - startOffset: 1638 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1638 - endOffset: 1820 -- name: 'Red Flags in Descriptions: Long tech lists and vague responsibilities' - startOffset: 1820 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1820 - endOffset: 1863 -- name: 'Language & Culture Signals: “Rockstar”, “ninja” and inclusivity cues' - startOffset: 1863 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1863 - endOffset: 2013 -- name: 'Interview Rigor Indicator: Bullet-point overload and syntax-focused tests' - startOffset: 2013 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2013 - endOffset: 2228 -- name: 'Salary Transparency: German norms and benefits of publishing ranges' - startOffset: 2228 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2228 - endOffset: 2331 -- name: 'Company Research Tactics: LinkedIn, team pages and conference presence' - startOffset: 2331 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2331 - endOffset: 2358 -- name: 'Colleagues & Role Models: Finding inspiring teammates and mentors' - startOffset: 2358 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2358 - endOffset: 2464 -- name: 'Retention & Career Ladders: Using LinkedIn to gauge internal mobility' - startOffset: 2464 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2464 - endOffset: 2625 -- name: 'Remote Work Fit: Assessing WFH policies and support structures' - startOffset: 2625 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2625 - endOffset: 2912 -- name: 'Data Maturity Model: Before, during, after data and hiring implications' - startOffset: 2912 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2912 - endOffset: 3024 -- name: 'Day-to-Day Expectations: Time allocation across maturity stages' - startOffset: 3024 - url: 
https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3024 - endOffset: 3381 -- name: 'Tech Stack Signals: Modern vs legacy tools and what they reveal' - startOffset: 3381 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3381 - endOffset: 3499 -- name: 'Community Visibility: Talks and knowledge sharing as healthy-team signals' - startOffset: 3499 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3499 - endOffset: 3607 -- name: 'Closing & Resources: Slides, talk links and final advice' - startOffset: 3607 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3607 - endOffset: 3525 --- Links: diff --git a/_podcast/s06e09-data-science-manager.md b/_podcast/data-science-leadership-hiring-mlops.md similarity index 97% rename from _podcast/s06e09-data-science-manager.md rename to _podcast/data-science-leadership-hiring-mlops.md index 717d1240..4211485e 100644 --- a/_podcast/s06e09-data-science-manager.md +++ b/_podcast/data-science-leadership-hiring-mlops.md @@ -1,12 +1,11 @@ --- -title: 'Data Science Leadership: Product-First ML, Recommenders & RTB, MLOps, Hiring - & Mentoring' +title: 'Data Science Leadership: Product-First ML, Recommenders & RTB, MLOps, Hiring & Mentoring' short: Becoming a Data Science Manager +season: 6 +episode: 9 guests: - marianosemelman image: images/podcast/s06e09-data-science-manager.jpg -season: 6 -episode: 9 ids: youtube: qOLR84-KHoY anchor: Becoming-a-Data-Science-Manager---Mariano-Semelman-e1cbrf7 @@ -15,6 +14,143 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-Science-Manager---Mariano-Semelman-e1cbrf7 spotify: https://open.spotify.com/episode/28Sy4owRwvSJRFTeKAamz2 apple: https://podcasts.apple.com/us/podcast/becoming-a-data-science-manager-mariano-semelman/id1541710331?i=1000547222296 + +description: Discover data science leadership, recommender systems & MLOps tactics—hire, mentor and deploy models faster with practical frameworks and tips +intro: How do you lead a data science team that prioritizes product 
impact while building recommender systems, real‑time bidding (RTB) solutions, and maintainable MLOps? In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 years of experience, walks through practical leadership decisions that bridge models and products.

Mariano describes his shift from software development to data science leadership, daily responsibilities (meetings, mentoring, planning), and how he structures teams of data scientists and ML engineers. Key topics include product‑first ML, search and recommender systems, advertising and RTB campaign optimization, CRISP‑DM in production, diagnosing overfitting and feature issues, and pragmatic deployment patterns like start simple, fail fast, and iterative experiments. He also shares onboarding tactics (30‑60‑90 plans), feedback techniques ("ask permission, care, offer options"), one‑on‑ones, handling departures, code reviews as a manager, delegation through senior engineers, and hiring/remediation practices.

Listen to learn concrete approaches for prioritizing modeling time, running experiments in production, improving MLOps and NLP practices, and mentoring engineers to deliver measurable product outcomes +topics: +- data science +- machine learning +- MLOps +- leadership +- career growth +- team building +- hiring +dateadded: 2022-01-09 + +duration: PT01H05M19S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=0 + endOffset: 86 +- name: 'Guest Intro: Mariano Semelman, Head of Data Science at OLX' + startOffset: 86 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=86 + endOffset: 140 +- name: Passion for Product Applications in Data Science + startOffset: 140 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=140 + endOffset: 179 +- name: 'Career Journey: Software Dev to Data Science Leadership' + startOffset: 179 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=179 + endOffset: 345 +- name: 'Daily Responsibilities: Meetings, Mentoring & Planning' + startOffset: 345 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=345 + endOffset: 474 +- name: 'Team Composition: Data Scientists and ML Engineers' + startOffset: 474 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=474 + endOffset: 519 +- name: 'Transition to Management: First Team of Five' + startOffset: 519 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=519 + endOffset: 651 +- name: 'Decision Mindset: Saying Yes and Learning on the Job' + startOffset: 651 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=651 + endOffset: 772 +- name: '30‑60‑90 Plan: Onboarding, Listening, and Learning' + startOffset: 772 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=772 + endOffset: 916 +- name: 'Project Onboarding: Rapid Learning and Trusting Reports' + startOffset: 916 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=916 + endOffset: 1046 +- name: Embracing Wrong Assumptions to Trigger Discussion + startOffset: 1046 + url: 
https://www.youtube.com/watch?v=qOLR84-KHoY&t=1046 + endOffset: 1197 +- name: Experience with Search and Recommender Systems + startOffset: 1197 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1197 + endOffset: 1279 +- name: 'Advertising Domain: Real‑Time Bidding and Campaign Optimization' + startOffset: 1279 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1279 + endOffset: 1389 +- name: 'Transferable DS Practices: Problem Framing & Feature Engineering' + startOffset: 1389 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1389 + endOffset: 1576 +- name: 'Diagnosing Model Issues: Overfitting, Data, and Features' + startOffset: 1576 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1576 + endOffset: 1769 +- name: 'Product‑First Mindset: Prioritizing User Impact' + startOffset: 1769 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1769 + endOffset: 1806 +- name: 'From Model to Product: Experiments, Deployment, Rules of ML' + startOffset: 1806 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1806 + endOffset: 2016 +- name: 'Start Simple and Fail Fast: Iterative Testing in Production' + startOffset: 2016 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2016 + endOffset: 2172 +- name: CRISP‑DM Process and Deployment Realities + startOffset: 2172 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2172 + endOffset: 2210 +- name: 'Prioritization: Where Modeling Time Delivers Impact' + startOffset: 2210 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2210 + endOffset: 2425 +- name: 'Feedback Timing: When to Give Performance Feedback' + startOffset: 2425 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2425 + endOffset: 2657 +- name: 'Feedback Technique: Ask Permission, Care, and Offer Options' + startOffset: 2657 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2657 + endOffset: 2893 +- name: One‑on‑Ones and Creating a Safe Growth Environment + startOffset: 2893 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2893 + endOffset: 
3039 +- name: 'Handling Departures: Supporting Team Members Who Leave' + startOffset: 3039 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3039 + endOffset: 3157 +- name: 'Technical Work as a Manager: Code Reviews and Prototypes' + startOffset: 3157 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3157 + endOffset: 3298 +- name: Delegation and Architectural Involvement via Senior Engineers + startOffset: 3298 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3298 + endOffset: 3348 +- name: 'Hiring and Remediation: Interviews, Probation, Development Plans' + startOffset: 3348 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3348 + endOffset: 3697 +- name: 'Staying Current: MLOps, NLP, and Engineering Best Practices' + startOffset: 3697 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3697 + endOffset: 3947 +- name: Key Takeaways and Follow‑Up Opportunities + startOffset: 3947 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3947 + endOffset: 3991 +- name: 'Contact Info: LinkedIn and Email for Mariano Semelman' + startOffset: 3991 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3991 + endOffset: 3919 + transcript: - header: Podcast Introduction - header: 'Guest Intro: Mariano Semelman, Head of Data Science at OLX' @@ -981,145 +1117,4 @@ transcript: sec: 4005 time: '1:06:45' who: Mariano -description: Discover data science leadership, recommender systems & MLOps tactics—hire, - mentor and deploy models faster with practical frameworks and tips. -intro: How do you lead a data science team that prioritizes product impact while building - recommender systems, real‑time bidding (RTB) solutions, and maintainable MLOps? - In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 - years of experience, walks through practical leadership decisions that bridge models - and products.

Mariano describes his shift from software development to - data science leadership, daily responsibilities (meetings, mentoring, planning), - and how he structures teams of data scientists and ML engineers. Key topics include - product‑first ML, search and recommender systems, advertising and RTB campaign optimization, - CRISP‑DM in production, diagnosing overfitting and feature issues, and pragmatic - deployment patterns like start simple, fail fast, and iterative experiments. He - also shares onboarding tactics (30‑60‑90 plans), feedback techniques ("ask permission, - care, offer options"), one‑on‑ones, handling departures, code reviews as a manager, - delegation through senior engineers, and hiring/remediation practices.

- Listen to learn concrete approaches for prioritizing modeling time, running experiments - in production, improving MLOps and NLP practices, and mentoring engineers to deliver - measurable product outcomes. -dateadded: '2022-01-09' -duration: PT01H05M19S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=0 - endOffset: 86 -- name: 'Guest Intro: Mariano Semelman, Head of Data Science at OLX' - startOffset: 86 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=86 - endOffset: 140 -- name: Passion for Product Applications in Data Science - startOffset: 140 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=140 - endOffset: 179 -- name: 'Career Journey: Software Dev to Data Science Leadership' - startOffset: 179 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=179 - endOffset: 345 -- name: 'Daily Responsibilities: Meetings, Mentoring & Planning' - startOffset: 345 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=345 - endOffset: 474 -- name: 'Team Composition: Data Scientists and ML Engineers' - startOffset: 474 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=474 - endOffset: 519 -- name: 'Transition to Management: First Team of Five' - startOffset: 519 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=519 - endOffset: 651 -- name: 'Decision Mindset: Saying Yes and Learning on the Job' - startOffset: 651 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=651 - endOffset: 772 -- name: '30‑60‑90 Plan: Onboarding, Listening, and Learning' - startOffset: 772 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=772 - endOffset: 916 -- name: 'Project Onboarding: Rapid Learning and Trusting Reports' - startOffset: 916 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=916 - endOffset: 1046 -- name: Embracing Wrong Assumptions to Trigger Discussion - startOffset: 1046 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1046 - endOffset: 1197 -- name: Experience with Search and 
Recommender Systems - startOffset: 1197 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1197 - endOffset: 1279 -- name: 'Advertising Domain: Real‑Time Bidding and Campaign Optimization' - startOffset: 1279 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1279 - endOffset: 1389 -- name: 'Transferable DS Practices: Problem Framing & Feature Engineering' - startOffset: 1389 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1389 - endOffset: 1576 -- name: 'Diagnosing Model Issues: Overfitting, Data, and Features' - startOffset: 1576 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1576 - endOffset: 1769 -- name: 'Product‑First Mindset: Prioritizing User Impact' - startOffset: 1769 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1769 - endOffset: 1806 -- name: 'From Model to Product: Experiments, Deployment, Rules of ML' - startOffset: 1806 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1806 - endOffset: 2016 -- name: 'Start Simple and Fail Fast: Iterative Testing in Production' - startOffset: 2016 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2016 - endOffset: 2172 -- name: CRISP‑DM Process and Deployment Realities - startOffset: 2172 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2172 - endOffset: 2210 -- name: 'Prioritization: Where Modeling Time Delivers Impact' - startOffset: 2210 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2210 - endOffset: 2425 -- name: 'Feedback Timing: When to Give Performance Feedback' - startOffset: 2425 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2425 - endOffset: 2657 -- name: 'Feedback Technique: Ask Permission, Care, and Offer Options' - startOffset: 2657 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2657 - endOffset: 2893 -- name: One‑on‑Ones and Creating a Safe Growth Environment - startOffset: 2893 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2893 - endOffset: 3039 -- name: 'Handling Departures: Supporting Team Members Who Leave' - startOffset: 3039 - url: 
https://www.youtube.com/watch?v=qOLR84-KHoY&t=3039 - endOffset: 3157 -- name: 'Technical Work as a Manager: Code Reviews and Prototypes' - startOffset: 3157 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3157 - endOffset: 3298 -- name: Delegation and Architectural Involvement via Senior Engineers - startOffset: 3298 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3298 - endOffset: 3348 -- name: 'Hiring and Remediation: Interviews, Probation, Development Plans' - startOffset: 3348 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3348 - endOffset: 3697 -- name: 'Staying Current: MLOps, NLP, and Engineering Best Practices' - startOffset: 3697 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3697 - endOffset: 3947 -- name: Key Takeaways and Follow‑Up Opportunities - startOffset: 3947 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3947 - endOffset: 3991 -- name: 'Contact Info: LinkedIn and Email for Mariano Semelman' - startOffset: 3991 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3991 - endOffset: 3919 --- diff --git a/_podcast/s13e06-secret-sauce-of-data-science-management.md b/_podcast/data-science-management-and-agile-machine-learning.md similarity index 97% rename from _podcast/s13e06-secret-sauce-of-data-science-management.md rename to _podcast/data-science-management-and-agile-machine-learning.md index ad568681..67c6efdc 100644 --- a/_podcast/s13e06-secret-sauce-of-data-science-management.md +++ b/_podcast/data-science-management-and-agile-machine-learning.md @@ -1,20 +1,112 @@ --- +title: 'Master Data Science Management: Agile ML, Debrief Culture, Metrics & Scale to Production' +short: The Secret Sauce of Data Science Management +season: 13 episode: 6 guests: - shirmeirlador +image: images/podcast/s13e06-secret-sauce-of-data-science-management.jpg ids: anchor: ow/datatalksclub/episodes/The-Secret-Sauce-of-Data-Science-Management---Shir-Meir-Lador-e21cu92 youtube: gcxP0qRO-MY -image: 
images/podcast/s13e06-secret-sauce-of-data-science-management.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/The-Secret-Sauce-of-Data-Science-Management---Shir-Meir-Lador-e21cu92 apple: https://podcasts.apple.com/us/podcast/the-secret-sauce-of-data-science-management-shir-meir-lador/id1541710331?i=1000606790142 spotify: https://open.spotify.com/episode/4kzcUCVPVN1Opq7XI1Dibd?si=f7GlEOs-TFiC9dxTJlXVyw youtube: https://www.youtube.com/watch?v=gcxP0qRO-MY -season: 13 -short: The Secret Sauce of Data Science Management -title: 'Master Data Science Management: Agile ML, Debrief Culture, Metrics & Scale - to Production' + +description: 'Master data science management: learn Agile ML, debrief culture, metrics and POC-to-production strategies to scale teams, boost impact and ship reliable models.' +intro: How do you run data science teams so experiments become reliable, measurable products? In this episode, Shir Meir Lador, a data science group manager at Intuit who builds machine and deep learning models for document intelligence in TurboTax and QuickBooks, walks through practical approaches to data science management and agile ML.

We explore the origins of debrief culture from military pilot training and how pre/post debriefs drive continuous improvement; concrete practices for agile ML including two-week sprints, exploration sprints, design stories and grooming; and how to scope work, handle AI project uncertainty, and use rapid experimentation to mitigate data risks. Shir also digs into metrics for production ML—business impact, A/B testing, customer-focused KPIs—and people metrics like pulse surveys, manager score and skip-level feedback. You’ll hear about leadership pillars (vision, driving results, culture), team development, goal alignment, cross-functional product partnerships, and tactics for fostering innovation (hackathons, paper clubs).

Listen for actionable guidance on measuring success, scaling ML to production, and building the managerial skills to lead high-performance data science teams. This episode is for managers and technical leads focused on production ML, machine learning operations, and team-driven impact +topics: +- management +- machine learning +dateadded: 2023-04-01 + +duration: PT00H56M57S + +quotableClips: +- name: 'Episode Introduction: The Secret Sauce of Data Science Management' + startOffset: 100 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=100 + endOffset: 160 +- name: 'Career Background: Electrical Engineering to Document Intelligence at Intuit' + startOffset: 160 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=160 + endOffset: 271 +- name: 'Military Leadership Lessons: Pilot Training & Debrief Culture Origins' + startOffset: 271 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=271 + endOffset: 324 +- name: 'Debriefing Practice: Pre/post Focus Areas for Continuous Improvement' + startOffset: 324 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=324 + endOffset: 558 +- name: 'Group Manager Role: Strategy, Mentoring, Standards and Roadmaps' + startOffset: 558 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=558 + endOffset: 713 +- name: 'Measuring Success: Business Impact and Team Engagement Metrics' + startOffset: 713 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=713 + endOffset: 776 +- name: 'People Metrics: Pulse Surveys, Manager Score and Skip-level Feedback' + startOffset: 776 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=776 + endOffset: 979 +- name: 'Leadership Pillars: Vision, Driving Results, Building High-performance Culture' + startOffset: 979 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=979 + endOffset: 1043 +- name: 'Managing Leadership Relationships: Communicating Vision and Securing Resources' + startOffset: 1043 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1043 + endOffset: 1464 +- name: 'Team Development: 
Goal-setting, One-on-ones, Feedback and Recognition' + startOffset: 1464 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1464 + endOffset: 1585 +- name: 'Goal Alignment: Cascading Roadmap Goals to Individual Development' + startOffset: 1585 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1585 + endOffset: 1920 +- name: 'Fostering Innovation: Hackathons, Paper Clubs and Learning Forums' + startOffset: 1920 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1920 + endOffset: 2071 +- name: 'Cross-Functional Integration: Product Partnerships and Expectation Management' + startOffset: 2071 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2071 + endOffset: 2466 +- name: 'AI Project Uncertainty: Data Risks, Unknowns and Rapid Experimentation' + startOffset: 2466 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2466 + endOffset: 2658 +- name: 'Agile for ML: Two-week Sprints, Exploration Tasks and Grooming Practices' + startOffset: 2658 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2658 + endOffset: 2736 +- name: 'Scoping ML Work: Exploration Sprints, Design Stories and Iterative Milestones' + startOffset: 2736 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2736 + endOffset: 2994 +- name: 'Core Manager Skills: Communication, Strategic Clarity and Growth Mindset' + startOffset: 2994 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2994 + endOffset: 3299 +- name: 'POC to Production: Customer-focused Metrics, A/B Testing and Incremental + Rollout' + startOffset: 3299 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3299 + endOffset: 3498 +- name: 'Resources & Further Reading: Shir’s Talks and Blog Posts' + startOffset: 3498 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3498 + endOffset: 3517 +- name: Episode Wrap-up and Closing Remarks + startOffset: 3517 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3517 + endOffset: 3417 + transcript: - header: 'Episode Introduction: The Secret Sauce of Data Science Management' - line: 
This week, we'll talk about the secret sauce of data science management. We @@ -1096,110 +1188,6 @@ transcript: sec: 3517 time: '58:37' who: Alexey -description: 'Master data science management: learn Agile ML, debrief culture, metrics - and POC-to-production strategies to scale teams, boost impact and ship reliable - models.' -intro: How do you run data science teams so experiments become reliable, measurable - products? In this episode, Shir Meir Lador, a data science group manager at Intuit - who builds machine and deep learning models for document intelligence in TurboTax - and QuickBooks, walks through practical approaches to data science management and - agile ML.

We explore the origins of debrief culture from military pilot - training and how pre/post debriefs drive continuous improvement; concrete practices - for agile ML including two-week sprints, exploration sprints, design stories and - grooming; and how to scope work, handle AI project uncertainty, and use rapid experimentation - to mitigate data risks. Shir also digs into metrics for production ML—business impact, - A/B testing, customer-focused KPIs—and people metrics like pulse surveys, manager - score and skip-level feedback. You’ll hear about leadership pillars (vision, driving - results, culture), team development, goal alignment, cross-functional product partnerships, - and tactics for fostering innovation (hackathons, paper clubs).

Listen - for actionable guidance on measuring success, scaling ML to production, and building - the managerial skills to lead high-performance data science teams. This episode - is for managers and technical leads focused on production ML, machine learning operations, - and team-driven impact. -dateadded: '2023-04-01' -duration: PT00H56M57S -quotableClips: -- name: 'Episode Introduction: The Secret Sauce of Data Science Management' - startOffset: 100 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=100 - endOffset: 160 -- name: 'Career Background: Electrical Engineering to Document Intelligence at Intuit' - startOffset: 160 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=160 - endOffset: 271 -- name: 'Military Leadership Lessons: Pilot Training & Debrief Culture Origins' - startOffset: 271 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=271 - endOffset: 324 -- name: 'Debriefing Practice: Pre/post Focus Areas for Continuous Improvement' - startOffset: 324 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=324 - endOffset: 558 -- name: 'Group Manager Role: Strategy, Mentoring, Standards and Roadmaps' - startOffset: 558 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=558 - endOffset: 713 -- name: 'Measuring Success: Business Impact and Team Engagement Metrics' - startOffset: 713 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=713 - endOffset: 776 -- name: 'People Metrics: Pulse Surveys, Manager Score and Skip-level Feedback' - startOffset: 776 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=776 - endOffset: 979 -- name: 'Leadership Pillars: Vision, Driving Results, Building High-performance Culture' - startOffset: 979 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=979 - endOffset: 1043 -- name: 'Managing Leadership Relationships: Communicating Vision and Securing Resources' - startOffset: 1043 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1043 - endOffset: 1464 -- name: 'Team Development: Goal-setting, One-on-ones, Feedback 
and Recognition' - startOffset: 1464 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1464 - endOffset: 1585 -- name: 'Goal Alignment: Cascading Roadmap Goals to Individual Development' - startOffset: 1585 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1585 - endOffset: 1920 -- name: 'Fostering Innovation: Hackathons, Paper Clubs and Learning Forums' - startOffset: 1920 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1920 - endOffset: 2071 -- name: 'Cross-Functional Integration: Product Partnerships and Expectation Management' - startOffset: 2071 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2071 - endOffset: 2466 -- name: 'AI Project Uncertainty: Data Risks, Unknowns and Rapid Experimentation' - startOffset: 2466 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2466 - endOffset: 2658 -- name: 'Agile for ML: Two-week Sprints, Exploration Tasks and Grooming Practices' - startOffset: 2658 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2658 - endOffset: 2736 -- name: 'Scoping ML Work: Exploration Sprints, Design Stories and Iterative Milestones' - startOffset: 2736 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2736 - endOffset: 2994 -- name: 'Core Manager Skills: Communication, Strategic Clarity and Growth Mindset' - startOffset: 2994 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2994 - endOffset: 3299 -- name: 'POC to Production: Customer-focused Metrics, A/B Testing and Incremental - Rollout' - startOffset: 3299 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3299 - endOffset: 3498 -- name: 'Resources & Further Reading: Shir’s Talks and Blog Posts' - startOffset: 3498 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3498 - endOffset: 3517 -- name: Episode Wrap-up and Closing Remarks - startOffset: 3517 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3517 - endOffset: 3417 --- Links: diff --git a/_podcast/s06e03-manager-vs-expert.md b/_podcast/data-science-manager-vs-expert-hiring-guide.md similarity index 
97% rename from _podcast/s06e03-manager-vs-expert.md rename to _podcast/data-science-manager-vs-expert-hiring-guide.md index 1f5dd102..3b828ba1 100644 --- a/_podcast/s06e03-manager-vs-expert.md +++ b/_podcast/data-science-manager-vs-expert-hiring-guide.md @@ -1,12 +1,11 @@ --- -title: 'Data Science Manager vs Expert: Hiring Strategy, Skills, Team Building & When - to Use ML' +title: 'Data Science Manager vs Expert: Hiring Strategy, Skills, Team Building & When to Use ML' short: Data Science Manager vs Data Science Expert +season: 6 +episode: 3 guests: - barbarasobkowiak image: images/podcast/s06e03-manager-vs-expert.jpg -season: 6 -episode: 3 ids: youtube: hFmIgaN-F8Y anchor: Data-Science-Manager-vs-Data-Science-Expert---Barbara-Sobkowiak-e1ah3od @@ -15,6 +14,132 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Science-Manager-vs-Data-Science-Expert---Barbara-Sobkowiak-e1ah3od spotify: https://open.spotify.com/episode/5Ug8YA3hKY9Kr5hVFDqZ77 apple: https://podcasts.apple.com/us/podcast/data-science-manager-vs-data-science-expert-barbara/id1541710331?i=1000542496818 + +description: Learn hiring strategies for Data Science Manager vs Data Science Expert—when to hire experts, build teams, assess ML needs, and boost business impact +intro: 'When should you hire a data science manager versus a deep technical expert, and how do you decide whether machine learning is actually the right solution? In this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, and manager by passion — walks through her career from GIS → SQL → BI to leading teams, and tackles hiring strategy, role design, and practical ML use cases like mental health monitoring and demand forecasting.

We cover common pitfalls (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager skill balance between technical literacy and soft skills, and what “hands-on” really means for managers: high‑level understanding, code review, and time allocation. Learn when to hire a data science expert for complex models or domain knowledge, and when a manager-plus-generalist approach or a startup “unicorn” makes sense. Barbara also discusses team building (learning plans, pairing), project prioritization, model monitoring, feasibility checks (data quality and baselines), and measuring impact with KPIs and client discovery.

Listen to gain practical hiring criteria, role profiles, and decision frameworks for when to use machine learning and how to build teams that deliver.' +topics: +- data science +- machine learning +- leadership +- team building +dateadded: 2021-11-21 + +duration: PT00H59M33S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=0 + endOffset: 89 +- name: 'Episode Topic: Data Science Manager vs Data Science Expert' + startOffset: 89 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=89 + endOffset: 120 +- name: 'Career Journey: GIS → SQL → BI → Data Science Manager' + startOffset: 120 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=120 + endOffset: 266 +- name: 'ML Use Cases: Mental Health Monitoring & Demand Forecasting' + startOffset: 266 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=266 + endOffset: 298 +- name: 'Misleading Job Ads: Manager vs Expert Confusion on LinkedIn' + startOffset: 298 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=298 + endOffset: 448 +- name: 'Root Causes: HR/IT Job Descriptions Missing Managerial Needs' + startOffset: 448 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=448 + endOffset: 502 +- name: 'Manager Skill Balance: Technical Knowledge vs Soft Skills' + startOffset: 502 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=502 + endOffset: 722 +- name: 'Technical Expectation: High-Level Understanding vs Deep Expertise' + startOffset: 722 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=722 + endOffset: 809 +- name: 'Manager Responsibilities: Strategy, Team Development, Stakeholder Communication' + startOffset: 809 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=809 + endOffset: 949 +- name: 'Hands-On Reality: Coding, Model Review, and Time Allocation' + startOffset: 949 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=949 + endOffset: 1054 +- name: 'Manager Experience: Hands-On ML Helpful but Not Mandatory' + startOffset: 1054 + 
url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1054 + endOffset: 1180 +- name: 'Business Development: Manager Role in Sales and Client Strategy' + startOffset: 1180 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1180 + endOffset: 1251 +- name: 'Team Development: Learning Plans, Courses, and Pairing' + startOffset: 1251 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1251 + endOffset: 1434 +- name: 'Quality Oversight: Code Reviews vs Managerial Guidance' + startOffset: 1434 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1434 + endOffset: 1502 +- name: 'Data Science Expert: Deep Technical and Domain Expertise' + startOffset: 1502 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1502 + endOffset: 1728 +- name: 'Hiring an Expert: When Complex Models and Domain Knowledge Are Needed' + startOffset: 1728 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1728 + endOffset: 1837 +- name: 'Hiring Strategy: Manager + Expert vs Generalist for Startups' + startOffset: 1837 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1837 + endOffset: 1916 +- name: 'Manager Job Profile: Team Building, Communication, and AI Literacy' + startOffset: 1916 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1916 + endOffset: 2044 +- name: 'Risks of Hiring Experts as Managers: Team and Business Translation Gaps' + startOffset: 2044 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2044 + endOffset: 2317 +- name: 'Startup Hiring: Unicorns Who Wear Many Hats' + startOffset: 2317 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2317 + endOffset: 2447 +- name: 'Project Prioritization: Estimation, Resource Allocation, and Buffers' + startOffset: 2447 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2447 + endOffset: 2774 +- name: 'Measuring Impact: Client Feedback, KPIs, and Model Monitoring' + startOffset: 2774 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2774 + endOffset: 3012 +- name: 'Client Discovery: Baselines, Data Availability, and Success 
Metrics' + startOffset: 3012 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3012 + endOffset: 3237 +- name: 'Feasibility Check: Data Quality and Necessity of Machine Learning' + startOffset: 3237 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3237 + endOffset: 3271 +- name: 'Diversity Spotlight: Women in Data Science and Interview Confidence' + startOffset: 3271 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3271 + endOffset: 3543 +- name: 'Connect with Guest: Barbara Sobkowiak on LinkedIn' + startOffset: 3543 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3543 + endOffset: 3560 +- name: 'Career Advice: Find Satisfaction, Mentors, and Networking' + startOffset: 3560 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3560 + endOffset: 3624 +- name: Episode Wrap-up and Unanswered Questions + startOffset: 3624 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3624 + endOffset: 3573 + transcript: - header: Podcast Introduction - header: 'Episode Topic: Data Science Manager vs Data Science Expert' @@ -959,139 +1084,6 @@ transcript: sec: 3662 time: '1:01:02' who: Barbara -description: Learn hiring strategies for Data Science Manager vs Data Science Expert—when - to hire experts, build teams, assess ML needs, and boost business impact. -intro: 'When should you hire a data science manager versus a deep technical expert, - and how do you decide whether machine learning is actually the right solution? In - this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, - and manager by passion — walks through her career from GIS → SQL → BI to leading - teams, and tackles hiring strategy, role design, and practical ML use cases like - mental health monitoring and demand forecasting.

We cover common pitfalls - (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager - skill balance between technical literacy and soft skills, and what “hands-on” really - means for managers: high‑level understanding, code review, and time allocation. - Learn when to hire a data science expert for complex models or domain knowledge, - and when a manager-plus-generalist approach or a startup “unicorn” makes sense. - Barbara also discusses team building (learning plans, pairing), project prioritization, - model monitoring, feasibility checks (data quality and baselines), and measuring - impact with KPIs and client discovery.

Listen to gain practical hiring - criteria, role profiles, and decision frameworks for when to use machine learning - and how to build teams that deliver.' -dateadded: '2021-11-21' -duration: PT00H59M33S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=0 - endOffset: 89 -- name: 'Episode Topic: Data Science Manager vs Data Science Expert' - startOffset: 89 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=89 - endOffset: 120 -- name: 'Career Journey: GIS → SQL → BI → Data Science Manager' - startOffset: 120 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=120 - endOffset: 266 -- name: 'ML Use Cases: Mental Health Monitoring & Demand Forecasting' - startOffset: 266 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=266 - endOffset: 298 -- name: 'Misleading Job Ads: Manager vs Expert Confusion on LinkedIn' - startOffset: 298 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=298 - endOffset: 448 -- name: 'Root Causes: HR/IT Job Descriptions Missing Managerial Needs' - startOffset: 448 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=448 - endOffset: 502 -- name: 'Manager Skill Balance: Technical Knowledge vs Soft Skills' - startOffset: 502 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=502 - endOffset: 722 -- name: 'Technical Expectation: High-Level Understanding vs Deep Expertise' - startOffset: 722 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=722 - endOffset: 809 -- name: 'Manager Responsibilities: Strategy, Team Development, Stakeholder Communication' - startOffset: 809 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=809 - endOffset: 949 -- name: 'Hands-On Reality: Coding, Model Review, and Time Allocation' - startOffset: 949 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=949 - endOffset: 1054 -- name: 'Manager Experience: Hands-On ML Helpful but Not Mandatory' - startOffset: 1054 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1054 - endOffset: 1180 -- 
name: 'Business Development: Manager Role in Sales and Client Strategy' - startOffset: 1180 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1180 - endOffset: 1251 -- name: 'Team Development: Learning Plans, Courses, and Pairing' - startOffset: 1251 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1251 - endOffset: 1434 -- name: 'Quality Oversight: Code Reviews vs Managerial Guidance' - startOffset: 1434 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1434 - endOffset: 1502 -- name: 'Data Science Expert: Deep Technical and Domain Expertise' - startOffset: 1502 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1502 - endOffset: 1728 -- name: 'Hiring an Expert: When Complex Models and Domain Knowledge Are Needed' - startOffset: 1728 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1728 - endOffset: 1837 -- name: 'Hiring Strategy: Manager + Expert vs Generalist for Startups' - startOffset: 1837 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1837 - endOffset: 1916 -- name: 'Manager Job Profile: Team Building, Communication, and AI Literacy' - startOffset: 1916 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1916 - endOffset: 2044 -- name: 'Risks of Hiring Experts as Managers: Team and Business Translation Gaps' - startOffset: 2044 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2044 - endOffset: 2317 -- name: 'Startup Hiring: Unicorns Who Wear Many Hats' - startOffset: 2317 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2317 - endOffset: 2447 -- name: 'Project Prioritization: Estimation, Resource Allocation, and Buffers' - startOffset: 2447 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2447 - endOffset: 2774 -- name: 'Measuring Impact: Client Feedback, KPIs, and Model Monitoring' - startOffset: 2774 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2774 - endOffset: 3012 -- name: 'Client Discovery: Baselines, Data Availability, and Success Metrics' - startOffset: 3012 - url: 
https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3012 - endOffset: 3237 -- name: 'Feasibility Check: Data Quality and Necessity of Machine Learning' - startOffset: 3237 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3237 - endOffset: 3271 -- name: 'Diversity Spotlight: Women in Data Science and Interview Confidence' - startOffset: 3271 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3271 - endOffset: 3543 -- name: 'Connect with Guest: Barbara Sobkowiak on LinkedIn' - startOffset: 3543 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3543 - endOffset: 3560 -- name: 'Career Advice: Find Satisfaction, Mentors, and Networking' - startOffset: 3560 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3560 - endOffset: 3624 -- name: Episode Wrap-up and Unanswered Questions - startOffset: 3624 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3624 - endOffset: 3573 --- Links: diff --git a/_podcast/s09e07-designing-data-science-organization.md b/_podcast/data-science-team-structure-and-org-design.md similarity index 97% rename from _podcast/s09e07-designing-data-science-organization.md rename to _podcast/data-science-team-structure-and-org-design.md index a2cbc11e..916ed1fd 100644 --- a/_podcast/s09e07-designing-data-science-organization.md +++ b/_podcast/data-science-team-structure-and-org-design.md @@ -1,20 +1,137 @@ --- +title: 'Designing High-Impact Data Science Teams: Centralized vs Embedded Models, Experimentation & Staffing' +short: Designing a Data Science Team +season: 9 episode: 7 guests: - lisacohen +image: images/podcast/s09e07-designing-data-science-organization.jpg ids: anchor: Designing-a-Data-Science-Organization---Lisa-Cohen-e1kcm5e youtube: F_rJ4fg5ZEA -image: images/podcast/s09e07-designing-data-science-organization.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Designing-a-Data-Science-Organization---Lisa-Cohen-e1kcm5e apple: 
https://podcasts.apple.com/us/podcast/designing-a-data-science-organization-lisa-cohen/id1541710331?i=1000569172916 spotify: https://open.spotify.com/episode/62ZzHBEuOLbm6ft0u9dlh7?si=182bea5ac49243af youtube: https://www.youtube.com/watch?v=F_rJ4fg5ZEA -season: 9 -short: Designing a Data Science Organization -title: 'Designing High-Impact Data Science Orgs: Centralized vs Embedded Models, Experimentation - & Staffing' + +description: 'Discover how to design high-impact data science orgs: centralized vs embedded models, staffing ratios and experimentation to speed decisions and scale impact.' +intro: 'How should you structure a data science organization to maximize product impact: centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director of Data Science at Twitter who leads 70 data scientists and previously led Azure Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation patterns for designing high‑impact data science orgs.

We cover centralized vs embedded models and what “embedding” really means for reporting lines and day‑to‑day integration with feature teams; Twitter’s hybrid per‑division approach for product and ads; staffing guidance (including an engineers‑to‑data‑scientist ratio reference); and rhythms for cross‑functional planning, OKRs, and dependency management. Lisa also discusses experimentation and experiment review, defining success metrics and ship criteria, knowledge sharing practices, differences between analytics and ML‑heavy data science, and how to partner with product, engineering, design, and research.

Listen to gain actionable guidance on choosing an org model, setting staffing expectations, establishing experiment and metrics practices, and aligning data pipelines, data quality, and OKRs to drive data‑driven product decisions.' +topics: +- data science +- data teams +- leadership +- machine learning +dateadded: 2022-07-08 + +duration: PT00H58M55S + +quotableClips: +- name: 'Guest Introduction: Lisa Cohen, Director of Data Science at Twitter' + startOffset: 77 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=77 + endOffset: 102 +- name: 'Career Background: Applied Math, Microsoft telemetry, Azure to Twitter' + startOffset: 102 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=102 + endOffset: 387 +- name: 'Org Models Overview: Centralized vs decentralized data science organization' + startOffset: 387 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=387 + endOffset: 514 +- name: 'Embedding Explained: Reporting lines vs day‑to‑day integration with feature + teams' + startOffset: 514 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=514 + endOffset: 641 +- name: 'Hybrid Structure: Centralization per division and multiple DS orgs' + startOffset: 641 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=641 + endOffset: 926 +- name: 'Reporting Structure: Embedded teams vs centralized data science reporting' + startOffset: 926 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=926 + endOffset: 1123 +- name: 'Team Rhythms & Planning: Cross‑functional ceremonies and dependency management' + startOffset: 1123 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1123 + endOffset: 1318 +- name: 'Cross‑Functional Alignment: OKRs and aligning goals across levels' + startOffset: 1318 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1318 + endOffset: 1493 +- name: 'Twitter’s Approach: Hybrid per‑division model for product and ads' + startOffset: 1493 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1493 + endOffset: 1548 +- name: 'Decentralized Model: 
Immersive domain context, faster decisions, career tradeoffs' + startOffset: 1548 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1548 + endOffset: 1765 +- name: 'Centralized Model: Knowledge sharing, consistency, and context‑building challenges' + startOffset: 1765 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1765 + endOffset: 1852 +- name: 'Communicating Insights: Translating metrics for product, engineering, and + design' + startOffset: 1852 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1852 + endOffset: 1988 +- name: 'Starting Data Science: Foundations—data pipelines, data quality, and analytics' + startOffset: 1988 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1988 + endOffset: 2209 +- name: 'Staffing Guidance: Engineers‑to‑data‑scientist ratios and ML partnerships + (8:1 reference)' + startOffset: 2209 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2209 + endOffset: 2539 +- name: 'Knowledge Sharing & Publication: Research archives, Slack channels, and push + mechanisms' + startOffset: 2539 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2539 + endOffset: 2769 +- name: 'Product Partnership: Co‑ownership with product, engineering, design, and + research' + startOffset: 2769 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2769 + endOffset: 2840 +- name: 'Metrics & Experimentation: Defining success metrics, ship criteria, and experiment + review' + startOffset: 2840 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2840 + endOffset: 3044 +- name: 'Analytics vs Data Science: Analysts driving dashboards vs ML‑heavy DS work' + startOffset: 3044 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3044 + endOffset: 3150 +- name: 'OKRs & Exploration Time: Using objectives to prioritize and allocate research + time' + startOffset: 3150 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3150 + endOffset: 3256 +- name: 'Resolving Conflicts: Data‑driven opportunity sizing for prioritization decisions' + startOffset: 3256 + 
url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3256 + endOffset: 3348 +- name: 'Data‑Driven Product Innovation: Guiding roadmap decisions with trusted data' + startOffset: 3348 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3348 + endOffset: 3451 +- name: 'Qualitative Research Collaboration: Bridging user studies with quantitative + analysis' + startOffset: 3451 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3451 + endOffset: 3578 +- name: 'Contact & Resources: Lisa on Twitter, LinkedIn, and Medium' + startOffset: 3578 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3578 + endOffset: 3603 +- name: Episode Wrap‑Up and Closing Remarks + startOffset: 3603 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3603 + endOffset: 3535 + transcript: - header: 'Guest Introduction: Lisa Cohen, Director of Data Science at Twitter' - line: This week, we'll talk about designing a data science organization. We have @@ -1150,131 +1267,6 @@ transcript: sec: 3612 time: '1:00:12' who: Alexey -description: 'Discover how to design high-impact data science orgs: centralized vs - embedded models, staffing ratios and experimentation to speed decisions and scale - impact.' -intro: 'How should you structure a data science organization to maximize product impact: - centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director - of Data Science at Twitter who leads 70 data scientists and previously led Azure - Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation - patterns for designing high‑impact data science orgs.

We cover centralized - vs embedded models and what “embedding” really means for reporting lines and day‑to‑day - integration with feature teams; Twitter’s hybrid per‑division approach for product - and ads; staffing guidance (including an engineers‑to‑data‑scientist ratio reference); - and rhythms for cross‑functional planning, OKRs, and dependency management. Lisa - also discusses experimentation and experiment review, defining success metrics and - ship criteria, knowledge sharing practices, differences between analytics and ML‑heavy - data science, and how to partner with product, engineering, design, and research. -

Listen to gain actionable guidance on choosing an org model, setting staffing - expectations, establishing experiment and metrics practices, and aligning data pipelines, - data quality, and OKRs to drive data‑driven product decisions.' -dateadded: '2022-07-08' -duration: PT00H58M55S -quotableClips: -- name: 'Guest Introduction: Lisa Cohen, Director of Data Science at Twitter' - startOffset: 77 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=77 - endOffset: 102 -- name: 'Career Background: Applied Math, Microsoft telemetry, Azure to Twitter' - startOffset: 102 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=102 - endOffset: 387 -- name: 'Org Models Overview: Centralized vs decentralized data science organization' - startOffset: 387 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=387 - endOffset: 514 -- name: 'Embedding Explained: Reporting lines vs day‑to‑day integration with feature - teams' - startOffset: 514 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=514 - endOffset: 641 -- name: 'Hybrid Structure: Centralization per division and multiple DS orgs' - startOffset: 641 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=641 - endOffset: 926 -- name: 'Reporting Structure: Embedded teams vs centralized data science reporting' - startOffset: 926 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=926 - endOffset: 1123 -- name: 'Team Rhythms & Planning: Cross‑functional ceremonies and dependency management' - startOffset: 1123 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1123 - endOffset: 1318 -- name: 'Cross‑Functional Alignment: OKRs and aligning goals across levels' - startOffset: 1318 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1318 - endOffset: 1493 -- name: 'Twitter’s Approach: Hybrid per‑division model for product and ads' - startOffset: 1493 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1493 - endOffset: 1548 -- name: 'Decentralized Model: Immersive domain context, faster decisions, career tradeoffs' - 
startOffset: 1548 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1548 - endOffset: 1765 -- name: 'Centralized Model: Knowledge sharing, consistency, and context‑building challenges' - startOffset: 1765 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1765 - endOffset: 1852 -- name: 'Communicating Insights: Translating metrics for product, engineering, and - design' - startOffset: 1852 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1852 - endOffset: 1988 -- name: 'Starting Data Science: Foundations—data pipelines, data quality, and analytics' - startOffset: 1988 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1988 - endOffset: 2209 -- name: 'Staffing Guidance: Engineers‑to‑data‑scientist ratios and ML partnerships - (8:1 reference)' - startOffset: 2209 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2209 - endOffset: 2539 -- name: 'Knowledge Sharing & Publication: Research archives, Slack channels, and push - mechanisms' - startOffset: 2539 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2539 - endOffset: 2769 -- name: 'Product Partnership: Co‑ownership with product, engineering, design, and - research' - startOffset: 2769 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2769 - endOffset: 2840 -- name: 'Metrics & Experimentation: Defining success metrics, ship criteria, and experiment - review' - startOffset: 2840 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2840 - endOffset: 3044 -- name: 'Analytics vs Data Science: Analysts driving dashboards vs ML‑heavy DS work' - startOffset: 3044 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3044 - endOffset: 3150 -- name: 'OKRs & Exploration Time: Using objectives to prioritize and allocate research - time' - startOffset: 3150 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3150 - endOffset: 3256 -- name: 'Resolving Conflicts: Data‑driven opportunity sizing for prioritization decisions' - startOffset: 3256 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3256 - 
endOffset: 3348 -- name: 'Data‑Driven Product Innovation: Guiding roadmap decisions with trusted data' - startOffset: 3348 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3348 - endOffset: 3451 -- name: 'Qualitative Research Collaboration: Bridging user studies with quantitative - analysis' - startOffset: 3451 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3451 - endOffset: 3578 -- name: 'Contact & Resources: Lisa on Twitter, LinkedIn, and Medium' - startOffset: 3578 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3578 - endOffset: 3603 -- name: Episode Wrap‑Up and Closing Remarks - startOffset: 3603 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3603 - endOffset: 3535 --- Links: diff --git a/_podcast/s12e05-indie-hacking.md b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md similarity index 97% rename from _podcast/s12e05-indie-hacking.md rename to _podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md index 3f26ea7e..ec5d1feb 100644 --- a/_podcast/s12e05-indie-hacking.md +++ b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md @@ -1,29 +1,150 @@ --- +title: 'Indie Hacking and Bootstrapping Side Projects for Data Scientists: Build, Launch & Monetize Indie Hacker Products' +short: Indie Hacking and Bootstrapping Side Projects for Data Scientists +season: 12 episode: 5 guests: - paulineclavelloux -date: 2025-11-07 -topics: -- Entrepreneurship -- Indie Hacking -- Freelance -- Product Development -- Startups -- Bootstrapping -- Data Tools +image: images/podcast/s12e05-indie-hacking.jpg ids: anchor: Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb youtube: KsV_SVXlTo8 -image: images/podcast/s12e05-indie-hacking.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb apple: https://podcasts.apple.com/us/podcast/indie-hacking-pauline-clavelloux/id1541710331?i=1000595787491 spotify: 
https://open.spotify.com/episode/2DlD756csrDFAxfuTjSKwY?si=_H2G3bJtQIuJMAe8daEIYg youtube: https://www.youtube.com/watch?v=KsV_SVXlTo8 -season: 12 -short: Indie Hacking -title: 'Build, Launch & Monetize Indie Hacker Products: Crypto Alerts, Generative - AI & Growth' + +description: 'Build indie-hacking products: launch crypto alerts & generative AI apps, validate ideas, choose tech, price effectively and monetize for sustainable growth.' +intro: 'How do you build, launch, and actually monetize indie-hacker products in crypto alerts and generative AI while keeping a day job? In this episode, Pauline Clavelloux — an IBM data science manager and consultant with eight years’ experience who also ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired selfie-to-art tool) — walks through the practical steps.

We cover Pauline’s career path and an ML production case study (money‑laundering detection), then move into indie-hacking essentials: bootstrapping, splitting time between a full‑time role and side projects, and validating ideas. You’ll hear how she productized projects (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API fine‑tuning vs self‑hosted GPUs), managed operating costs, and launched via Twitter and niche listings. The conversation also tackles customer acquisition, pricing constraints, marketing and content strategy, and skills gained across GCP, data engineering, web dev, and growth.

Listen for actionable guidance on product launch, monetization, and time management for indie hackers working on crypto alerts and generative AI—concrete steps to validate, build, and grow side products without external funding.' +topics: +- indie hacking +- bootstrapping +- side projects +- data science +- machine learning +- generative AI +- entrepreneurship +- freelance +dateadded: 2023-01-21 +date: 2025-11-07 + +duration: PT00H59M27S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=0 + endOffset: 72 +- name: 'Career Journey: Engineering Student to IBM Data Scientist' + startOffset: 72 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=72 + endOffset: 178 +- name: 'Consulting Work: Project Types and Client Engagement' + startOffset: 178 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=178 + endOffset: 290 +- name: 'Manager Role: Deliverables, Roadmaps, and Client Communication' + startOffset: 290 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=290 + endOffset: 331 +- name: 'Case Study: Money-Laundering Detection Project and Deployment' + startOffset: 331 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=331 + endOffset: 443 +- name: 'Indie Hacking Explained: Bootstrapping Without External Funding' + startOffset: 443 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=443 + endOffset: 538 +- name: 'Day Job + Side Projects: Time Allocation and Routine' + startOffset: 538 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=538 + endOffset: 674 +- name: 'Cryptopy Origin: Building Crypto Alerts for Personal Trading' + startOffset: 674 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=674 + endOffset: 909 +- name: 'Productization: Company Setup, Landing Pages, Legal and Payments' + startOffset: 909 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=909 + endOffset: 1125 +- name: 'Technology Choices: Python/Flask, Team Contributions, and Architecture' + startOffset: 1125 + url: 
https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1125 + endOffset: 1173 +- name: 'Marketing Efforts: Audience Reach, Social Channels, and Challenges' + startOffset: 1173 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1173 + endOffset: 1306 +- name: Operating Costs and Niche Product Strategy + startOffset: 1306 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1306 + endOffset: 1413 +- name: 'UnrealMe Origin: DreamBooth Inspiration and Rapid Prototyping' + startOffset: 1413 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1413 + endOffset: 1548 +- name: 'Implementation Decisions: API Fine-Tuning vs Self-Hosted GPUs' + startOffset: 1548 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1548 + endOffset: 1721 +- name: 'Launch Channels: Twitter, Black Friday Listings, and Early Sales' + startOffset: 1721 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1721 + endOffset: 1891 +- name: Customer Acquisition and Pricing Constraints + startOffset: 1891 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1891 + endOffset: 2095 +- name: 'Motivation: Creative Drive and Why Indie Hacking Matters' + startOffset: 2095 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2095 + endOffset: 2147 +- name: 'Skills Gained: GCP, Data Engineering, Web Dev, and Marketing' + startOffset: 2147 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2147 + endOffset: 2367 +- name: 'Work–Life Balance: Passion, Energy, and Time Management' + startOffset: 2367 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2367 + endOffset: 2547 +- name: 'AboutStartup.io: Blog Concept, Interviews, and Monetization Paths' + startOffset: 2547 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2547 + endOffset: 2739 +- name: 'Idea Generation: Frustration-Led Problems and Opportunity Sourcing' + startOffset: 2739 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2739 + endOffset: 2934 +- name: 'Idea Validation: Competitor Scan, Skills Check, and Build Criteria' + startOffset: 2934 + url: 
https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2934 + endOffset: 3035 +- name: 'Twitter Growth: Personal Branding and Audience Building' + startOffset: 3035 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3035 + endOffset: 3158 +- name: 'Content Strategy: Balancing Data Science and Indie-Hacking Posts' + startOffset: 3158 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3158 + endOffset: 3204 +- name: 'Community Access: Contact Options via Twitter and Slack' + startOffset: 3204 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3204 + endOffset: 3275 +- name: 'Indie Inspiration: Pieter Levels and the “Many Projects” Approach' + startOffset: 3275 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3275 + endOffset: 3430 +- name: 'Recommended Resources: Data Sense and AboutStartup.io' + startOffset: 3430 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3430 + endOffset: 3499 +- name: Episode Wrap-Up and Final Thoughts + startOffset: 3499 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3499 + endOffset: 3567 + transcript: - header: Episode Introduction - header: Episode Introduction @@ -1217,139 +1338,6 @@ transcript: sec: 3567 time: '59:27' who: Pauline -intro: 'How do you build, launch, and actually monetize indie-hacker products in crypto - alerts and generative AI while keeping a day job? In this episode, Pauline Clavelloux - — an IBM data science manager and consultant with eight years’ experience who also - ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired - selfie-to-art tool) — walks through the practical steps.

We cover Pauline’s - career path and an ML production case study (money‑laundering detection), then move - into indie-hacking essentials: bootstrapping, splitting time between a full‑time - role and side projects, and validating ideas. You’ll hear how she productized projects - (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API - fine‑tuning vs self‑hosted GPUs), managed operating costs, and launched via Twitter - and niche listings. The conversation also tackles customer acquisition, pricing - constraints, marketing and content strategy, and skills gained across GCP, data - engineering, web dev, and growth.

Listen for actionable guidance on product - launch, monetization, and time management for indie hackers working on crypto alerts - and generative AI—concrete steps to validate, build, and grow side products without - external funding.' -description: 'Build indie-hacking products: launch crypto alerts & generative AI apps, - validate ideas, choose tech, price effectively and monetize for sustainable growth.' -dateadded: '2023-01-21' -duration: PT00H59M27S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=0 - endOffset: 72 -- name: 'Career Journey: Engineering Student to IBM Data Scientist' - startOffset: 72 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=72 - endOffset: 178 -- name: 'Consulting Work: Project Types and Client Engagement' - startOffset: 178 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=178 - endOffset: 290 -- name: 'Manager Role: Deliverables, Roadmaps, and Client Communication' - startOffset: 290 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=290 - endOffset: 331 -- name: 'Case Study: Money-Laundering Detection Project and Deployment' - startOffset: 331 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=331 - endOffset: 443 -- name: 'Indie Hacking Explained: Bootstrapping Without External Funding' - startOffset: 443 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=443 - endOffset: 538 -- name: 'Day Job + Side Projects: Time Allocation and Routine' - startOffset: 538 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=538 - endOffset: 674 -- name: 'Cryptopy Origin: Building Crypto Alerts for Personal Trading' - startOffset: 674 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=674 - endOffset: 909 -- name: 'Productization: Company Setup, Landing Pages, Legal and Payments' - startOffset: 909 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=909 - endOffset: 1125 -- name: 'Technology Choices: Python/Flask, Team Contributions, and Architecture' - startOffset: 
1125 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1125 - endOffset: 1173 -- name: 'Marketing Efforts: Audience Reach, Social Channels, and Challenges' - startOffset: 1173 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1173 - endOffset: 1306 -- name: Operating Costs and Niche Product Strategy - startOffset: 1306 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1306 - endOffset: 1413 -- name: 'UnrealMe Origin: DreamBooth Inspiration and Rapid Prototyping' - startOffset: 1413 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1413 - endOffset: 1548 -- name: 'Implementation Decisions: API Fine-Tuning vs Self-Hosted GPUs' - startOffset: 1548 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1548 - endOffset: 1721 -- name: 'Launch Channels: Twitter, Black Friday Listings, and Early Sales' - startOffset: 1721 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1721 - endOffset: 1891 -- name: Customer Acquisition and Pricing Constraints - startOffset: 1891 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1891 - endOffset: 2095 -- name: 'Motivation: Creative Drive and Why Indie Hacking Matters' - startOffset: 2095 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2095 - endOffset: 2147 -- name: 'Skills Gained: GCP, Data Engineering, Web Dev, and Marketing' - startOffset: 2147 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2147 - endOffset: 2367 -- name: 'Work–Life Balance: Passion, Energy, and Time Management' - startOffset: 2367 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2367 - endOffset: 2547 -- name: 'AboutStartup.io: Blog Concept, Interviews, and Monetization Paths' - startOffset: 2547 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2547 - endOffset: 2739 -- name: 'Idea Generation: Frustration-Led Problems and Opportunity Sourcing' - startOffset: 2739 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2739 - endOffset: 2934 -- name: 'Idea Validation: Competitor Scan, Skills Check, and Build Criteria' - startOffset: 
2934 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2934 - endOffset: 3035 -- name: 'Twitter Growth: Personal Branding and Audience Building' - startOffset: 3035 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3035 - endOffset: 3158 -- name: 'Content Strategy: Balancing Data Science and Indie-Hacking Posts' - startOffset: 3158 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3158 - endOffset: 3204 -- name: 'Community Access: Contact Options via Twitter and Slack' - startOffset: 3204 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3204 - endOffset: 3275 -- name: 'Indie Inspiration: Pieter Levels and the “Many Projects” Approach' - startOffset: 3275 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3275 - endOffset: 3430 -- name: 'Recommended Resources: Data Sense and AboutStartup.io' - startOffset: 3430 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3430 - endOffset: 3499 -- name: Episode Wrap-Up and Final Thoughts - startOffset: 3499 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3499 - endOffset: 3567 --- Links: diff --git a/_podcast/s14e03-data-strategy-key-principles-and-best-practices.md b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md similarity index 97% rename from _podcast/s14e03-data-strategy-key-principles-and-best-practices.md rename to _podcast/data-strategy-and-dataops-for-ai-powered-products.md index cc6828ab..a93e5f38 100644 --- a/_podcast/s14e03-data-strategy-key-principles-and-best-practices.md +++ b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md @@ -1,20 +1,125 @@ --- +title: 'Actionable Data Strategy & DataOps for AI-Powered Products: Pitch, Measure, Use GPT' +short: 'Data Strategy: Key Principles and Best Practices' +season: 14 episode: 3 guests: - boyanangelov +image: images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg ids: anchor: atatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete youtube: jGbfeYdlCiQ -image: 
images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete apple: https://podcasts.apple.com/us/podcast/data-strategy-key-principles-and-best-practices-boyan/id1541710331?i=1000614629229 spotify: https://open.spotify.com/episode/7tITQ4nLypogRLUjjK75mx?si=722BlhoLSGuxZlE9ia7VhA youtube: https://www.youtube.com/watch?v=jGbfeYdlCiQ -season: 14 -short: 'Data Strategy: Key Principles and Best Practices' -title: 'Actionable Data Strategy & DataOps for AI-Powered Products: Pitch, Measure, - Use GPT' + +description: 'Master actionable data strategy, DataOps & GPT: learn to pitch small AI use cases, set baselines, apply CI/CD and deliver measurable AI-powered products.' +intro: How do you turn AI ambitions into measurable, deliverable data products? In this episode Boyan Angelov — author of Elements of Data Strategy and leader of data strategy at Exxeta AG — walks through practical steps to make data strategy actionable for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused plan and explains the due diligence needed to align business goals with feasible use cases.

Topics covered include use case ideation, feasibility and prioritization, managing influence cascades and scope creep, impact assessment and portfolio management, and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for data — and clarify platform, AI and BI roles and the core skills required for strategists. Boyan also shows how GPT and ChatGPT can be used as a writing co‑pilot for outlines, pitches and technical guidance, and recommends starting small with budgeted use cases plus baseline and post-implementation metrics to measure success.

Listen to get concrete guidance on pitching, measuring and operationalizing a data strategy for AI-powered products — including practical DataOps and GPT workflows you can apply right away. +topics: +- data strategy +- dataops +- AI +- data strategy +dateadded: 2023-05-27 + +duration: PT00H56M39S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=0 + endOffset: 113 +- name: Guest Introduction & Current Role + startOffset: 113 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=113 + endOffset: 170 +- name: Guest Background & Career Path + startOffset: 170 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=170 + endOffset: 347 +- name: 'Becoming a Data Strategist: Accidental Transition' + startOffset: 347 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=347 + endOffset: 493 +- name: 'Defining Data Strategy: Actionable, Flexible Plans' + startOffset: 493 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=493 + endOffset: 613 +- name: Due Diligence & Aligning Business Goals + startOffset: 613 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=613 + endOffset: 808 +- name: 'Designing Strategy: Use Case Ideation, Feasibility & Prioritization' + startOffset: 808 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=808 + endOffset: 981 +- name: Influence Cascade & Scope Creep in Data Products + startOffset: 981 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=981 + endOffset: 1102 +- name: Impact Assessment, Portfolio Management & Delivery + startOffset: 1102 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1102 + endOffset: 1410 +- name: Data- and AI-Powered Products Defined + startOffset: 1410 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1410 + endOffset: 1497 +- name: 'DataOps Principles: Lean, Agile & CI/CD Practices' + startOffset: 1497 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1497 + endOffset: 1638 +- name: 'Strategy Roles: Platform, AI and BI Distinctions' + 
startOffset: 1638 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1638 + endOffset: 1802 +- name: 'Core Skills: Data Knowledge, Communication & Systems Thinking' + startOffset: 1802 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1802 + endOffset: 2195 +- name: 'Path to Data Strategist: Business Fluency & Deliberate Practice' + startOffset: 2195 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2195 + endOffset: 2349 +- name: 'Translation Skill: Explaining Use Cases to Stakeholders' + startOffset: 2349 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2349 + endOffset: 2491 +- name: 'From Strategist to CTO: Ownership, Budgeting & Management' + startOffset: 2491 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2491 + endOffset: 2626 +- name: 'GPT as Writing Co‑Pilot: Sidebars, Editing & Ethical Considerations' + startOffset: 2626 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2626 + endOffset: 2840 +- name: 'GPT for Drafting: Outlines, PowerPoint & Chapter Structure' + startOffset: 2840 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2840 + endOffset: 3062 +- name: 'ChatGPT for Data Strategy: Ideation, Prompting & Tech Guidance' + startOffset: 3062 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3062 + endOffset: 3164 +- name: 'Pitching Strategy: Start Small with a Budgeted Use Case' + startOffset: 3164 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3164 + endOffset: 3332 +- name: 'Baselines & Measurement: Pre- and Post-Implementation Metrics' + startOffset: 3332 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3332 + endOffset: 3416 +- name: 'Recommended Reading: Data Strategy, DataOps & Infonomics' + startOffset: 3416 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3416 + endOffset: 3512 +- name: Episode Wrap-Up & Resources + startOffset: 3512 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3512 + endOffset: 3399 + transcript: - header: Podcast Introduction - header: Guest Introduction & Current Role @@ 
-1139,119 +1244,6 @@ transcript: sec: 3512 time: '58:32' who: Alexey -description: 'Master actionable data strategy, DataOps & GPT: learn to pitch small - AI use cases, set baselines, apply CI/CD and deliver measurable AI-powered products.' -intro: How do you turn AI ambitions into measurable, deliverable data products? In - this episode Boyan Angelov — author of Elements of Data Strategy and leader of data - strategy at Exxeta AG — walks through practical steps to make data strategy actionable - for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, - HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused - plan and explains the due diligence needed to align business goals with feasible - use cases.

Topics covered include use case ideation, feasibility and prioritization, - managing influence cascades and scope creep, impact assessment and portfolio management, - and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for - data — and clarify platform, AI and BI roles and the core skills required for strategists. - Boyan also shows how GPT and ChatGPT can be used as a writing co‑pilot for outlines, - pitches and technical guidance, and recommends starting small with budgeted use - cases plus baseline and post-implementation metrics to measure success.

- Listen to get concrete guidance on pitching, measuring and operationalizing a data - strategy for AI-powered products — including practical DataOps and GPT workflows - you can apply right away. -dateadded: '2023-05-27' -duration: PT00H56M39S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=0 - endOffset: 113 -- name: Guest Introduction & Current Role - startOffset: 113 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=113 - endOffset: 170 -- name: Guest Background & Career Path - startOffset: 170 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=170 - endOffset: 347 -- name: 'Becoming a Data Strategist: Accidental Transition' - startOffset: 347 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=347 - endOffset: 493 -- name: 'Defining Data Strategy: Actionable, Flexible Plans' - startOffset: 493 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=493 - endOffset: 613 -- name: Due Diligence & Aligning Business Goals - startOffset: 613 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=613 - endOffset: 808 -- name: 'Designing Strategy: Use Case Ideation, Feasibility & Prioritization' - startOffset: 808 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=808 - endOffset: 981 -- name: Influence Cascade & Scope Creep in Data Products - startOffset: 981 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=981 - endOffset: 1102 -- name: Impact Assessment, Portfolio Management & Delivery - startOffset: 1102 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1102 - endOffset: 1410 -- name: Data- and AI-Powered Products Defined - startOffset: 1410 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1410 - endOffset: 1497 -- name: 'DataOps Principles: Lean, Agile & CI/CD Practices' - startOffset: 1497 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1497 - endOffset: 1638 -- name: 'Strategy Roles: Platform, AI and BI Distinctions' - startOffset: 1638 - url: 
https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1638 - endOffset: 1802 -- name: 'Core Skills: Data Knowledge, Communication & Systems Thinking' - startOffset: 1802 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1802 - endOffset: 2195 -- name: 'Path to Data Strategist: Business Fluency & Deliberate Practice' - startOffset: 2195 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2195 - endOffset: 2349 -- name: 'Translation Skill: Explaining Use Cases to Stakeholders' - startOffset: 2349 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2349 - endOffset: 2491 -- name: 'From Strategist to CTO: Ownership, Budgeting & Management' - startOffset: 2491 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2491 - endOffset: 2626 -- name: 'GPT as Writing Co‑Pilot: Sidebars, Editing & Ethical Considerations' - startOffset: 2626 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2626 - endOffset: 2840 -- name: 'GPT for Drafting: Outlines, PowerPoint & Chapter Structure' - startOffset: 2840 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2840 - endOffset: 3062 -- name: 'ChatGPT for Data Strategy: Ideation, Prompting & Tech Guidance' - startOffset: 3062 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3062 - endOffset: 3164 -- name: 'Pitching Strategy: Start Small with a Budgeted Use Case' - startOffset: 3164 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3164 - endOffset: 3332 -- name: 'Baselines & Measurement: Pre- and Post-Implementation Metrics' - startOffset: 3332 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3332 - endOffset: 3416 -- name: 'Recommended Reading: Data Strategy, DataOps & Infonomics' - startOffset: 3416 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3416 - endOffset: 3512 -- name: Episode Wrap-Up & Resources - startOffset: 3512 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3512 - endOffset: 3399 --- Links: diff --git a/_podcast/s01e01-roles.md b/_podcast/data-team-roles.md similarity index 98% rename from 
_podcast/s01e01-roles.md rename to _podcast/data-team-roles.md index 60d51e34..04515d84 100644 --- a/_podcast/s01e01-roles.md +++ b/_podcast/data-team-roles.md @@ -1,15 +1,11 @@ --- -title: 'Data Team Roles Explained: Skills, Responsibilities, and How Teams Ship ML - Products' +title: 'Data Team Roles Explained: Skills, Responsibilities, and How Teams Ship ML Products' short: Roles in a Data Team +season: 1 +episode: 1 guests: - alexeygrigorev image: images/podcast/s01e01-roles.jpg -keywords: data team roles, data scientist, data engineer, machine learning engineer, - data analyst, MLOps engineer, product manager, data team structure, data science - roles, ML engineer vs data engineer, data team responsibilities, data science career -season: 1 -episode: 1 ids: youtube: UukjwSIAnpw anchor: Roles-in-a-data-team---Alexey-Grigorev-emqcft @@ -18,7 +14,22 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Roles-in-a-data-team---Alexey-Grigorev-emqcft spotify: TODO apple: TODO -dateadded: '2021-02-23' + +topics: +- team building +- data teams +- data science +- machine learning +- data analysis +- data engineering +- MLOps +- product management +- leadership +dateadded: 2021-02-23 + + + +keywords: data team roles, data scientist, data engineer, machine learning engineer, data analyst, MLOps engineer, product manager, data team structure, data science roles, ML engineer vs data engineer, data team responsibilities, data science career --- The topic today is the roles in data teams. We want to understand what kind of people work in the data team, what responsibilities they have, what they do, and what they need to know. 
diff --git a/_podcast/s03e04-effective-communication-with-business.md b/_podcast/data-translator-role-and-data-strategy.md similarity index 97% rename from _podcast/s03e04-effective-communication-with-business.md rename to _podcast/data-translator-role-and-data-strategy.md index 647a4324..0c7ebca3 100644 --- a/_podcast/s03e04-effective-communication-with-business.md +++ b/_podcast/data-translator-role-and-data-strategy.md @@ -1,13 +1,11 @@ --- -title: 'Data Strategist Guide: Effective Communication to Bridge Data Teams & Management - for Data-Driven Growth' -short: 'Data Strategist Guide to Driving Growth: Prototypes, MVPs & Building Data - Trust' +title: 'Data Strategist Guide: Effective Communication to Bridge Data Teams & Management for Data-Driven Growth' +short: 'Data Strategist Guide to Driving Growth: Prototypes, MVPs & Building Data Trust' +season: 3 +episode: 4 guests: - liorbarak image: images/podcast/s03e04-effective-communication-with-business.jpg -season: 3 -episode: 4 ids: youtube: gqroEsTyLD0 anchor: Effective-Communication-with-Business-for-Data-Professionals---Lior-Barak-e1002rm @@ -16,6 +14,121 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Effective-Communication-with-Business-for-Data-Professionals---Lior-Barak-e1002rm spotify: https://open.spotify.com/episode/4RF592cRWxHgcXbx6pV0Ja apple: https://podcasts.apple.com/us/podcast/effective-communication-business-for-data-professionals/id1541710331?i=1000519463715 + +description: Discover how a data translator bridges management and tech to drive data-driven growth—practical data strategy, forecasts, prototypes, and team alignment +intro: 'How do you bridge the gap between data teams and management so analytics actually drives growth? In this episode, Lior Barak — author of "Data is Like a Plate of Hummus," co-host of WHAT the Data?! 
and founder of Tale About Data with 12+ years building data teams — lays out the role of a data translator: a product-minded strategist who converts technical outputs into business-aligned action.

We explore practical tactics for building data trust (proactive alerts, QA dashboards, and confidence intervals for forecasts), embedding with business teams to learn workflows, and using data-led growth to improve recruitment, marketing, and operations. Lior walks through ways to overcome resistance — hackathons and side projects — and advocates lean delivery: MVPs, prototype-first development, clear handover strategies, and scaling with OKRs. He also covers how to explain effort to non-technical stakeholders, break silos through co-working, and use chat-driven remote collaboration effectively.

Listen to learn concrete approaches for data strategy, data communication, and production-ready delivery that help your organization move from data chaos to measurable, data-driven growth.' +topics: +- data strategy +- communication +- project management +- leadership +- data teams +dateadded: 2021-05-01 + +duration: PT00H57M23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=0 + endOffset: 91 +- name: 'Episode Theme: Bridging Data & Management' + startOffset: 91 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=91 + endOffset: 150 +- name: 'Guest Background: Lior''s data and product journey' + startOffset: 150 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=150 + endOffset: 248 +- name: 'Role Defined: Data strategist as translator between business and tech' + startOffset: 248 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=248 + endOffset: 466 +- name: 'Communication Tactics: Proactive alerts to maintain data trust' + startOffset: 466 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=466 + endOffset: 648 +- name: 'Forecast Transparency: Confidence intervals and QA dashboards' + startOffset: 648 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=648 + endOffset: 795 +- name: 'Translator Profiles: Product-minded data advocates for alignment' + startOffset: 795 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=795 + endOffset: 860 +- name: 'Embedment: Sitting with business to learn workflows and needs' + startOffset: 860 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=860 + endOffset: 1053 +- name: 'Data-led Growth: Using data to improve recruitment, marketing, and ops' + startOffset: 1053 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1053 + endOffset: 1225 +- name: 'Overcoming Resistance: Hackathons and side projects to prove value' + startOffset: 1225 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1225 + endOffset: 1434 +- name: 'Lean Delivery: MVPs, 
iterative development, and scaling with OKRs' + startOffset: 1434 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1434 + endOffset: 1577 +- name: 'Prototype-first: Embrace imperfect code to validate solutions' + startOffset: 1577 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1577 + endOffset: 1759 +- name: 'Handover Strategy: Creating ownership for productionization' + startOffset: 1759 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1759 + endOffset: 1962 +- name: 'Value over Aesthetics: Message clarity beats polish in early stages' + startOffset: 1962 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1962 + endOffset: 2092 +- name: 'Hummus Metaphor: Quick prototype versus crafted product' + startOffset: 2092 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2092 + endOffset: 2193 +- name: 'Non-technical Stakeholders: Explain effort in plain language' + startOffset: 2193 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2193 + endOffset: 2384 +- name: 'Cross-team Empathy: Breaking silos through co-working and lunches' + startOffset: 2384 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2384 + endOffset: 2575 +- name: 'Remote Collaboration: Chat-driven triggers and selective meeting use' + startOffset: 2575 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2575 + endOffset: 2729 +- name: 'Product Perspectives: Consumers vs engineers — the hummus analogy' + startOffset: 2729 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2729 + endOffset: 3050 +- name: 'Local Anecdote: Recommended hummus spots in Berlin' + startOffset: 3050 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3050 + endOffset: 3096 +- name: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' + startOffset: 3096 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3096 + endOffset: 3200 +- name: 'Strategy Foundations: Build a stable data ground before models' + startOffset: 3200 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3200 + endOffset: 3349 
+- name: 'Data Chaos: Clearing requests, educating users, and leading growth' + startOffset: 3349 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3349 + endOffset: 3460 +- name: 'Resources & Contact: Lior''s LinkedIn, Twitter, and podcast' + startOffset: 3460 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3460 + endOffset: 3481 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3481 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3481 + endOffset: 3443 + transcript: - header: Podcast Introduction - header: 'Episode Theme: Bridging Data & Management' @@ -929,126 +1042,6 @@ transcript: sec: 3534 time: '58:54' who: Lior -description: Discover how a data translator bridges management and tech to drive data-driven - growth—practical data strategy, forecasts, prototypes, and team alignment. -intro: 'How do you bridge the gap between data teams and management so analytics actually - drives growth? In this episode, Lior Barak — author of "Data is Like a Plate of Hummus," - co-host of WHAT the Data?! and founder of Tale About Data with 12+ years building - data teams — lays out the role of a data translator: a product-minded strategist - who converts technical outputs into business-aligned action.

We explore - practical tactics for building data trust (proactive alerts, QA dashboards, and - confidence intervals for forecasts), embedding with business teams to learn workflows, - and using data-led growth to improve recruitment, marketing, and operations. Lior - walks through ways to overcome resistance — hackathons and side projects — and advocates - lean delivery: MVPs, prototype-first development, clear handover strategies, and - scaling with OKRs. He also covers how to explain effort to non-technical stakeholders, - break silos through co-working, and use chat-driven remote collaboration effectively. -

Listen to learn concrete approaches for data strategy, data communication, - and production-ready delivery that help your organization move from data chaos to - measurable, data-driven growth.' -dateadded: '2021-05-01' -duration: PT00H57M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=0 - endOffset: 91 -- name: 'Episode Theme: Bridging Data & Management' - startOffset: 91 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=91 - endOffset: 150 -- name: 'Guest Background: Lior''s data and product journey' - startOffset: 150 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=150 - endOffset: 248 -- name: 'Role Defined: Data strategist as translator between business and tech' - startOffset: 248 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=248 - endOffset: 466 -- name: 'Communication Tactics: Proactive alerts to maintain data trust' - startOffset: 466 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=466 - endOffset: 648 -- name: 'Forecast Transparency: Confidence intervals and QA dashboards' - startOffset: 648 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=648 - endOffset: 795 -- name: 'Translator Profiles: Product-minded data advocates for alignment' - startOffset: 795 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=795 - endOffset: 860 -- name: 'Embedment: Sitting with business to learn workflows and needs' - startOffset: 860 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=860 - endOffset: 1053 -- name: 'Data-led Growth: Using data to improve recruitment, marketing, and ops' - startOffset: 1053 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1053 - endOffset: 1225 -- name: 'Overcoming Resistance: Hackathons and side projects to prove value' - startOffset: 1225 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1225 - endOffset: 1434 -- name: 'Lean Delivery: MVPs, iterative development, and scaling with OKRs' - startOffset: 1434 - url: 
https://www.youtube.com/watch?v=gqroEsTyLD0&t=1434 - endOffset: 1577 -- name: 'Prototype-first: Embrace imperfect code to validate solutions' - startOffset: 1577 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1577 - endOffset: 1759 -- name: 'Handover Strategy: Creating ownership for productionization' - startOffset: 1759 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1759 - endOffset: 1962 -- name: 'Value over Aesthetics: Message clarity beats polish in early stages' - startOffset: 1962 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1962 - endOffset: 2092 -- name: 'Hummus Metaphor: Quick prototype versus crafted product' - startOffset: 2092 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2092 - endOffset: 2193 -- name: 'Non-technical Stakeholders: Explain effort in plain language' - startOffset: 2193 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2193 - endOffset: 2384 -- name: 'Cross-team Empathy: Breaking silos through co-working and lunches' - startOffset: 2384 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2384 - endOffset: 2575 -- name: 'Remote Collaboration: Chat-driven triggers and selective meeting use' - startOffset: 2575 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2575 - endOffset: 2729 -- name: 'Product Perspectives: Consumers vs engineers — the hummus analogy' - startOffset: 2729 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2729 - endOffset: 3050 -- name: 'Local Anecdote: Recommended hummus spots in Berlin' - startOffset: 3050 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3050 - endOffset: 3096 -- name: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' - startOffset: 3096 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3096 - endOffset: 3200 -- name: 'Strategy Foundations: Build a stable data ground before models' - startOffset: 3200 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3200 - endOffset: 3349 -- name: 'Data Chaos: Clearing requests, educating users, and leading 
growth' - startOffset: 3349 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3349 - endOffset: 3460 -- name: 'Resources & Contact: Lior''s LinkedIn, Twitter, and podcast' - startOffset: 3460 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3460 - endOffset: 3481 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3481 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3481 - endOffset: 3443 --- diff --git a/_podcast/s11e03-from-data-science-to-dataops.md b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md similarity index 97% rename from _podcast/s11e03-from-data-science-to-dataops.md rename to _podcast/dataops-and-gitops-best-practices-for-data-teams.md index 7ab1059b..94b606b6 100644 --- a/_podcast/s11e03-from-data-science-to-dataops.md +++ b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md @@ -1,20 +1,149 @@ --- +title: 'DataOps & GitOps for Data Teams: Onboarding, IaC, Reproducibility & Production Best Practices' +short: From Data Science to DataOps +season: 11 episode: 3 guests: - tomaszhinc +image: images/podcast/s11e03-from-data-science-to-dataops.jpg ids: anchor: From-Data-Science-to-DataOps---Tomasz-Hinc-e1p7sjb youtube: lem7knxqNzg -image: images/podcast/s11e03-from-data-science-to-dataops.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Data-Science-to-DataOps---Tomasz-Hinc-e1p7sjb apple: https://podcasts.apple.com/us/podcast/from-data-science-to-dataops-tomasz-hinc/id1541710331?i=1000583457504 spotify: https://open.spotify.com/episode/6jLgdl59sVCdVNJezdIqJY?si=NXasnXtFQVO0KAcCFbvUtQ youtube: https://www.youtube.com/watch?v=lem7knxqNzg -season: 11 -short: From Data Science to DataOps -title: 'DataOps & GitOps for Data Teams: Onboarding, IaC, Reproducibility & Production - Best Practices' + +description: Master DataOps, GitOps and IaC best practices for reproducibility, onboarding and production reliability — actionable Git workflows, Terraform, Docker tips +intro: How do you make data work less 
fragile and easier to onboard while keeping production safe and reproducible? In this episode, Tomasz Hinc, a DataOps practitioner from Poznań with roots in econometrics, product analytics, data engineering and ML, walks through practical DataOps and GitOps patterns for data teams. We cover platform onboarding (requesting infra vs. merge requests), Infrastructure as Code with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis dry‑run and apply. Tomasz explains reproducibility strategies—fixed versions, Docker, dependency management—and common production pitfalls like silent failures and Airflow caveats. You’ll hear about reducing onboarding friction for data scientists, the minimal operational skills every data role benefits from (Git, CLI, IAM), and platform team responsibilities for review, enablement and proactive support. If you’re focused on Infrastructure as Code, GitOps, reproducible pipelines, or practical production best practices for batch workloads and CI migrations, this episode delivers hands‑on advice, learning paths and tooling choices to make your data work faster, safer and more maintainable +topics: +- DataOps +- GitOps +- data teams +- tools +dateadded: 2022-10-22 + +duration: PT01H05M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=0 + endOffset: 100 +- name: Guest Introduction & Episode Overview + startOffset: 100 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=100 + endOffset: 145 +- name: 'Career Journey: Econometrics → ML Trainee → Data Roles' + startOffset: 145 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=145 + endOffset: 271 +- name: 'Early Experience: OLX, Government Statistics, Academia' + startOffset: 271 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=271 + endOffset: 320 +- name: 'ML Education: Multi‑Dimensional Analysis to Machine Learning' + startOffset: 320 + url: 
https://www.youtube.com/watch?v=lem7knxqNzg&t=320 + endOffset: 394 +- name: 'Behavioral Analysis & Product Analytics: Clickstream Modeling' + startOffset: 394 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=394 + endOffset: 428 +- name: 'Operational Realities: ETL Failures, Production Constraints' + startOffset: 428 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=428 + endOffset: 760 +- name: 'Platform Onboarding: Requesting Infra vs. Doing a Merge Request' + startOffset: 760 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=760 + endOffset: 787 +- name: 'Platform Teams’ Role: Review, Enablement, and Safe Practices' + startOffset: 787 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=787 + endOffset: 852 +- name: 'Motivation Shift: From Model‑Centric to Data‑Centric Work' + startOffset: 852 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=852 + endOffset: 1139 +- name: 'Defining DataOps: Enabling Faster, Less Scary Data Work (DataOps, DevOps)' + startOffset: 1139 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1139 + endOffset: 1256 +- name: 'DataOps & Infra: SQL, Secrets, GitOps, and Developer Enablement' + startOffset: 1256 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1256 + endOffset: 1384 +- name: 'GitOps & IaC Overview: Terraform, Terragrunt, Atlantis' + startOffset: 1384 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1384 + endOffset: 1422 +- name: 'Infrastructure as Code: Declarative Configurations & Reproducibility' + startOffset: 1422 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1422 + endOffset: 1581 +- name: 'GitOps Workflow: Branch, Merge Request, Atlantis Dry Run, Apply' + startOffset: 1581 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1581 + endOffset: 1654 +- name: 'Onboarding Friction: Tooling Challenges for Data Scientists' + startOffset: 1654 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1654 + endOffset: 1774 +- name: 'Learning Path: Narrow Scope, Hands‑On Mentorship, Roadmap Advice' + 
startOffset: 1774 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1774 + endOffset: 2155 +- name: 'Terminal Comfort: Shell Setup, Autocomplete, and Productivity Tweaks' + startOffset: 2155 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2155 + endOffset: 2300 +- name: 'Learning Resources: YouTube, Articles, and CLI Tutorials' + startOffset: 2300 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2300 + endOffset: 2444 +- name: 'DataOps vs Data Engineering: Support & Communication vs Pipeline Coding' + startOffset: 2444 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2444 + endOffset: 2512 +- name: 'Proactive Support: Monitoring, Onboarding, and Cross‑Team Education' + startOffset: 2512 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2512 + endOffset: 2663 +- name: 'Suitable Backgrounds: Any Data Role; Log Reading & Troubleshooting' + startOffset: 2663 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2663 + endOffset: 2875 +- name: 'Minimal Operational Skills: Git, Command Line, IAM, Password Managers' + startOffset: 2875 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2875 + endOffset: 3277 +- name: 'Distinction from Management: Cross‑Team Enablement vs Team Leads' + startOffset: 3277 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3277 + endOffset: 3404 +- name: 'Infrastructure Choices for Data: Batch Workloads, ECS/AWS Batch vs Kubernetes' + startOffset: 3404 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3404 + endOffset: 3506 +- name: 'Company‑Scale Migration: Jenkins → GitLab CI and Broad Collaboration' + startOffset: 3506 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3506 + endOffset: 3687 +- name: 'Reproducibility & Dependencies: Fixed Versions, Docker, Silent Failures' + startOffset: 3687 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3687 + endOffset: 3748 +- name: 'Confidence in Data: Pragmatic Edge‑Case Checks & Airflow Caveats' + startOffset: 3748 + url: 
https://www.youtube.com/watch?v=lem7knxqNzg&t=3748 + endOffset: 3941 +- name: Closing Remarks, Resources, and Subscribe Call to Action + startOffset: 3941 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3941 + endOffset: 3909 + transcript: - header: Podcast Introduction - header: Guest Introduction & Episode Overview @@ -980,142 +1109,6 @@ transcript: sec: 4009 time: '1:06:49' who: Alexey -description: Master DataOps, GitOps and IaC best practices for reproducibility, onboarding - and production reliability — actionable Git workflows, Terraform, Docker tips. -intro: How do you make data work less fragile and easier to onboard while keeping - production safe and reproducible? In this episode, Tomasz Hinc, a DataOps practitioner - from Poznań with roots in econometrics, product analytics, data engineering and - ML, walks through practical DataOps and GitOps patterns for data teams. We cover - platform onboarding (requesting infra vs. merge requests), Infrastructure as Code - with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis - dry‑run and apply. Tomasz explains reproducibility strategies—fixed versions, Docker, - dependency management—and common production pitfalls like silent failures and Airflow - caveats. You’ll hear about reducing onboarding friction for data scientists, the - minimal operational skills every data role benefits from (Git, CLI, IAM), and platform - team responsibilities for review, enablement and proactive support. If you’re focused - on Infrastructure as Code, GitOps, reproducible pipelines, or practical production - best practices for batch workloads and CI migrations, this episode delivers hands‑on - advice, learning paths and tooling choices to make your data work faster, safer - and more maintainable. 
-dateadded: '2022-10-22' -duration: PT01H05M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=0 - endOffset: 100 -- name: Guest Introduction & Episode Overview - startOffset: 100 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=100 - endOffset: 145 -- name: 'Career Journey: Econometrics → ML Trainee → Data Roles' - startOffset: 145 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=145 - endOffset: 271 -- name: 'Early Experience: OLX, Government Statistics, Academia' - startOffset: 271 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=271 - endOffset: 320 -- name: 'ML Education: Multi‑Dimensional Analysis to Machine Learning' - startOffset: 320 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=320 - endOffset: 394 -- name: 'Behavioral Analysis & Product Analytics: Clickstream Modeling' - startOffset: 394 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=394 - endOffset: 428 -- name: 'Operational Realities: ETL Failures, Production Constraints' - startOffset: 428 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=428 - endOffset: 760 -- name: 'Platform Onboarding: Requesting Infra vs. 
Doing a Merge Request' - startOffset: 760 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=760 - endOffset: 787 -- name: 'Platform Teams’ Role: Review, Enablement, and Safe Practices' - startOffset: 787 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=787 - endOffset: 852 -- name: 'Motivation Shift: From Model‑Centric to Data‑Centric Work' - startOffset: 852 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=852 - endOffset: 1139 -- name: 'Defining DataOps: Enabling Faster, Less Scary Data Work (DataOps, DevOps)' - startOffset: 1139 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1139 - endOffset: 1256 -- name: 'DataOps & Infra: SQL, Secrets, GitOps, and Developer Enablement' - startOffset: 1256 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1256 - endOffset: 1384 -- name: 'GitOps & IaC Overview: Terraform, Terragrunt, Atlantis' - startOffset: 1384 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1384 - endOffset: 1422 -- name: 'Infrastructure as Code: Declarative Configurations & Reproducibility' - startOffset: 1422 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1422 - endOffset: 1581 -- name: 'GitOps Workflow: Branch, Merge Request, Atlantis Dry Run, Apply' - startOffset: 1581 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1581 - endOffset: 1654 -- name: 'Onboarding Friction: Tooling Challenges for Data Scientists' - startOffset: 1654 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1654 - endOffset: 1774 -- name: 'Learning Path: Narrow Scope, Hands‑On Mentorship, Roadmap Advice' - startOffset: 1774 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1774 - endOffset: 2155 -- name: 'Terminal Comfort: Shell Setup, Autocomplete, and Productivity Tweaks' - startOffset: 2155 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2155 - endOffset: 2300 -- name: 'Learning Resources: YouTube, Articles, and CLI Tutorials' - startOffset: 2300 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2300 - endOffset: 2444 -- name: 
'DataOps vs Data Engineering: Support & Communication vs Pipeline Coding' - startOffset: 2444 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2444 - endOffset: 2512 -- name: 'Proactive Support: Monitoring, Onboarding, and Cross‑Team Education' - startOffset: 2512 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2512 - endOffset: 2663 -- name: 'Suitable Backgrounds: Any Data Role; Log Reading & Troubleshooting' - startOffset: 2663 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2663 - endOffset: 2875 -- name: 'Minimal Operational Skills: Git, Command Line, IAM, Password Managers' - startOffset: 2875 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2875 - endOffset: 3277 -- name: 'Distinction from Management: Cross‑Team Enablement vs Team Leads' - startOffset: 3277 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3277 - endOffset: 3404 -- name: 'Infrastructure Choices for Data: Batch Workloads, ECS/AWS Batch vs Kubernetes' - startOffset: 3404 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3404 - endOffset: 3506 -- name: 'Company‑Scale Migration: Jenkins → GitLab CI and Broad Collaboration' - startOffset: 3506 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3506 - endOffset: 3687 -- name: 'Reproducibility & Dependencies: Fixed Versions, Docker, Silent Failures' - startOffset: 3687 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3687 - endOffset: 3748 -- name: 'Confidence in Data: Pragmatic Edge‑Case Checks & Airflow Caveats' - startOffset: 3748 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3748 - endOffset: 3941 -- name: Closing Remarks, Resources, and Subscribe Call to Action - startOffset: 3941 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3941 - endOffset: 3909 --- Links: diff --git a/_podcast/s08e05-storytime-for-dataops.md b/_podcast/dataops-automation-and-reliable-data-pipelines.md similarity index 98% rename from _podcast/s08e05-storytime-for-dataops.md rename to 
_podcast/dataops-automation-and-reliable-data-pipelines.md index 1c6eb2ee..8513d5b7 100644 --- a/_podcast/s08e05-storytime-for-dataops.md +++ b/_podcast/dataops-automation-and-reliable-data-pipelines.md @@ -1,39 +1,151 @@ --- -season: 8 -episode: 5 title: 'Mastering DataOps: Automation, Observability & CI/CD for Reliable Data Pipelines' short: Storytime for DataOps -description: 'Master DataOps: automate pipelines, data observability and CI/CD to - cut errors, speed deployments, and deliver reliable, testable data pipelines.' +season: 8 +episode: 5 guests: - christopherbergh -intro: 'How do you build reliable data pipelines that move fast without breaking production? - In this episode, Christopher Bergh — CEO and Head Chef at DataKitchen, co-author - of the DataOps Cookbook and Manifesto, and a 25+-year veteran across research, engineering, - analytics, and leadership — walks through practical approaches to mastering DataOps: - automation, observability, and CI/CD for dependable data delivery.

We cover - core targets like error reduction, deployment cycle time, and team productivity; - the role of data observability and monitoring in catching production errors; and - the trade-offs between “done” and “good.” Chris explains the shift from runbooks - to automated playbooks, an automation-first mindset (“code that acts on data”), - and seven practical steps for healthier pipelines—VC, tests, CI/CD, and more. He - contrasts DataOps with MLOps, argues for end-to-end versioning, and discusses tooling - choices including dbt, Great Expectations, and SQL tests, plus platform orchestration - and governance.

Listen to learn concrete tactics for improving data quality, - shrinking incident toil, proving systems with end-to-end testing, and where to focus - time and tooling to accelerate reliable analytics delivery.' +image: images/podcast/s08e05-storytime-for-dataops.jpg ids: anchor: Storytime-for-DataOps---Christopher-Bergh-e1hgl0m youtube: 0Fx5PCoLkf4 -image: images/podcast/s08e05-storytime-for-dataops.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Storytime-for-DataOps---Christopher-Bergh-e1hgl0m apple: https://podcasts.apple.com/us/podcast/storytime-for-dataops-christopher-bergh/id1541710331?i=1000558399936 spotify: https://open.spotify.com/episode/2PcBsHslUVnjXFhC9hv6zk youtube: https://www.youtube.com/watch?v=0Fx5PCoLkf4 + +description: 'Master DataOps: automate pipelines, data observability and CI/CD to cut errors, speed deployments, and deliver reliable, testable data pipelines.' +intro: 'How do you build reliable data pipelines that move fast without breaking production? In this episode, Christopher Bergh — CEO and Head Chef at DataKitchen, co-author of the DataOps Cookbook and Manifesto, and a 25+-year veteran across research, engineering, analytics, and leadership — walks through practical approaches to mastering DataOps: automation, observability, and CI/CD for dependable data delivery.

We cover core targets like error reduction, deployment cycle time, and team productivity; the role of data observability and monitoring in catching production errors; and the trade-offs between “done” and “good.” Chris explains the shift from runbooks to automated playbooks, an automation-first mindset (“code that acts on data”), and seven practical steps for healthier pipelines—VC, tests, CI/CD, and more. He contrasts DataOps with MLOps, argues for end-to-end versioning, and discusses tooling choices including dbt, Great Expectations, and SQL tests, plus platform orchestration and governance.

Listen to learn concrete tactics for improving data quality, shrinking incident toil, proving systems with end-to-end testing, and where to focus time and tooling to accelerate reliable analytics delivery.' topics: - dataops - practices +dateadded: 2022-04-23 + +duration: PT01H02M23S + +quotableClips: +- name: 'Opening banter: "Father of DataOps" anecdote' + startOffset: 1 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1 + endOffset: 80 +- name: Chris Bergh background and career pivot to data leadership + startOffset: 80 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=80 + endOffset: 121 +- name: 'Transition: from software engineer to managing data teams; factory metaphor' + startOffset: 121 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=121 + endOffset: 255 +- name: 'Factory + Agile: balancing production stability and rapid change' + startOffset: 255 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=255 + endOffset: 402 +- name: 'Core targets: error reduction, deployment cycle time, and team productivity' + startOffset: 402 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=402 + endOffset: 442 +- name: Data observability & monitoring for data quality and production errors + startOffset: 442 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=442 + endOffset: 711 +- name: 'Production quality consequences: detecting and remediating simple failures' + startOffset: 711 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=711 + endOffset: 742 +- name: 'Processes vs tools: leadership, automation, and organizational focus' + startOffset: 742 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=742 + endOffset: 800 +- name: 'Naming the movement: choosing "DataOps" and the DevOps analogy' + startOffset: 800 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=800 + endOffset: 1094 +- name: 'Human impact: stress, blame culture, and owning the process' + startOffset: 1094 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1094 + endOffset: 1196 +- 
name: 'Defining "done" vs "good": readiness criteria and trade-offs' + startOffset: 1196 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1196 + endOffset: 1262 +- name: 'Heroism vs feedback: early releases and customer iteration' + startOffset: 1262 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1262 + endOffset: 1499 +- name: 'Two iteration loops: customer validation and data/model validity' + startOffset: 1499 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1499 + endOffset: 1694 +- name: 'Optimizing value streams: breaking silos across teams and governance' + startOffset: 1694 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1694 + endOffset: 1883 +- name: 'Deferred-value traps: data lake/cloud hype and postponed outcomes' + startOffset: 1883 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1883 + endOffset: 2027 +- name: 'Seven practical steps for healthier data pipelines: VC, tests, CI/CD' + startOffset: 2027 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2027 + endOffset: 2077 +- name: 'Runbooks to automation: move from checklists to automated playbooks' + startOffset: 2077 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2077 + endOffset: 2233 +- name: 'Automation-first mindset: "code that acts on data" beyond labels' + startOffset: 2233 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2233 + endOffset: 2281 +- name: 'Replaceability: handoffs, documentation, and on-call reduction' + startOffset: 2281 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2281 + endOffset: 2429 +- name: 'Hairball anti-pattern: technical debt, maintainability, and refactoring' + startOffset: 2429 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2429 + endOffset: 2586 +- name: 'Adoption barriers: proving systems with end-to-end testing and data' + startOffset: 2586 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2586 + endOffset: 2652 +- name: Test environments & test data challenges; recommend ~15% time for process + startOffset: 
2652 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2652 + endOffset: 2905 +- name: 'Tooling for DataOps: dbt, Great Expectations, SQL tests, and strategies' + startOffset: 2905 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2905 + endOffset: 3042 +- name: 'DataOps vs MLOps: shared DevOps principles applied to models and pipelines' + startOffset: 3042 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3042 + endOffset: 3081 +- name: 'End-to-end versioning: code, models, visualizations, governance as one unit' + startOffset: 3081 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3081 + endOffset: 3213 +- name: 'DataKitchen snapshot: company mission, "Head Chef" role, and team focus' + startOffset: 3213 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3213 + endOffset: 3392 +- name: 'Platform overview: orchestrating environments, tests, and observability' + startOffset: 3392 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3392 + endOffset: 3400 +- name: 'Market context: DataOps vendor landscape and funding trends' + startOffset: 3400 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3400 + endOffset: 3627 +- name: 'Learning resources: DataOps Cookbook, manifesto, courses, and manager guide' + startOffset: 3627 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3627 + endOffset: 3708 +- name: 'Closing remarks: adoption outlook and links to resources' + startOffset: 3708 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3708 + endOffset: 3743 + transcript: - header: 'Opening banter: "Father of DataOps" anecdote' - line: Some people call you the Father of DataOps. 
@@ -1102,129 +1214,6 @@ transcript: sec: 3744 time: '1:02:24' who: Alexey -dateadded: '2022-04-23' -duration: PT01H02M23S -quotableClips: -- name: 'Opening banter: "Father of DataOps" anecdote' - startOffset: 1 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1 - endOffset: 80 -- name: Chris Bergh background and career pivot to data leadership - startOffset: 80 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=80 - endOffset: 121 -- name: 'Transition: from software engineer to managing data teams; factory metaphor' - startOffset: 121 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=121 - endOffset: 255 -- name: 'Factory + Agile: balancing production stability and rapid change' - startOffset: 255 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=255 - endOffset: 402 -- name: 'Core targets: error reduction, deployment cycle time, and team productivity' - startOffset: 402 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=402 - endOffset: 442 -- name: Data observability & monitoring for data quality and production errors - startOffset: 442 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=442 - endOffset: 711 -- name: 'Production quality consequences: detecting and remediating simple failures' - startOffset: 711 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=711 - endOffset: 742 -- name: 'Processes vs tools: leadership, automation, and organizational focus' - startOffset: 742 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=742 - endOffset: 800 -- name: 'Naming the movement: choosing "DataOps" and the DevOps analogy' - startOffset: 800 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=800 - endOffset: 1094 -- name: 'Human impact: stress, blame culture, and owning the process' - startOffset: 1094 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1094 - endOffset: 1196 -- name: 'Defining "done" vs "good": readiness criteria and trade-offs' - startOffset: 1196 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1196 - endOffset: 1262 
-- name: 'Heroism vs feedback: early releases and customer iteration' - startOffset: 1262 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1262 - endOffset: 1499 -- name: 'Two iteration loops: customer validation and data/model validity' - startOffset: 1499 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1499 - endOffset: 1694 -- name: 'Optimizing value streams: breaking silos across teams and governance' - startOffset: 1694 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1694 - endOffset: 1883 -- name: 'Deferred-value traps: data lake/cloud hype and postponed outcomes' - startOffset: 1883 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1883 - endOffset: 2027 -- name: 'Seven practical steps for healthier data pipelines: VC, tests, CI/CD' - startOffset: 2027 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2027 - endOffset: 2077 -- name: 'Runbooks to automation: move from checklists to automated playbooks' - startOffset: 2077 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2077 - endOffset: 2233 -- name: 'Automation-first mindset: "code that acts on data" beyond labels' - startOffset: 2233 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2233 - endOffset: 2281 -- name: 'Replaceability: handoffs, documentation, and on-call reduction' - startOffset: 2281 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2281 - endOffset: 2429 -- name: 'Hairball anti-pattern: technical debt, maintainability, and refactoring' - startOffset: 2429 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2429 - endOffset: 2586 -- name: 'Adoption barriers: proving systems with end-to-end testing and data' - startOffset: 2586 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2586 - endOffset: 2652 -- name: Test environments & test data challenges; recommend ~15% time for process - startOffset: 2652 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2652 - endOffset: 2905 -- name: 'Tooling for DataOps: dbt, Great Expectations, SQL tests, and strategies' - 
startOffset: 2905 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2905 - endOffset: 3042 -- name: 'DataOps vs MLOps: shared DevOps principles applied to models and pipelines' - startOffset: 3042 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3042 - endOffset: 3081 -- name: 'End-to-end versioning: code, models, visualizations, governance as one unit' - startOffset: 3081 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3081 - endOffset: 3213 -- name: 'DataKitchen snapshot: company mission, "Head Chef" role, and team focus' - startOffset: 3213 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3213 - endOffset: 3392 -- name: 'Platform overview: orchestrating environments, tests, and observability' - startOffset: 3392 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3392 - endOffset: 3400 -- name: 'Market context: DataOps vendor landscape and funding trends' - startOffset: 3400 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3400 - endOffset: 3627 -- name: 'Learning resources: DataOps Cookbook, manifesto, courses, and manager guide' - startOffset: 3627 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3627 - endOffset: 3708 -- name: 'Closing remarks: adoption outlook and links to resources' - startOffset: 3708 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3708 - endOffset: 3743 --- Links: diff --git a/_podcast/s02e11-dataops.md b/_podcast/dataops-principles-and-scalable-data-platforms.md similarity index 97% rename from _podcast/s02e11-dataops.md rename to _podcast/dataops-principles-and-scalable-data-platforms.md index 0a55ce5d..5d9a9a94 100644 --- a/_podcast/s02e11-dataops.md +++ b/_podcast/dataops-principles-and-scalable-data-platforms.md @@ -1,12 +1,11 @@ --- -title: 'DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self‑Service - Lakehouse & Reproducibility' +title: 'DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self‑Service Lakehouse & Reproducibility' short: DataOps 101 +season: 2 +episode: 11 
guests: - larsalbertsson image: images/podcast/s02e11-dataops.jpg -season: 2 -episode: 11 ids: youtube: vyF3yGsF6UY anchor: DataOps-101---Lars-Albertsson-ethsp1 @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/DataOps-101---Lars-Albertsson-ethsp1 spotify: https://open.spotify.com/episode/5c2m4FVq4KPCfSXndCAzNd apple: https://podcasts.apple.com/us/podcast/dataops-101-lars-albertsson/id1541710331?i=1000514542438 + +description: Discover DataOps strategies, immutable pipelines & a self-service lakehouse to boost reproducibility, scale data platforms, enable analysts and speed delivery +intro: How do you scale a data platform that supports self‑service analytics while keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, founder of Scling and former Google, Spotify and Schibsted engineer, walks through pragmatic DataOps principles for building scalable data platforms.

We dig into building self‑service at Spotify, orchestration with Luigi, and the core platform components—storage, compute and workflow engines—plus compute choices like Spark, Flink, containers and managed services. Lars explains immutable, functional pipeline design to solve reproducibility problems, contrasts data lakes and warehouses (raw dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, CDC and database versioning strategies. He also explores batch vs streaming trade‑offs, micro‑batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, and risks around data mesh and decentralization.

Listeners will come away with concrete architectural trade‑offs, patterns for immutable pipelines and self‑service lakehouse design, and recommended readings from the Scling list to deepen expertise in DataOps, lineage, versioning and practical data engineering +topics: +- DataOps +- data engineering +- MLOps +dateadded: 2021-03-27 + +duration: PT01H09M27S + +quotableClips: +- name: Episode Opening & Guest Introduction + startOffset: 159 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=159 + endOffset: 218 +- name: 'Career Journey: Google, Spotify, Consulting and Scling' + startOffset: 218 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=218 + endOffset: 472 +- name: 'Scaling Data Teams: Building Self‑Service at Spotify' + startOffset: 472 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=472 + endOffset: 648 +- name: 'Orchestration Spotlight: Luigi as a Data Build System' + startOffset: 648 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=648 + endOffset: 710 +- name: 'DataOps Defined: Enablement, Workflows and People Alignment' + startOffset: 710 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=710 + endOffset: 1002 +- name: 'Data Platform Principles: Immutability & Functional Architecture' + startOffset: 1002 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1002 + endOffset: 1212 +- name: 'Reproducibility Problems: Mutable ETL vs Immutable Pipelines' + startOffset: 1212 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1212 + endOffset: 1289 +- name: 'Data Lake vs Data Warehouse: Raw Data, Aggregates & Use Cases' + startOffset: 1289 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1289 + endOffset: 1409 +- name: 'Data Lake Fundamentals: Object Storage, Governance & Raw Dumps' + startOffset: 1409 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1409 + endOffset: 1702 +- name: 'Ingress & Egress: Offline Processing and Self‑Service SQL' + startOffset: 1702 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1702 + endOffset: 1834
+- name: 'Core Platform Components: Storage, Compute & Workflow Engine' + startOffset: 1834 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1834 + endOffset: 1878 +- name: 'Compute Options: Spark, Flink, Containers and Managed Services' + startOffset: 1878 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1878 + endOffset: 2157 +- name: 'Cloud Trade‑offs: Prepackaged Platforms vs DIY Assembly' + startOffset: 2157 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2157 + endOffset: 2397 +- name: 'Recommended Reading: Lambda Architecture, Practical DataOps & Scling List' + startOffset: 2397 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2397 + endOffset: 2513 +- name: 'Batch vs Streaming: Latency Tradeoffs and Typical Use Cases' + startOffset: 2513 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2513 + endOffset: 2711 +- name: 'Micro‑batching vs Streaming: Dependency Management & Predictability' + startOffset: 2711 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2711 + endOffset: 2812 +- name: 'DataOps Maturity: Test‑Certified Practices, Quality & Schema Automation' + startOffset: 2812 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2812 + endOffset: 3013 +- name: 'Enabling Self‑Service Analytics: Embedding Engineers with Analysts' + startOffset: 3013 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3013 + endOffset: 3211 +- name: 'MLOps vs DataOps: Shared Principles and ML‑Specific Requirements' + startOffset: 3211 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3211 + endOffset: 3466 +- name: 'Data Mesh Overview: Decentralization, Ownership & Governance Risks' + startOffset: 3466 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3466 + endOffset: 3782 +- name: 'Splitting the Platform: When to Decentralize vs Centralize' + startOffset: 3782 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3782 + endOffset: 3858 +- name: 'Lineage & Versioning: Code‑Defined Pipelines vs Catalog Tools' + startOffset: 3858 + url: 
https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3858 + endOffset: 3961 +- name: 'Database Versioning: Full Dumps, CDC (Change Data Capture) Strategies' + startOffset: 3961 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3961 + endOffset: 4072 +- name: 'Lakehouse Architecture: Warehouse Features Layered on Data Lake' + startOffset: 4072 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4072 + endOffset: 4261 +- name: 'Further Resources: Scling Reading List & Presentations' + startOffset: 4261 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4261 + endOffset: 4326 +- name: Episode Closing + startOffset: 4326 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4326 + endOffset: 4167 + transcript: - header: Episode Opening & Guest Introduction - line: This week, we'll talk about data Ops — what is this and how is it different @@ -918,130 +1034,6 @@ transcript: sec: 4326 time: '1:12:06' who: Lars -description: Discover DataOps strategies, immutable pipelines & a self-service lakehouse - to boost reproducibility, scale data platforms, enable analysts and speed delivery. -intro: How do you scale a data platform that supports self‑service analytics while - keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, - founder of Scling and former Google, Spotify and Schibsted engineer, walks through - pragmatic DataOps principles for building scalable data platforms.

We dig - into building self‑service at Spotify, orchestration with Luigi, and the core platform - components—storage, compute and workflow engines—plus compute choices like Spark, - Flink, containers and managed services. Lars explains immutable, functional pipeline - design to solve reproducibility problems, contrasts data lakes and warehouses (raw - dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, - CDC and database versioning strategies. He also explores batch vs streaming trade‑offs, - micro‑batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, - and risks around data mesh and decentralization.

Listeners will come away - with concrete architectural trade‑offs, patterns for immutable pipelines and self‑service - lakehouse design, and recommended readings from the Scling list to deepen expertise - in DataOps, lineage, versioning and practical data engineering. -dateadded: '2021-03-27' -duration: PT01H09M27S -quotableClips: -- name: Episode Opening & Guest Introduction - startOffset: 159 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=159 - endOffset: 218 -- name: 'Career Journey: Google, Spotify, Consulting and Scling' - startOffset: 218 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=218 - endOffset: 472 -- name: 'Scaling Data Teams: Building Self‑Service at Spotify' - startOffset: 472 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=472 - endOffset: 648 -- name: 'Orchestration Spotlight: Luigi as a Data Build System' - startOffset: 648 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=648 - endOffset: 710 -- name: 'DataOps Defined: Enablement, Workflows and People Alignment' - startOffset: 710 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=710 - endOffset: 1002 -- name: 'Data Platform Principles: Immutability & Functional Architecture' - startOffset: 1002 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1002 - endOffset: 1212 -- name: 'Reproducibility Problems: Mutable ETL vs Immutable Pipelines' - startOffset: 1212 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1212 - endOffset: 1289 -- name: 'Data Lake vs Data Warehouse: Raw Data, Aggregates & Use Cases' - startOffset: 1289 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1289 - endOffset: 1409 -- name: 'Data Lake Fundamentals: Object Storage, Governance & Raw Dumps' - startOffset: 1409 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1409 - endOffset: 1702 -- name: 'Ingress & Egress: Offline Processing and Self‑Service SQL' - startOffset: 1702 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1702 - endOffset: 1834 -- name: 'Core Platform Components: Storage, 
Compute & Workflow Engine' - startOffset: 1834 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1834 - endOffset: 1878 -- name: 'Compute Options: Spark, Flink, Containers and Managed Services' - startOffset: 1878 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1878 - endOffset: 2157 -- name: 'Cloud Trade‑offs: Prepackaged Platforms vs DIY Assembly' - startOffset: 2157 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2157 - endOffset: 2397 -- name: 'Recommended Reading: Lambda Architecture, Practical DataOps & Scling List' - startOffset: 2397 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2397 - endOffset: 2513 -- name: 'Batch vs Streaming: Latency Tradeoffs and Typical Use Cases' - startOffset: 2513 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2513 - endOffset: 2711 -- name: 'Micro‑batching vs Streaming: Dependency Management & Predictability' - startOffset: 2711 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2711 - endOffset: 2812 -- name: 'DataOps Maturity: Test‑Certified Practices, Quality & Schema Automation' - startOffset: 2812 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2812 - endOffset: 3013 -- name: 'Enabling Self‑Service Analytics: Embedding Engineers with Analysts' - startOffset: 3013 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3013 - endOffset: 3211 -- name: 'MLOps vs DataOps: Shared Principles and ML‑Specific Requirements' - startOffset: 3211 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3211 - endOffset: 3466 -- name: 'Data Mesh Overview: Decentralization, Ownership & Governance Risks' - startOffset: 3466 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3466 - endOffset: 3782 -- name: 'Splitting the Platform: When to Decentralize vs Centralize' - startOffset: 3782 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3782 - endOffset: 3858 -- name: 'Lineage & Versioning: Code‑Defined Pipelines vs Catalog Tools' - startOffset: 3858 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3858 - 
endOffset: 3961 -- name: 'Database Versioning: Full Dumps, CDC (Change Data Capture) Strategies' - startOffset: 3961 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3961 - endOffset: 4072 -- name: 'Lakehouse Architecture: Warehouse Features Layered on Data Lake' - startOffset: 4072 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4072 - endOffset: 4261 -- name: 'Further Resources: Scling Reading List & Presentations' - startOffset: 4261 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4261 - endOffset: 4326 -- name: Episode Closing - startOffset: 4326 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4326 - endOffset: 4167 --- We talked about: diff --git a/_podcast/s07e01-datatalksclub-behind-the-scenes.md b/_podcast/datatalksclub-building-scaling-data-community.md similarity index 97% rename from _podcast/s07e01-datatalksclub-behind-the-scenes.md rename to _podcast/datatalksclub-building-scaling-data-community.md index ca0e28a3..074bf74f 100644 --- a/_podcast/s07e01-datatalksclub-behind-the-scenes.md +++ b/_podcast/datatalksclub-building-scaling-data-community.md @@ -1,13 +1,12 @@ --- -title: 'DataTalks.Club Behind the Scenes: Alexey Grigorev on Scaling and Growing the - Community' +title: 'DataTalks.Club Behind the Scenes: Alexey Grigorev on Scaling and Growing the Community' short: DataTalks.Club Behind the Scenes +season: 7 +episode: 1 guests: - eugeneyan - alexeygrigorev image: images/podcast/s07e01-datatalksclub-behind-the-scenes.jpg -season: 7 -episode: 1 ids: youtube: IxTyq96juVE anchor: DataTalks-Club-Behind-the-Scenes---Eugene-Yan--Alexey-Grigorev-e1d4567 @@ -16,6 +15,116 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/DataTalks-Club-Behind-the-Scenes---Eugene-Yan--Alexey-Grigorev-e1d4567 spotify: https://open.spotify.com/episode/3ltAxUsCE8EAf0pRb9zxDK apple: https://podcasts.apple.com/us/podcast/datatalks-club-behind-the-scenes-eugene-yan-alexey/id1541710331?i=1000548608967 + +description: Discover how to scale a 9k+ data 
science community, automate events, and advance your machine learning career with deployment, mentorship and growth tactics. +intro: 'How do you scale a grassroots machine learning community from a few forum posts to thousands of active members? In this episode, Alexey Grigorev — founder of DataTalks.Club — sits down with Eugene Yan to walk through the real-world steps behind scaling and growing a machine learning community. Alexey shares his origins (forums, landing page, early events), the growth inflection that led to ~9k members, and practical event formats that work: Open Source Spotlight, Minis, Book of the Week, live coding and office hours.

We cover tactical topics listeners can apply: content production and automation (planning, Zapier, Eventbrite), monetization and sponsorship models, and how project-based offerings like ML Bookcamp and Machine Learning Zoomcamp emphasize end-to-end deployment (Flask, AWS Lambda, Kubernetes, Kubeflow). Alexey also discusses community management lessons — mentoring, product mindset, avoiding tool churn — plus career and productivity advice such as learning-by-projects, public deadlines, and maintaining motivation.

If you’re building or scaling a machine learning community, this episode offers concrete strategies for community growth, event design, content automation, and running project-focused training.' +topics: +- community building +- machine learning +- data science +- data engineering +- MLOps +dateadded: 2022-01-23 + +duration: PT00H56M57S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=0 + endOffset: 9 +- name: 'Career Transition: Java to Machine Learning (Coursera, Andrew Ng)' + startOffset: 9 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=9 + endOffset: 86 +- name: Freelancing, Master's, and first data-science roles; building data pipelines + startOffset: 86 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=86 + endOffset: 306 +- name: 'Career Lessons: step outside comfort zone; product mindset; prefer simple + models' + startOffset: 306 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=306 + endOffset: 387 +- name: 'Principal Data Scientist Role: internal consulting, architecture, mentoring' + startOffset: 387 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=387 + endOffset: 576 +- name: 'Motivation to Start the Community: early interactions and LinkedIn outreach' + startOffset: 576 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=576 + endOffset: 605 +- name: 'Community Origins: forums, landing page, first events and format inspiration' + startOffset: 605 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=605 + endOffset: 1014 +- name: 'Community Growth & Events: conference boost and scaling to ~9k members' + startOffset: 1014 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1014 + endOffset: 1222 +- name: 'Content Production & Automation: planning, scheduling, Zapier, Eventbrite' + startOffset: 1222 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1222 + endOffset: 1478 +- name: 'Event Formats: Open Source Spotlight, Minis, Book of the Week' + startOffset: 1478 + 
url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1478 + endOffset: 1671 +- name: 'Notable Guests & Popular Episodes: Martin Kleppmann, Elena Samuylova, Santiago' + startOffset: 1671 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1671 + endOffset: 1897 +- name: 'Monetization & Sponsorship: costs, TopCoder, Toloka crowdsourcing workshop' + startOffset: 1897 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1897 + endOffset: 2302 +- name: 'ML Bookcamp & Machine Learning Zoomcamp: project-based, end-to-end learning' + startOffset: 2302 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2302 + endOffset: 2346 +- name: 'Deployment Focus in the Book/Course: Flask, AWS Lambda, Kubernetes, Kubeflow' + startOffset: 2346 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2346 + endOffset: 2569 +- name: 'Career Advice: join communities, answer questions, find mentors' + startOffset: 2569 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2569 + endOffset: 2635 +- name: 'Motivation & Persistence: handling frustration and sustaining interest' + startOffset: 2635 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2635 + endOffset: 2740 +- name: 'Tool Evaluation Strategy: avoid tool churn, follow lasting trends, Kedro + curiosity' + startOffset: 2740 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2740 + endOffset: 2936 +- name: 'Productivity & Workflow: public deadlines, accountability, batching work' + startOffset: 2936 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2936 + endOffset: 3031 +- name: 'Learning by Projects & Notes: just-in-time learning, Notion, READMEs, GitHub + docs' + startOffset: 3031 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3031 + endOffset: 3184 +- name: 'Community Inspiration & Format Ideas: borrowing from ML Ops and JavaRanch' + startOffset: 3184 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3184 + endOffset: 3307 +- name: 'Interactive Formats: live coding, office hours, ML Zoomcamp sessions' + startOffset: 
3307 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3307 + endOffset: 3356 +- name: Community Thanks & Future Plans + startOffset: 3356 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3356 + endOffset: 3410 +- name: Podcast Closing + startOffset: 3410 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3410 + endOffset: 3417 + transcript: - header: Podcast Introduction - header: 'Career Transition: Java to Machine Learning (Coursera, Andrew Ng)' @@ -929,122 +1038,6 @@ transcript: sec: 3426 time: '57:06' who: Alexey -description: 'Discover a machine learning career from Java: build data pipelines, - finish ML Zoomcamp projects, deploy with Flask & Kubernetes, and grow a 9k community.' -intro: 'How do you move from a Java background into a sustainable machine learning - career while building production-ready data pipelines and scaling a learning community? - In this episode, Alexey Grigorev, founder of DataTalks.Club, and Eugene Yan, Applied - Scientist at Amazon, walk through real-world steps for that transition—from taking - Andrew Ng’s Coursera course to first data-science roles, freelancing, and master’s - programs.

They discuss building pragmatic data pipelines, adopting a product - mindset, and why simple models often win in production. Learn what a principal data - scientist actually does—internal consulting, architecture, and mentoring—and how - that perspective shaped community efforts that grew to roughly 9,000 members. Alexey - explains the origins and formats of DataTalks.Club events, conference-driven growth, - and the automation behind content production (Zapier, Eventbrite).

You’ll - also get an inside look at project-based learning with ML Bookcamp / Machine Learning - Zoomcamp, deployment focus (Flask, AWS Lambda, Kubernetes, Kubeflow), monetization - and sponsorship realities, and practical career tactics: joining communities, finding - mentors, and learning by projects. Tune in for actionable guidance on machine learning - career development, ML in production, and community building.' -dateadded: '2022-01-23' -duration: PT00H56M57S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=0 - endOffset: 9 -- name: 'Career Transition: Java to Machine Learning (Coursera, Andrew Ng)' - startOffset: 9 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=9 - endOffset: 86 -- name: Freelancing, Master's, and first data-science roles; building data pipelines - startOffset: 86 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=86 - endOffset: 306 -- name: 'Career Lessons: step outside comfort zone; product mindset; prefer simple - models' - startOffset: 306 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=306 - endOffset: 387 -- name: 'Principal Data Scientist Role: internal consulting, architecture, mentoring' - startOffset: 387 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=387 - endOffset: 576 -- name: 'Motivation to Start the Community: early interactions and LinkedIn outreach' - startOffset: 576 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=576 - endOffset: 605 -- name: 'Community Origins: forums, landing page, first events and format inspiration' - startOffset: 605 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=605 - endOffset: 1014 -- name: 'Community Growth & Events: conference boost and scaling to ~9k members' - startOffset: 1014 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1014 - endOffset: 1222 -- name: 'Content Production & Automation: planning, scheduling, Zapier, Eventbrite' - startOffset: 1222 - url: 
https://www.youtube.com/watch?v=IxTyq96juVE&t=1222 - endOffset: 1478 -- name: 'Event Formats: Open Source Spotlight, Minis, Book of the Week' - startOffset: 1478 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1478 - endOffset: 1671 -- name: 'Notable Guests & Popular Episodes: Martin Kleppmann, Elena Samuylova, Santiago' - startOffset: 1671 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1671 - endOffset: 1897 -- name: 'Monetization & Sponsorship: costs, TopCoder, Toloka crowdsourcing workshop' - startOffset: 1897 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1897 - endOffset: 2302 -- name: 'ML Bookcamp & Machine Learning Zoomcamp: project-based, end-to-end learning' - startOffset: 2302 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2302 - endOffset: 2346 -- name: 'Deployment Focus in the Book/Course: Flask, AWS Lambda, Kubernetes, Kubeflow' - startOffset: 2346 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2346 - endOffset: 2569 -- name: 'Career Advice: join communities, answer questions, find mentors' - startOffset: 2569 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2569 - endOffset: 2635 -- name: 'Motivation & Persistence: handling frustration and sustaining interest' - startOffset: 2635 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2635 - endOffset: 2740 -- name: 'Tool Evaluation Strategy: avoid tool churn, follow lasting trends, Kedro - curiosity' - startOffset: 2740 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2740 - endOffset: 2936 -- name: 'Productivity & Workflow: public deadlines, accountability, batching work' - startOffset: 2936 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2936 - endOffset: 3031 -- name: 'Learning by Projects & Notes: just-in-time learning, Notion, READMEs, GitHub - docs' - startOffset: 3031 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3031 - endOffset: 3184 -- name: 'Community Inspiration & Format Ideas: borrowing from ML Ops and JavaRanch' - startOffset: 3184 - url: 
https://www.youtube.com/watch?v=IxTyq96juVE&t=3184 - endOffset: 3307 -- name: 'Interactive Formats: live coding, office hours, ML Zoomcamp sessions' - startOffset: 3307 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3307 - endOffset: 3356 -- name: Community Thanks & Future Plans - startOffset: 3356 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3356 - endOffset: 3410 -- name: Podcast Closing - startOffset: 3410 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3410 - endOffset: 3417 --- Links: diff --git a/_podcast/s15e03-llms-for-everyone.md b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md similarity index 97% rename from _podcast/s15e03-llms-for-everyone.md rename to _podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md index 2dcb4930..f686fbf3 100644 --- a/_podcast/s15e03-llms-for-everyone.md +++ b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md @@ -1,20 +1,144 @@ --- +title: 'Deploying LLMs in Production: Fine-Tuning, Retrieval & Open-Source vs API Tradeoffs' +short: LLMs for Everyone +season: 15 episode: 3 guests: - meryemarik +image: images/podcast/s15e03-llms-for-everyone.jpg ids: anchor: atatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf youtube: 6dn6uZFkk04 -image: images/podcast/s15e03-llms-for-everyone.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf apple: https://podcasts.apple.com/us/podcast/llms-for-everyone-meryem-arik/id1541710331?i=1000622675129 spotify: https://open.spotify.com/episode/0tmi2ytNk1bEPldcbhkvhN?si=DtU2OM3RTFmPBdY8sFCv5g youtube: https://www.youtube.com/watch?v=6dn6uZFkk04 -season: 15 -short: LLMs for Everyone -title: 'Deploying LLMs in Production: Fine-Tuning, Retrieval & Open-Source vs API - Tradeoffs' + +description: 'Discover LLM deployment tactics: fine-tuning, retrieval and open-source vs API tradeoffs to cut latency, control costs, and ground 
production models.' +intro: 'How do you take large language models from experiment to reliable production—balancing fine-tuning, retrieval strategies, and the tradeoffs between open‑source models and API services? In this episode, Meryem Arik, a recovering physicist and co‑founder of TitanML, walks through practical choices for LLM deployment based on her pivot from computer vision to building tools that make models smaller, cheaper, and easier to run in production.

We cover model fundamentals and selection (classification vs generative tasks), open‑source model options like LLaMA, FLAN‑T5, Falcon and MPT, and the operational realities of serving: model size, compression, inference optimization, latency and cost tradeoffs. Meryem explains when to prototype with GPT‑3.5/4 APIs versus self‑hosting, the risks of API model drift, and why fine‑tuning or retrieval‑augmented generation often beats continuous retraining. You’ll also get a clear breakdown of retrieval patterns, vector databases for semantic search, dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing architectures, reducing costs, and grounding answers reliably with retrieval.' +dateadded: 2023-07-29 + +duration: PT00H59M31S + +quotableClips: +- name: 'Episode Introduction: LLMs for Everyone' + startOffset: 0 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=0 + endOffset: 67 +- name: 'Guest Introduction: Meryem Arik and TitanML' + startOffset: 67 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=67 + endOffset: 105 +- name: 'Career Journey: Theoretical Physics → Banking → Tech' + startOffset: 105 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=105 + endOffset: 133 +- name: 'Founding TitanML: pivot from computer vision to LLM deployability' + startOffset: 133 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=133 + endOffset: 289 +- name: 'Startup Realities: co-founder roles, operations, and tradeoffs' + startOffset: 289 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=289 + endOffset: 402 +- name: 'Early LLM Interest: customer-driven pivot and GPT‑3 experience' + startOffset: 402 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=402 + endOffset: 557 +- name: 'ChatGPT Breakthrough: conversational interface and accessibility' + startOffset: 557 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=557 + endOffset: 624 +- name: 
'LLM Fundamentals: generative vs. non‑generative models and transformers' + startOffset: 624 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=624 + endOffset: 704 +- name: 'Model Selection: classification tasks vs. generative tasks' + startOffset: 704 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=704 + endOffset: 825 +- name: 'Open‑source Model Landscape: LLaMA, FLAN‑T5, Falcon, MPT' + startOffset: 825 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=825 + endOffset: 885 +- name: 'Why LLMs Matter: handling unstructured text at scale' + startOffset: 885 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=885 + endOffset: 1008 +- name: 'Open‑source vs API Models: control, privacy, and fine‑tuning benefits' + startOffset: 1008 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1008 + endOffset: 1126 +- name: 'Model Drift & API Risk: hidden model changes and production impact' + startOffset: 1126 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1126 + endOffset: 1417 +- name: 'TitanML Product Suite: Train, Optimized, and Takeoff server' + startOffset: 1417 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1417 + endOffset: 1526 +- name: 'Serving Challenges: model size, compression, and inference optimization' + startOffset: 1526 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1526 + endOffset: 1590 +- name: 'Fine‑tuning Purpose: specialization, domain adaptation, and tone' + startOffset: 1590 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1590 + endOffset: 1898 +- name: 'Fine‑tuning Generative Models: data formats and end‑task considerations' + startOffset: 1898 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1898 + endOffset: 2038 +- name: 'Workforce Impact: productivity gains and job disruption scenarios' + startOffset: 2038 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2038 + endOffset: 2446 +- name: 'Dealing with Changing Knowledge: retrieval over continuous retraining' + startOffset: 2446 + url: 
https://www.youtube.com/watch?v=6dn6uZFkk04&t=2446 + endOffset: 2522 +- name: 'Grounding Answers: indexing docs and retrieval‑augmented responses' + startOffset: 2522 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2522 + endOffset: 2802 +- name: 'Retrieval Patterns: injecting passages, summarizers, and grounding layers' + startOffset: 2802 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2802 + endOffset: 2881 +- name: 'Vector Databases Explained: embeddings, indexing, and semantic search' + startOffset: 2881 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2881 + endOffset: 2984 +- name: 'Prototyping vs Production: when to use GPT‑3.5/4 APIs vs open‑source LLMs' + startOffset: 2984 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2984 + endOffset: 3095 +- name: 'Latency & Cost Tradeoffs: self‑hosting performance and hardware choices' + startOffset: 3095 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3095 + endOffset: 3214 +- name: 'Data Quality Metrics: gold‑standard examples and output‑driven evaluation' + startOffset: 3214 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3214 + endOffset: 3332 +- name: 'Dataset Expansion: LLM‑assisted augmentation for training data' + startOffset: 3332 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3332 + endOffset: 3399 +- name: 'Evaluation & Benchmarking: classification vs generative metrics and human review' + startOffset: 3399 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3399 + endOffset: 3548 +- name: 'Learning Resources: Hugging Face, Cohere LLM University, community content' + startOffset: 3548 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3548 + endOffset: 3638 +- name: Episode Close and Final Remarks + startOffset: 3638 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3638 + endOffset: 3571 + transcript: - header: 'Episode Introduction: LLMs for Everyone' - header: 'Guest Introduction: Meryem Arik and TitanML' @@ -1164,143 +1288,6 @@ transcript: sec: 3638 time: 
'1:00:38' who: Alexey -description: 'Discover LLM deployment tactics: fine-tuning, retrieval and open-source - vs API tradeoffs to cut latency, control costs, and ground production models.' -intro: 'How do you take large language models from experiment to reliable production—balancing - fine-tuning, retrieval strategies, and the tradeoffs between open‑source models - and API services? In this episode, Meryem Arik, a recovering physicist and co‑founder - of TitanML, walks through practical choices for LLM deployment based on her pivot - from computer vision to building tools that make models smaller, cheaper, and easier - to run in production.

We cover model fundamentals and selection (classification - vs generative tasks), open‑source model options like LLaMA, FLAN‑T5, Falcon and - MPT, and the operational realities of serving: model size, compression, inference - optimization, latency and cost tradeoffs. Meryem explains when to prototype with - GPT‑3.5/4 APIs versus self‑hosting, the risks of API model drift, and why fine‑tuning - or retrieval‑augmented generation often beats continuous retraining. You’ll also - get a clear breakdown of retrieval patterns, vector databases for semantic search, - dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff - product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing - architectures, reducing costs, and grounding answers reliably with retrieval.' -dateadded: '2023-07-29' -duration: PT00H59M31S -quotableClips: -- name: 'Episode Introduction: LLMs for Everyone' - startOffset: 0 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=0 - endOffset: 67 -- name: 'Guest Introduction: Meryem Arik and TitanML' - startOffset: 67 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=67 - endOffset: 105 -- name: 'Career Journey: Theoretical Physics → Banking → Tech' - startOffset: 105 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=105 - endOffset: 133 -- name: 'Founding TitanML: pivot from computer vision to LLM deployability' - startOffset: 133 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=133 - endOffset: 289 -- name: 'Startup Realities: co-founder roles, operations, and tradeoffs' - startOffset: 289 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=289 - endOffset: 402 -- name: 'Early LLM Interest: customer-driven pivot and GPT‑3 experience' - startOffset: 402 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=402 - endOffset: 557 -- name: 'ChatGPT Breakthrough: conversational interface and accessibility' - startOffset: 557 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=557 - 
endOffset: 624 -- name: 'LLM Fundamentals: generative vs. non‑generative models and transformers' - startOffset: 624 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=624 - endOffset: 704 -- name: 'Model Selection: classification tasks vs. generative tasks' - startOffset: 704 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=704 - endOffset: 825 -- name: 'Open‑source Model Landscape: LLaMA, FLAN‑T5, Falcon, MPT' - startOffset: 825 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=825 - endOffset: 885 -- name: 'Why LLMs Matter: handling unstructured text at scale' - startOffset: 885 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=885 - endOffset: 1008 -- name: 'Open‑source vs API Models: control, privacy, and fine‑tuning benefits' - startOffset: 1008 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1008 - endOffset: 1126 -- name: 'Model Drift & API Risk: hidden model changes and production impact' - startOffset: 1126 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1126 - endOffset: 1417 -- name: 'TitanML Product Suite: Train, Optimized, and Takeoff server' - startOffset: 1417 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1417 - endOffset: 1526 -- name: 'Serving Challenges: model size, compression, and inference optimization' - startOffset: 1526 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1526 - endOffset: 1590 -- name: 'Fine‑tuning Purpose: specialization, domain adaptation, and tone' - startOffset: 1590 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1590 - endOffset: 1898 -- name: 'Fine‑tuning Generative Models: data formats and end‑task considerations' - startOffset: 1898 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1898 - endOffset: 2038 -- name: 'Workforce Impact: productivity gains and job disruption scenarios' - startOffset: 2038 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2038 - endOffset: 2446 -- name: 'Dealing with Changing Knowledge: retrieval over continuous retraining' - startOffset: 2446 - 
url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2446 - endOffset: 2522 -- name: 'Grounding Answers: indexing docs and retrieval‑augmented responses' - startOffset: 2522 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2522 - endOffset: 2802 -- name: 'Retrieval Patterns: injecting passages, summarizers, and grounding layers' - startOffset: 2802 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2802 - endOffset: 2881 -- name: 'Vector Databases Explained: embeddings, indexing, and semantic search' - startOffset: 2881 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2881 - endOffset: 2984 -- name: 'Prototyping vs Production: when to use GPT‑3.5/4 APIs vs open‑source LLMs' - startOffset: 2984 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2984 - endOffset: 3095 -- name: 'Latency & Cost Tradeoffs: self‑hosting performance and hardware choices' - startOffset: 3095 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3095 - endOffset: 3214 -- name: 'Data Quality Metrics: gold‑standard examples and output‑driven evaluation' - startOffset: 3214 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3214 - endOffset: 3332 -- name: 'Dataset Expansion: LLM‑assisted augmentation for training data' - startOffset: 3332 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3332 - endOffset: 3399 -- name: 'Evaluation & Benchmarking: classification vs generative metrics and human - review' - startOffset: 3399 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3399 - endOffset: 3548 -- name: 'Learning Resources: Hugging Face, Cohere LLM University, community content' - startOffset: 3548 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3548 - endOffset: 3638 -- name: Episode Close and Final Remarks - startOffset: 3638 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3638 - endOffset: 3571 --- Links: diff --git a/_podcast/s03e07-market-yourself.md b/_podcast/developer-personal-brand-learn-in-public.md similarity index 98% rename from 
_podcast/s03e07-market-yourself.md rename to _podcast/developer-personal-brand-learn-in-public.md index 973d6d85..bd137f16 100644 --- a/_podcast/s03e07-market-yourself.md +++ b/_podcast/developer-personal-brand-learn-in-public.md @@ -1,12 +1,11 @@ --- title: 'Learn in Public: Personal Branding & Career Marketing for Developers' short: 'Learn in Public: Personal Branding & Career Marketing for Developers' +season: 3 +episode: 7 guests: - swyx image: images/podcast/s03e07-market-yourself.jpg -season: 3 -episode: 7 -date: 2025-11-07 ids: youtube: tkBCPqWKCL8 anchor: How-to-Market-Yourself-without-Being-a-Celebrity---Shawn-Swyx-Wang-e11ai8t @@ -15,6 +14,136 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/How-to-Market-Yourself-without-Being-a-Celebrity---Shawn-Swyx-Wang-e11ai8t spotify: https://open.spotify.com/episode/6uLyKxpVZv0wItCNyGPdAN apple: https://podcasts.apple.com/us/podcast/how-to-market-yourself-without-being-celebrity-shawn/id1541710331?i=1000522670386 + +description: 'Discover personal branding & career marketing for devs: learn-in-public tactics, niche choice and internal promotion to boost visibility and land promotions.' +intro: 'How do developers build visibility, earn promotions, and steer their careers by learning in public? In this episode, Shawn Swyx Wang — Senior Developer Advocate for AWS Amplify, author of The Coding Career Handbook, and former engineer at Netlify and Temporal — walks through a practical framework for personal branding and career marketing for developers. We unpack why self-marketing matters beyond job hunting and the five-part personal marketing framework: brand, domain, value, skills, and channel.

You''ll hear concrete guidance on choosing and validating a niche (meetups, conferences, community signals), building an owned platform (blog, newsletter, mailing list), and distribution tactics from early social growth to the engagement move "pick up what they put down." Swyx also covers career transition strategies, hiring portfolios and case studies, internal pathways like lateral moves and signature initiatives, and creating reusable talks and demos. Practical tools discussed include brag documents, demos for internal promotion, and open knowledge projects as visibility builders. Tune in to get actionable steps to craft a developer personal brand, grow influence, and apply learn-in-public tactics to advance your career and job opportunities.' +topics: +- personal brand +- career growth +- career transition +dateadded: 2021-05-22 +date: 2025-11-07 + +duration: PT01H02M41S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=0 + endOffset: 69 +- name: 'Guest Overview: Swyx and the learn in public movement' + startOffset: 69 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=69 + endOffset: 144 +- name: 'Career Journey: finance to coding, Netlify, AWS, Temporal' + startOffset: 144 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=144 + endOffset: 376 +- name: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' + startOffset: 376 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=376 + endOffset: 513 +- name: 'Marketing Beyond Job Hunting: open source and internal persuasion' + startOffset: 513 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=513 + endOffset: 626 +- name: 'Personal Marketing Framework: brand, domain, value, skills, channel' + startOffset: 626 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=626 + endOffset: 756 +- name: 'Personal Brand for Non‑star Developers: find distinctiveness' + startOffset: 756 + url: 
https://www.youtube.com/watch?v=tkBCPqWKCL8&t=756 + endOffset: 787 +- name: 'Brand Consistency: photo, name, and repeated impressions' + startOffset: 787 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=787 + endOffset: 1123 +- name: 'Domain Selection: choosing topics to write and speak about' + startOffset: 1123 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1123 + endOffset: 1272 +- name: 'Niche Strategy: choosing the right level of specialization' + startOffset: 1272 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1272 + endOffset: 1352 +- name: 'Validating a Niche: meetups, conferences, and community signals' + startOffset: 1352 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1352 + endOffset: 1433 +- name: 'Learn in Public: honest progress, corrections, and earned expertise' + startOffset: 1433 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1433 + endOffset: 1554 +- name: 'Owned Platforms & Blogging: mailing lists, newsletters, and personal site' + startOffset: 1554 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1554 + endOffset: 1632 +- name: 'Starting Distribution: social media to drive people to your site' + startOffset: 1632 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1632 + endOffset: 1827 +- name: 'Engagement Tactic: Pick up what they put down to get noticed' + startOffset: 1827 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1827 + endOffset: 1962 +- name: 'Early Social Media Growth: tactics for initial visibility' + startOffset: 1962 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1962 + endOffset: 1991 +- name: 'Career Transition Strategies: students, career changers, mutual value exchange' + startOffset: 1991 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1991 + endOffset: 2195 +- name: 'Focused Applications: targeted research over mass applying' + startOffset: 2195 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2195 + endOffset: 2310 +- name: 'Hiring Portfolio: unsolicited redesigns, 
product clones, and case studies' + startOffset: 2310 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2310 + endOffset: 2540 +- name: 'Internal Pathways: lateral entry and internal transfers' + startOffset: 2540 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2540 + endOffset: 2597 +- name: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' + startOffset: 2597 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2597 + endOffset: 2743 +- name: 'Process Summary: discover, learn in public, work, and iterate' + startOffset: 2743 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2743 + endOffset: 2834 +- name: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' + startOffset: 2834 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2834 + endOffset: 3070 +- name: 'Internal Promotion Tools: brag document, demos, and networking' + startOffset: 3070 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3070 + endOffset: 3256 +- name: 'Signature Initiative: company‑wide projects that build influence' + startOffset: 3256 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3256 + endOffset: 3429 +- name: 'Internal Content Strategy: applying external marketing tactics inside' + startOffset: 3429 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3429 + endOffset: 3544 +- name: 'Public Speaking: creating reusable talks and practicing communication' + startOffset: 3544 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3544 + endOffset: 3717 +- name: 'Book & Resources: The Coding Career Handbook, newsletter, and discount' + startOffset: 3717 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3717 + endOffset: 3791 +- name: 'Final Takeaway: non‑technical skills dominate engineering ladders' + startOffset: 3791 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3791 + endOffset: 3761 + transcript: - header: 'Guest Overview: Swyx and the learn in public movement' - line: This week we will talk about marketing 
ourselves. We have a special guest @@ -1544,142 +1673,6 @@ transcript: sec: 3830 time: '1:03:50' who: Alexey -intro: 'How do developers build visibility, earn promotions, and steer their careers - by learning in public? In this episode, Shawn Swyx Wang — Senior Developer Advocate - for AWS Amplify, author of The Coding Career Handbook, and former engineer at Netlify - and Temporal — walks through a practical framework for personal branding and career - marketing for developers. We unpack why self-marketing matters beyond job hunting - and the five-part personal marketing framework: brand, domain, value, skills, and - channel.

You''ll hear concrete guidance on choosing and validating a niche - (meetups, conferences, community signals), building an owned platform (blog, newsletter, - mailing list), and distribution tactics from early social growth to the engagement - move "pick up what they put down." Swyx also covers career transition strategies, - hiring portfolios and case studies, internal pathways like lateral moves and signature - initiatives, and creating reusable talks and demos. Practical tools discussed include - brag documents, demos for internal promotion, and open knowledge projects as visibility - builders. Tune in to get actionable steps to craft a developer personal brand, grow - influence, and apply learn-in-public tactics to advance your career and job opportunities.' -description: 'Discover personal branding & career marketing for devs: learn-in-public - tactics, niche choice and internal promotion to boost visibility and land promotions.' -dateadded: '2021-05-22' -duration: PT01H02M41S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=0 - endOffset: 69 -- name: 'Guest Overview: Swyx and the learn in public movement' - startOffset: 69 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=69 - endOffset: 144 -- name: 'Career Journey: finance to coding, Netlify, AWS, Temporal' - startOffset: 144 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=144 - endOffset: 376 -- name: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' - startOffset: 376 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=376 - endOffset: 513 -- name: 'Marketing Beyond Job Hunting: open source and internal persuasion' - startOffset: 513 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=513 - endOffset: 626 -- name: 'Personal Marketing Framework: brand, domain, value, skills, channel' - startOffset: 626 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=626 - endOffset: 756 -- name: 'Personal Brand for Non‑star 
Developers: find distinctiveness' - startOffset: 756 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=756 - endOffset: 787 -- name: 'Brand Consistency: photo, name, and repeated impressions' - startOffset: 787 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=787 - endOffset: 1123 -- name: 'Domain Selection: choosing topics to write and speak about' - startOffset: 1123 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1123 - endOffset: 1272 -- name: 'Niche Strategy: choosing the right level of specialization' - startOffset: 1272 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1272 - endOffset: 1352 -- name: 'Validating a Niche: meetups, conferences, and community signals' - startOffset: 1352 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1352 - endOffset: 1433 -- name: 'Learn in Public: honest progress, corrections, and earned expertise' - startOffset: 1433 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1433 - endOffset: 1554 -- name: 'Owned Platforms & Blogging: mailing lists, newsletters, and personal site' - startOffset: 1554 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1554 - endOffset: 1632 -- name: 'Starting Distribution: social media to drive people to your site' - startOffset: 1632 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1632 - endOffset: 1827 -- name: 'Engagement Tactic: Pick up what they put down to get noticed' - startOffset: 1827 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1827 - endOffset: 1962 -- name: 'Early Social Media Growth: tactics for initial visibility' - startOffset: 1962 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1962 - endOffset: 1991 -- name: 'Career Transition Strategies: students, career changers, mutual value exchange' - startOffset: 1991 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1991 - endOffset: 2195 -- name: 'Focused Applications: targeted research over mass applying' - startOffset: 2195 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2195 - endOffset: 
2310 -- name: 'Hiring Portfolio: unsolicited redesigns, product clones, and case studies' - startOffset: 2310 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2310 - endOffset: 2540 -- name: 'Internal Pathways: lateral entry and internal transfers' - startOffset: 2540 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2540 - endOffset: 2597 -- name: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' - startOffset: 2597 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2597 - endOffset: 2743 -- name: 'Process Summary: discover, learn in public, work, and iterate' - startOffset: 2743 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2743 - endOffset: 2834 -- name: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' - startOffset: 2834 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2834 - endOffset: 3070 -- name: 'Internal Promotion Tools: brag document, demos, and networking' - startOffset: 3070 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3070 - endOffset: 3256 -- name: 'Signature Initiative: company‑wide projects that build influence' - startOffset: 3256 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3256 - endOffset: 3429 -- name: 'Internal Content Strategy: applying external marketing tactics inside' - startOffset: 3429 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3429 - endOffset: 3544 -- name: 'Public Speaking: creating reusable talks and practicing communication' - startOffset: 3544 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3544 - endOffset: 3717 -- name: 'Book & Resources: The Coding Career Handbook, newsletter, and discount' - startOffset: 3717 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3717 - endOffset: 3791 -- name: 'Final Takeaway: non‑technical skills dominate engineering ladders' - startOffset: 3791 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3791 - endOffset: 3761 --- Links: diff --git a/_podcast/s02e02-developer-advocacy.md 
b/_podcast/devrel-data-science-open-source-tools.md similarity index 97% rename from _podcast/s02e02-developer-advocacy.md rename to _podcast/devrel-data-science-open-source-tools.md index 44df949e..31c858c4 100644 --- a/_podcast/s02e02-developer-advocacy.md +++ b/_podcast/devrel-data-science-open-source-tools.md @@ -1,11 +1,11 @@ --- title: 'DevRel for Data Science: Build Community, Create Content, and Grow Your Career' short: Developer Advocacy for Data Science +season: 2 +episode: 2 guests: - elleobrien image: images/podcast/s02e02-developer-advocacy.jpg -season: 2 -episode: 2 ids: youtube: jv5W4jXk4P4 anchor: Developer-Advocacy-for-Data-Science---Elle-OBrien-epcbak @@ -14,6 +14,132 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Developer-Advocacy-for-Data-Science---Elle-OBrien-epcbak spotify: https://open.spotify.com/episode/6Hq0ZGPTkDk1h8orfCU78I apple: https://podcasts.apple.com/us/podcast/developer-advocacy-for-data-science-elle-obrien/id1541710331?i=1000506315396 + +description: 'Discover DevRel tactics for Data Science: community growth, reproducibility, and content strategy—practical metrics, safety practices, and career growth tips.' +intro: How do you practice developer relations for data science while balancing reproducibility, community growth, and content strategy? In this episode, Elle O’Brien — a data scientist at Iterative (working on DVC and CML) and a lecturer at the University of Michigan with a PhD in neuroscience and computational modeling from UW — walks through practical DevRel for data-focused tools and teaching.

We cover her shift from a viral StyleGAN project into DevRel, the scope of a solo developer advocate (product work, docs, PRs, videos, hiring), and how she prioritizes releases versus evergreen content. Elle shares promotion tactics (Hacker News, Reddit, social), approaches to community safety and moderation, and the emotional realities of online work. She explains community metrics, role distinctions between DevRel/advocate/evangelist, and core skills like technical credibility and rapid learning. We also dig into content strategy for teaching—curriculum design, reusable video content, recording lectures as open educational resources, and practical ways to get started blogging and building a developer portfolio.

Listen to gain actionable guidance on community growth, reproducibility best practices, content planning, and the trade-offs of DevRel work in open source data science +topics: +- developer relations +- data science +- machine learning +- open-source +dateadded: 2021-02-23 + +duration: PT00H55M15S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=0 + endOffset: 177 +- name: 'Introduction: Developer Advocacy for Data Science — Elle O''Brien (Iterative, + DVC, CML)' + startOffset: 177 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=177 + endOffset: 250 +- name: 'Background: Neuroscience research, PhD, and computational modeling' + startOffset: 250 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=250 + endOffset: 470 +- name: 'Teaching focus: Applied Data Science curriculum & research reproducibility' + startOffset: 470 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=470 + endOffset: 573 +- name: 'Career pivot: Viral StyleGAN project to DevRel role at Iterative' + startOffset: 573 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=573 + endOffset: 740 +- name: 'Role scope: product work, CML, docs, PRs, videos, and hiring' + startOffset: 740 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=740 + endOffset: 902 +- name: 'Prioritization as a solo DevRel: scheduling releases vs evergreen content' + startOffset: 902 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=902 + endOffset: 1004 +- name: 'Release promotion: real‑time engagement on Hacker News, Reddit, and social + media' + startOffset: 1004 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1004 + endOffset: 1074 +- name: 'Managing toxicity: choosing communities and setting boundaries' + startOffset: 1074 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1074 + endOffset: 1187 +- name: 'DevRel job realities: content creation, community management, and support + trade‑offs' + startOffset: 1187 + url: 
https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1187 + endOffset: 1431 +- name: 'Community feedback: being the product signal and user insight channel' + startOffset: 1431 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1431 + endOffset: 1561 +- name: 'Community metrics: signals, analytics, and full‑time analysis potential' + startOffset: 1561 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1561 + endOffset: 1624 +- name: 'Role distinctions: DevRel, developer advocate, and evangelist explained' + startOffset: 1624 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1624 + endOffset: 1735 +- name: 'Risks of DevRel: online abuse, burnout, and public scrutiny' + startOffset: 1735 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1735 + endOffset: 1885 +- name: 'Safety practices: anonymity, moderation, and peer solidarity' + startOffset: 1885 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1885 + endOffset: 2068 +- name: 'Rewards of DevRel: visibility, speaking invites, and career opportunities' + startOffset: 2068 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2068 + endOffset: 2211 +- name: 'Core skills: technical credibility, rapid learning, and clear communication' + startOffset: 2211 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2211 + endOffset: 2371 +- name: 'Nontechnical pathways: learning in public and building a portfolio' + startOffset: 2371 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2371 + endOffset: 2532 +- name: 'Getting started: blogging, tutorials, Twitter, and content examples' + startOffset: 2532 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2532 + endOffset: 2605 +- name: 'Backgrounds & personality: creativity, humor, and relatability in DevRel' + startOffset: 2605 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2605 + endOffset: 2715 +- name: 'Assessing fit: hobby vs job, community inclination, and time commitment' + startOffset: 2715 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2715 + 
endOffset: 2886 +- name: 'Audience growth: metrics, growth‑hacking versus sustainable strategies' + startOffset: 2886 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2886 + endOffset: 3019 +- name: 'Branding & engagement: mascots, tone, and consistent visuals (Divi owl)' + startOffset: 3019 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3019 + endOffset: 3126 +- name: 'Teaching & DevRel synergy: curriculum design and reusable video content' + startOffset: 3126 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3126 + endOffset: 3286 +- name: 'University plans: recording lectures and open educational resources on YouTube' + startOffset: 3286 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3286 + endOffset: 3399 +- name: 'Closing thoughts: encouraging diverse DevRels and where to follow Elle (Twitter, + YouTube, LinkedIn)' + startOffset: 3399 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3399 + endOffset: 3492 +- name: Episode Wrap‑up and Farewell + startOffset: 3492 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3492 + endOffset: 3315 + transcript: - header: Podcast Introduction - header: 'Introduction: Developer Advocacy for Data Science — Elle O''Brien (Iterative, @@ -966,137 +1092,4 @@ transcript: sec: 3492 time: '58:12' who: Alexey -description: 'Discover DevRel tactics for Data Science: community growth, reproducibility, - and content strategy—practical metrics, safety practices, and career growth tips.' -intro: How do you practice developer relations for data science while balancing reproducibility, - community growth, and content strategy? In this episode, Elle O’Brien — a data scientist - at Iterative (working on DVC and CML) and a lecturer at the University of Michigan - with a PhD in neuroscience and computational modeling from UW — walks through practical - DevRel for data-focused tools and teaching.

We cover her shift from a viral - StyleGAN project into DevRel, the scope of a solo developer advocate (product work, - docs, PRs, videos, hiring), and how she prioritizes releases versus evergreen content. - Elle shares promotion tactics (Hacker News, Reddit, social), approaches to community - safety and moderation, and the emotional realities of online work. She explains - community metrics, role distinctions between DevRel/advocate/evangelist, and core - skills like technical credibility and rapid learning. We also dig into content strategy - for teaching—curriculum design, reusable video content, recording lectures as open - educational resources, and practical ways to get started blogging and building a - developer portfolio.

Listen to gain actionable guidance on community growth, - reproducibility best practices, content planning, and the trade-offs of DevRel work - in open source data science. -dateadded: '2021-02-23' -duration: PT00H55M15S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=0 - endOffset: 177 -- name: 'Introduction: Developer Advocacy for Data Science — Elle O''Brien (Iterative, - DVC, CML)' - startOffset: 177 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=177 - endOffset: 250 -- name: 'Background: Neuroscience research, PhD, and computational modeling' - startOffset: 250 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=250 - endOffset: 470 -- name: 'Teaching focus: Applied Data Science curriculum & research reproducibility' - startOffset: 470 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=470 - endOffset: 573 -- name: 'Career pivot: Viral StyleGAN project to DevRel role at Iterative' - startOffset: 573 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=573 - endOffset: 740 -- name: 'Role scope: product work, CML, docs, PRs, videos, and hiring' - startOffset: 740 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=740 - endOffset: 902 -- name: 'Prioritization as a solo DevRel: scheduling releases vs evergreen content' - startOffset: 902 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=902 - endOffset: 1004 -- name: 'Release promotion: real‑time engagement on Hacker News, Reddit, and social - media' - startOffset: 1004 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1004 - endOffset: 1074 -- name: 'Managing toxicity: choosing communities and setting boundaries' - startOffset: 1074 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1074 - endOffset: 1187 -- name: 'DevRel job realities: content creation, community management, and support - trade‑offs' - startOffset: 1187 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1187 - endOffset: 1431 -- name: 'Community feedback: 
being the product signal and user insight channel' - startOffset: 1431 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1431 - endOffset: 1561 -- name: 'Community metrics: signals, analytics, and full‑time analysis potential' - startOffset: 1561 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1561 - endOffset: 1624 -- name: 'Role distinctions: DevRel, developer advocate, and evangelist explained' - startOffset: 1624 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1624 - endOffset: 1735 -- name: 'Risks of DevRel: online abuse, burnout, and public scrutiny' - startOffset: 1735 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1735 - endOffset: 1885 -- name: 'Safety practices: anonymity, moderation, and peer solidarity' - startOffset: 1885 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1885 - endOffset: 2068 -- name: 'Rewards of DevRel: visibility, speaking invites, and career opportunities' - startOffset: 2068 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2068 - endOffset: 2211 -- name: 'Core skills: technical credibility, rapid learning, and clear communication' - startOffset: 2211 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2211 - endOffset: 2371 -- name: 'Nontechnical pathways: learning in public and building a portfolio' - startOffset: 2371 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2371 - endOffset: 2532 -- name: 'Getting started: blogging, tutorials, Twitter, and content examples' - startOffset: 2532 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2532 - endOffset: 2605 -- name: 'Backgrounds & personality: creativity, humor, and relatability in DevRel' - startOffset: 2605 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2605 - endOffset: 2715 -- name: 'Assessing fit: hobby vs job, community inclination, and time commitment' - startOffset: 2715 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2715 - endOffset: 2886 -- name: 'Audience growth: metrics, growth‑hacking versus sustainable strategies' - 
startOffset: 2886 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2886 - endOffset: 3019 -- name: 'Branding & engagement: mascots, tone, and consistent visuals (Divi owl)' - startOffset: 3019 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3019 - endOffset: 3126 -- name: 'Teaching & DevRel synergy: curriculum design and reusable video content' - startOffset: 3126 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3126 - endOffset: 3286 -- name: 'University plans: recording lectures and open educational resources on YouTube' - startOffset: 3286 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3286 - endOffset: 3399 -- name: 'Closing thoughts: encouraging diverse DevRels and where to follow Elle (Twitter, - YouTube, LinkedIn)' - startOffset: 3399 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3399 - endOffset: 3492 -- name: Episode Wrap‑up and Farewell - startOffset: 3492 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3492 - endOffset: 3315 --- diff --git a/_podcast/s14e06-data-developer-relations.md b/_podcast/devrel-open-source-machine-learning.md similarity index 97% rename from _podcast/s14e06-data-developer-relations.md rename to _podcast/devrel-open-source-machine-learning.md index d3e82846..61aa1f9d 100644 --- a/_podcast/s14e06-data-developer-relations.md +++ b/_podcast/devrel-open-source-machine-learning.md @@ -1,19 +1,135 @@ --- +title: 'DevRel Role for Machine Learning: ML Ecosystems, Open-Source Governance & Developer Experience with Metaflow' +short: DevRel Role for Machine Learning +season: 14 episode: 6 guests: - hugobowneanderson +image: images/podcast/s14e06-data-developer-relations.jpg ids: anchor: ow/datatalksclub/episodes/Data-Developer-Relations---Hugo-Bowne-Anderson-e25q88q youtube: z7BvslwVRbQ -image: images/podcast/s14e06-data-developer-relations.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Data-Developer-Relations---Hugo-Bowne-Anderson-e25q88q apple: 
https://podcasts.apple.com/us/podcast/data-developer-relations-hugo-bowne-anderson/id1541710331?i=1000617298688 spotify: https://open.spotify.com/episode/7bVCKqn9fLt6ETq8hxId5V?si=GZSC3NbvRuyXD85iOQo51Q youtube: https://www.youtube.com/watch?v=z7BvslwVRbQ -season: 14 -short: Data Developer Relations -title: 'Master Full-Stack ML with Metaflow: DevRel, Open-Source Governance & AI Trends' + +description: 'Explore the role of developer relations for machine learning: ML ecosystems, open-source governance and developer experience with Metaflow.' +intro: How do you build effective developer relations for machine learning ecosystems while navigating open-source governance and enhancing developer experience? In this episode, Hugo Bowne-Anderson — Head of Developer Relations at Outerbounds, longtime educator and podcast host — demonstrates Metaflow's capabilities and shares practical guidance for building reproducible ML workflows. Drawing on his background at Coiled and DataCamp and his experience teaching and creating courses, Hugo explores ML ecosystem integrations (AWS, Kubernetes, Argo), interoperability considerations, and company support models for open-source projects like Dask and Metaflow.

We dive into the DevRel career path, essential skills (technical fluency, writing, community building), organizational structures, and how developer feedback and dogfooding enhance documentation and reproducibility. Hugo discusses generative AI's impact on ML infrastructure and DevRel practices, AI-assisted content creation tools like Whisper and ChatGPT, and strategic approaches to tutorials, blogs, and conference talks. Listen to gain actionable insights on ML ecosystem development, improving developer experience with Metaflow, and aligning DevRel strategies with open-source governance to support scalable machine learning infrastructure +dateadded: 2023-06-17 +topics: +- developer relations +- machine learning +- open-source +duration: PT00H57M52S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=0 + endOffset: 93 +- name: 'Guest Introduction: Hugo Bowne‑Anderson, Outerbounds & Metaflow' + startOffset: 93 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=93 + endOffset: 134 +- name: Metaflow Sandbox Demo & Full‑Stack Machine Learning Spotlight + startOffset: 134 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=134 + endOffset: 216 +- name: 'Career Path: From Biophysics Research to Data Science Education' + startOffset: 216 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=216 + endOffset: 226 +- name: Building Courses, Open‑Source Collaboration & DataCamp Impact + startOffset: 226 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=226 + endOffset: 647 +- name: 'Open‑Source Governance: Company Support for Projects (Dask, Metaflow)' + startOffset: 647 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=647 + endOffset: 832 +- name: 'Metaflow Integrations: AWS, Kubernetes, Argo & ML Interoperability' + startOffset: 832 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=832 + endOffset: 874 +- name: 'Path to DevRel: Education to Developer Advocacy' + startOffset: 874 + url: 
https://www.youtube.com/watch?v=z7BvslwVRbQ&t=874 + endOffset: 1083 +- name: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' + startOffset: 1083 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1083 + endOffset: 1372 +- name: 'DevRel Organizational Models: Reporting Lines & Technical Alignment' + startOffset: 1372 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1372 + endOffset: 1517 +- name: 'Developer Collaboration: Feedback Loops, Documentation & Dogfooding' + startOffset: 1517 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1517 + endOffset: 1570 +- name: 'Generative AI Trends: Impacts on ML Infrastructure & DevRel' + startOffset: 1570 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1570 + endOffset: 1637 +- name: 'Marketing Partnership: SEO, Content Strategy & Audience Targeting' + startOffset: 1637 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1637 + endOffset: 1901 +- name: 'Core DevRel Skills: Technical Fluency, Writing & Community Building' + startOffset: 1901 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1901 + endOffset: 2090 +- name: 'Role Trade‑offs: Content Work vs Internal Data Science' + startOffset: 2090 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2090 + endOffset: 2187 +- name: 'Teaching Reproducibility: Dogfooding and Simplifying Workflows' + startOffset: 2187 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2187 + endOffset: 2241 +- name: 'Improving Writing: Practice, Collaboration & Editorial Feedback' + startOffset: 2241 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2241 + endOffset: 2417 +- name: 'AI‑Assisted Drafting: Whisper, ChatGPT & Productivity Tools' + startOffset: 2417 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2417 + endOffset: 2594 +- name: 'Content Design: Audience, Goals & Structural Outlines for Tutorials' + startOffset: 2594 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2594 + endOffset: 2769 +- name: 'Content Goals: Awareness, Support & 
Open‑Source Strategy Decisions' + startOffset: 2769 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2769 + endOffset: 2923 +- name: 'Choosing Media: Blog Posts, Talks, Videos, Conferences & ROI' + startOffset: 2923 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2923 + endOffset: 3102 +- name: 'Scope of DevRel: Internal Enablement, External Outreach & Partnerships' + startOffset: 3102 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3102 + endOffset: 3271 +- name: 'Career Advice: GitHub Portfolios, Meetups & Experimenting in DevRel' + startOffset: 3271 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3271 + endOffset: 3372 +- name: 'Long‑Form Conversations: Vanishing Gradients Podcast Overview' + startOffset: 3372 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3372 + endOffset: 3503 +- name: 'Contact & Resources: Hugo Online, Outerbounds Slack & Links' + startOffset: 3503 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3503 + endOffset: 3565 +- name: Closing Remarks & Episode Wrap‑Up + startOffset: 3565 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3565 + endOffset: 3472 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Hugo Bowne‑Anderson, Outerbounds & Metaflow' @@ -1061,130 +1177,6 @@ transcript: sec: 3565 time: '59:25' who: Alexey -description: 'Master Metaflow, DevRel and full-stack ML: demo, AWS/Kubernetes integrations, - open-source governance and career tips to build reproducible ML pipelines.' -intro: How do you master full‑stack machine learning with Metaflow while balancing - open‑source governance, developer relations, and fast‑moving AI trends? In this - episode Hugo Bowne‑Anderson — Head of Developer Relations at Outerbounds, longtime - educator and podcast host — walks through a Metaflow sandbox demo and practical - guidance for building reproducible machine learning workflows. 
Drawing on his background - at Coiled and DataCamp and his experience teaching and creating courses, Hugo breaks - down Metaflow integrations (AWS, Kubernetes, Argo), interoperability considerations, - and company support models for projects like Dask and Metaflow.

We also - explore the path to DevRel, core DevRel skills (technical fluency, writing, community - building), organizational models, and how developer feedback and dogfooding improve - documentation and reproducibility. Hugo discusses generative AI’s impacts on ML - infrastructure and DevRel, AI‑assisted drafting tools like Whisper and ChatGPT, - and content strategy for tutorials, blogs, and talks. Listen to gain concrete takeaways - on mastering full‑stack ML with Metaflow, improving reproducibility, and aligning - DevRel and open‑source governance to support scalable machine learning infrastructure. -dateadded: '2023-06-17' -duration: PT00H57M52S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=0 - endOffset: 93 -- name: 'Guest Introduction: Hugo Bowne‑Anderson, Outerbounds & Metaflow' - startOffset: 93 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=93 - endOffset: 134 -- name: Metaflow Sandbox Demo & Full‑Stack Machine Learning Spotlight - startOffset: 134 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=134 - endOffset: 216 -- name: 'Career Path: From Biophysics Research to Data Science Education' - startOffset: 216 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=216 - endOffset: 226 -- name: Building Courses, Open‑Source Collaboration & DataCamp Impact - startOffset: 226 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=226 - endOffset: 647 -- name: 'Open‑Source Governance: Company Support for Projects (Dask, Metaflow)' - startOffset: 647 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=647 - endOffset: 832 -- name: 'Metaflow Integrations: AWS, Kubernetes, Argo & ML Interoperability' - startOffset: 832 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=832 - endOffset: 874 -- name: 'Path to DevRel: Education to Developer Advocacy' - startOffset: 874 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=874 - endOffset: 1083 -- name: 'DevRel Explained: Education, 
Documentation & the "Wisdom Layer"' - startOffset: 1083 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1083 - endOffset: 1372 -- name: 'DevRel Organizational Models: Reporting Lines & Technical Alignment' - startOffset: 1372 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1372 - endOffset: 1517 -- name: 'Developer Collaboration: Feedback Loops, Documentation & Dogfooding' - startOffset: 1517 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1517 - endOffset: 1570 -- name: 'Generative AI Trends: Impacts on ML Infrastructure & DevRel' - startOffset: 1570 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1570 - endOffset: 1637 -- name: 'Marketing Partnership: SEO, Content Strategy & Audience Targeting' - startOffset: 1637 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1637 - endOffset: 1901 -- name: 'Core DevRel Skills: Technical Fluency, Writing & Community Building' - startOffset: 1901 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1901 - endOffset: 2090 -- name: 'Role Trade‑offs: Content Work vs Internal Data Science' - startOffset: 2090 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2090 - endOffset: 2187 -- name: 'Teaching Reproducibility: Dogfooding and Simplifying Workflows' - startOffset: 2187 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2187 - endOffset: 2241 -- name: 'Improving Writing: Practice, Collaboration & Editorial Feedback' - startOffset: 2241 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2241 - endOffset: 2417 -- name: 'AI‑Assisted Drafting: Whisper, ChatGPT & Productivity Tools' - startOffset: 2417 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2417 - endOffset: 2594 -- name: 'Content Design: Audience, Goals & Structural Outlines for Tutorials' - startOffset: 2594 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2594 - endOffset: 2769 -- name: 'Content Goals: Awareness, Support & Open‑Source Strategy Decisions' - startOffset: 2769 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2769 - 
endOffset: 2923 -- name: 'Choosing Media: Blog Posts, Talks, Videos, Conferences & ROI' - startOffset: 2923 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2923 - endOffset: 3102 -- name: 'Scope of DevRel: Internal Enablement, External Outreach & Partnerships' - startOffset: 3102 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3102 - endOffset: 3271 -- name: 'Career Advice: GitHub Portfolios, Meetups & Experimenting in DevRel' - startOffset: 3271 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3271 - endOffset: 3372 -- name: 'Long‑Form Conversations: Vanishing Gradients Podcast Overview' - startOffset: 3372 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3372 - endOffset: 3503 -- name: 'Contact & Resources: Hugo Online, Outerbounds Slack & Links' - startOffset: 3503 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3503 - endOffset: 3565 -- name: Closing Remarks & Episode Wrap‑Up - startOffset: 3565 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3565 - endOffset: 3472 --- Links: diff --git a/_podcast/s05e09-business-acumen.md b/_podcast/feature-engineering-model-monitoring-and-data-governance.md similarity index 97% rename from _podcast/s05e09-business-acumen.md rename to _podcast/feature-engineering-model-monitoring-and-data-governance.md index cb20d1f1..eeba2942 100644 --- a/_podcast/s05e09-business-acumen.md +++ b/_podcast/feature-engineering-model-monitoring-and-data-governance.md @@ -1,12 +1,11 @@ --- -title: 'Practical Data Science & ML: Feature Engineering, Model Monitoring, Data Governance - & Storytelling' +title: 'Practical Data Science & ML: Feature Engineering, Model Monitoring, Data Governance & Storytelling' short: Building Business Acumen for Data Professionals +season: 5 +episode: 9 guests: - thomives image: images/podcast/s05e09-business-acumen.jpg -season: 5 -episode: 9 ids: youtube: pImYf9ML95Q anchor: Building-Business-Acumen-for-Data-Professionals---Thom-Ives-e19gq91 @@ -15,6 +14,112 @@ links: anchor: 
https://anchor.fm/datatalksclub/episodes/Building-Business-Acumen-for-Data-Professionals---Thom-Ives-e19gq91 spotify: https://open.spotify.com/episode/4dFbkQI9pF4wUDueZFqxGY apple: https://podcasts.apple.com/us/podcast/building-business-acumen-for-data-professionals-thom-ives/id1541710331?i=1000540181044 + +description: 'Master feature engineering, model monitoring & data governance: ML tactics to prevent drift, boost performance, and sharpen data storytelling.' +intro: How do you move from models that look good on paper to reliable machine learning in production—while keeping data clean and stakeholders aligned? In this episode Thom Ives, founder of Integrated Machine Learning & AI and a veteran data scientist, walks through practical approaches to feature engineering, model monitoring, data governance, and data storytelling. Thom draws on a career spanning industry roles and mentoring to contrast concept-focused learning versus specialist detail work, and to explain why business acumen and role clarity matter for data teams.

You’ll hear concrete guidance on ETL reliability, closing data collection gaps, and shared responsibility for data governance and literacy. Thom breaks down the ML pipeline—from feature conditioning, scaling, selection, and engineered features to addressing collinearity with PCA and pursuing model parsimony. He also covers model selection trade-offs, spotting data drift and concept drift in production, and the maintenance needed for long-term generalizability. Finally, he emphasizes analytical storytelling and persuasion skills for influencing decisions, plus community resources and mentoring through Integrated ML & AI. Tune in for actionable tactics to improve data quality, monitoring, and stakeholder communication in real-world data science +topics: +- data science +- machine learning +- ai +- data engineering +dateadded: 2021-10-30 + +duration: PT01H05M17S + +quotableClips: +- name: Episode Introduction & Guest Thom Ives + startOffset: 75 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=75 + endOffset: 110 +- name: Concept-focused learning vs. 
detail specialization + startOffset: 110 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=110 + endOffset: 201 +- name: 'Career journey: naval nuclear program, grad school, early AI' + startOffset: 201 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=201 + endOffset: 311 +- name: 'Industry roles: HP, ON Semiconductor, SaaS AI work' + startOffset: 311 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=311 + endOffset: 532 +- name: 'Mentoring & community building: integrated mentoring origins' + startOffset: 532 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=532 + endOffset: 552 +- name: Why business acumen matters for data professionals + startOffset: 552 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=552 + endOffset: 651 +- name: 'Role clarity: data scientist versus domain expert' + startOffset: 651 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=651 + endOffset: 819 +- name: Rapid delivery & customer-centric feedback (MVP / tracer bullet) + startOffset: 819 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=819 + endOffset: 1172 +- name: ETL reliability, data collection gaps, and advocating for clean data + startOffset: 1172 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1172 + endOffset: 1299 +- name: 'Shared responsibility: data governance and data literacy' + startOffset: 1299 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1299 + endOffset: 1432 +- name: 'Data-driven vs. 
data-informed: definitions and practical balance' + startOffset: 1432 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1432 + endOffset: 1689 +- name: Analytical skills & data storytelling before modeling + startOffset: 1689 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1689 + endOffset: 1881 +- name: 'Machine learning development pipeline: feature conditioning to modeling' + startOffset: 1881 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1881 + endOffset: 2094 +- name: Feature scaling, selection, and engineered features for business insight + startOffset: 2094 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2094 + endOffset: 2446 +- name: Addressing collinearity with PCA and pursuing parsimony + startOffset: 2446 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2446 + endOffset: 2753 +- name: 'Model selection: accuracy, variance, and generalizability' + startOffset: 2753 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2753 + endOffset: 2850 +- name: 'Monitoring models in production: data drift, concept drift, and maintenance' + startOffset: 2850 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2850 + endOffset: 2968 +- name: 'Essential business skills: explainability, persuasion, and influence' + startOffset: 2968 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2968 + endOffset: 3042 +- name: 'Relationship-building: informal check‑ins, lunch & beer networking' + startOffset: 3042 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3042 + endOffset: 3349 +- name: 'Remote rapport: virtual lunches, video calls, and building camaraderie' + startOffset: 3349 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3349 + endOffset: 3516 +- name: 'Integrated ML & AI community: structure, ethos, and free resources' + startOffset: 3516 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3516 + endOffset: 3816 +- name: Joining the Slack community and accessing resources + startOffset: 3816 + url: 
https://www.youtube.com/watch?v=pImYf9ML95Q&t=3816 + endOffset: 3955 +- name: Episode wrap-up and final takeaways + startOffset: 3955 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3955 + endOffset: 3917 + transcript: - header: Episode Introduction & Guest Thom Ives - line: This week, we'll talk about business acumen. We have a special guest today, @@ -1030,120 +1135,6 @@ transcript: sec: 3992 time: '1:06:32' who: Thom -description: 'Master feature engineering, model monitoring & data governance: ML tactics - to prevent drift, boost performance, and sharpen data storytelling.' -intro: How do you move from models that look good on paper to reliable machine learning - in production—while keeping data clean and stakeholders aligned? In this episode - Thom Ives, founder of Integrated Machine Learning & AI and a veteran data scientist, - walks through practical approaches to feature engineering, model monitoring, data - governance, and data storytelling. Thom draws on a career spanning industry roles - and mentoring to contrast concept-focused learning versus specialist detail work, - and to explain why business acumen and role clarity matter for data teams.

- You’ll hear concrete guidance on ETL reliability, closing data collection gaps, - and shared responsibility for data governance and literacy. Thom breaks down the - ML pipeline—from feature conditioning, scaling, selection, and engineered features - to addressing collinearity with PCA and pursuing model parsimony. He also covers - model selection trade-offs, spotting data drift and concept drift in production, - and the maintenance needed for long-term generalizability. Finally, he emphasizes - analytical storytelling and persuasion skills for influencing decisions, plus community - resources and mentoring through Integrated ML & AI. Tune in for actionable tactics - to improve data quality, monitoring, and stakeholder communication in real-world - data science. -dateadded: '2021-10-30' -duration: PT01H05M17S -quotableClips: -- name: Episode Introduction & Guest Thom Ives - startOffset: 75 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=75 - endOffset: 110 -- name: Concept-focused learning vs. 
detail specialization - startOffset: 110 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=110 - endOffset: 201 -- name: 'Career journey: naval nuclear program, grad school, early AI' - startOffset: 201 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=201 - endOffset: 311 -- name: 'Industry roles: HP, ON Semiconductor, SaaS AI work' - startOffset: 311 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=311 - endOffset: 532 -- name: 'Mentoring & community building: integrated mentoring origins' - startOffset: 532 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=532 - endOffset: 552 -- name: Why business acumen matters for data professionals - startOffset: 552 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=552 - endOffset: 651 -- name: 'Role clarity: data scientist versus domain expert' - startOffset: 651 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=651 - endOffset: 819 -- name: Rapid delivery & customer-centric feedback (MVP / tracer bullet) - startOffset: 819 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=819 - endOffset: 1172 -- name: ETL reliability, data collection gaps, and advocating for clean data - startOffset: 1172 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1172 - endOffset: 1299 -- name: 'Shared responsibility: data governance and data literacy' - startOffset: 1299 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1299 - endOffset: 1432 -- name: 'Data-driven vs. 
data-informed: definitions and practical balance' - startOffset: 1432 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1432 - endOffset: 1689 -- name: Analytical skills & data storytelling before modeling - startOffset: 1689 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1689 - endOffset: 1881 -- name: 'Machine learning development pipeline: feature conditioning to modeling' - startOffset: 1881 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1881 - endOffset: 2094 -- name: Feature scaling, selection, and engineered features for business insight - startOffset: 2094 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2094 - endOffset: 2446 -- name: Addressing collinearity with PCA and pursuing parsimony - startOffset: 2446 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2446 - endOffset: 2753 -- name: 'Model selection: accuracy, variance, and generalizability' - startOffset: 2753 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2753 - endOffset: 2850 -- name: 'Monitoring models in production: data drift, concept drift, and maintenance' - startOffset: 2850 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2850 - endOffset: 2968 -- name: 'Essential business skills: explainability, persuasion, and influence' - startOffset: 2968 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2968 - endOffset: 3042 -- name: 'Relationship-building: informal check‑ins, lunch & beer networking' - startOffset: 3042 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3042 - endOffset: 3349 -- name: 'Remote rapport: virtual lunches, video calls, and building camaraderie' - startOffset: 3349 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3349 - endOffset: 3516 -- name: 'Integrated ML & AI community: structure, ethos, and free resources' - startOffset: 3516 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3516 - endOffset: 3816 -- name: Joining the Slack community and accessing resources - startOffset: 3816 - url: 
https://www.youtube.com/watch?v=pImYf9ML95Q&t=3816 - endOffset: 3955 -- name: Episode wrap-up and final takeaways - startOffset: 3955 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3955 - endOffset: 3917 --- diff --git a/_podcast/s09e04-freelancing-and-consulting-with-data-engineering.md b/_podcast/freelance-data-engineering-pricing-and-clients.md similarity index 97% rename from _podcast/s09e04-freelancing-and-consulting-with-data-engineering.md rename to _podcast/freelance-data-engineering-pricing-and-clients.md index 5d12b232..82b8b2f4 100644 --- a/_podcast/s09e04-freelancing-and-consulting-with-data-engineering.md +++ b/_podcast/freelance-data-engineering-pricing-and-clients.md @@ -1,20 +1,150 @@ --- +title: 'Freelance Data Engineering Playbook: Pricing, Client Acquisition & Tools' +short: Freelancing and Consulting with Data Engineering +season: 9 episode: 4 guests: - adrianbrudaru -date: 2025-11-07 +image: images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg ids: anchor: Freelancing-and-Consulting-with-Data-Engineering---Adrian-Brudaru-e1jtkkg youtube: 9DTTrN-khCk -image: images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Freelancing-and-Consulting-with-Data-Engineering---Adrian-Brudaru-e1jtkkg apple: https://podcasts.apple.com/us/podcast/freelancing-and-consulting-with-data-engineering/id1541710331?i=1000566841525 spotify: https://open.spotify.com/episode/5M9HFWt8xmqf5HyGu40RmJ?si=C95h0CMkRiazs_ft8Z_gRg youtube: https://www.youtube.com/watch?v=9DTTrN-khCk -season: 9 -short: Freelancing and Consulting with Data Engineering -title: 'Freelance Data Engineering Playbook: Pricing, Client Acquisition & Tools' + +description: 'Master freelance data engineering: pricing, client acquisition & tools to negotiate rates, scope projects, build reusable portfolios and win repeat clients.' 
+intro: 'How do you price freelance data engineering work, win steady clients, and pick the right tools for messy production problems? In this episode, Adrian Brudaru — an economist-turned-business analyst who moved to Berlin, left corporate/startup cycles to freelance for five years, and now co-founds a data company releasing open source tooling — walks through a practical playbook for freelance data engineers.

We cover pricing models (hourly rates, negotiation, occupancy and income variability), client acquisition (networking, repeat business, recruiters vs. direct contracts, Upwork pros and cons), and scoping techniques (spikes, scope documents, managing expectations). Adrian also digs into technical topics: legacy cleanup, Airflow work, and a data loading tool for volatile schemas and automatic unpacking. Along the way, he explains building a reusable portfolio, transitioning from freelancing to product or investing, working remotely vs. on-site, and how to create opportunities in local markets like Berlin.

Listen to learn concrete approaches to freelance data engineering pricing, client acquisition strategies, scoping projects, and practical tools to handle unstable schemas — so you can evaluate projects, set rates, and grow a sustainable freelance practice.' +topics: +- data engineering +- freelance +- career growth +- tools +dateadded: 2022-06-18 +date: 2025-11-07 + +duration: PT01H01M16S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=0 + endOffset: 100 +- name: 'Guest Overview: Adrian’s Move to Freelancing' + startOffset: 100 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=100 + endOffset: 132 +- name: 'Career Journey: Economics & Marketing to Data Engineering' + startOffset: 132 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=132 + endOffset: 214 +- name: 'Becoming a Freelancer: Leaving Corporate Life & First Contract' + startOffset: 214 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=214 + endOffset: 333 +- name: 'Hiring Process Differences: Freelance vs Corporate Engagements' + startOffset: 333 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=333 + endOffset: 426 +- name: Income Variability & Occupancy Rate Explained + startOffset: 426 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=426 + endOffset: 567 +- name: 'Early Challenges: Impostor Syndrome and Team Fit' + startOffset: 567 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=567 + endOffset: 696 +- name: 'First Projects: Legacy Cleanup, Airflow, Data Science & ICO Work' + startOffset: 696 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=696 + endOffset: 908 +- name: 'Shift to Product: Founding a Company and Building a Prototype' + startOffset: 908 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=908 + endOffset: 1092 +- name: 'Pricing Models: Hourly Rates, Negotiation, and Market Ranges' + startOffset: 1092 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1092 + endOffset: 1277 +- name: 'Skill 
Growth: Learning on the Job and Generalist Advantage' + startOffset: 1277 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1277 + endOffset: 1399 +- name: 'Intermediaries: Recruitment Agencies vs Direct Client Work' + startOffset: 1399 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1399 + endOffset: 1665 +- name: 'Client Acquisition: Networking Strategies and Repeat Business' + startOffset: 1665 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1665 + endOffset: 1903 +- name: 'Scoping Work: Spikes, Scope Documents and Managing Expectations' + startOffset: 1903 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1903 + endOffset: 2101 +- name: 'Networking Tactics: In-Person Meetings and Relationship Building' + startOffset: 2101 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2101 + endOffset: 2385 +- name: 'Freelance Platforms: Upwork Pros, Cons, and Time Valuation' + startOffset: 2385 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2385 + endOffset: 2457 +- name: 'Work Location: Choosing Remote or On‑Site Projects' + startOffset: 2457 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2457 + endOffset: 2492 +- name: 'Data Loading Tool: Handling Volatile Schemas & Automatic Unpacking' + startOffset: 2492 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2492 + endOffset: 2668 +- name: 'Transition Paths: From Freelancing to Product or Investing' + startOffset: 2668 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2668 + endOffset: 2777 +- name: 'Reusable Portfolio: Building Demonstrable, Reusable Assets' + startOffset: 2777 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2777 + endOffset: 2937 +- name: 'Personality Fit: Freelancing as an Introvert' + startOffset: 2937 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2937 + endOffset: 2973 +- name: 'Work Flexibility: Feasibility of Working Three Months a Year' + startOffset: 2973 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2973 + endOffset: 3087 +- name: 'Project 
Selection: Following Interest vs Strategic Skill Building' + startOffset: 3087 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3087 + endOffset: 3142 +- name: 'Assessing Fit: Traits That Predict Freelance Success' + startOffset: 3142 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3142 + endOffset: 3283 +- name: 'Local Market: Berlin Opportunities and How to Create Them' + startOffset: 3283 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3283 + endOffset: 3330 +- name: 'Client Expectations: Proactivity, Ownership, and Outcomes' + startOffset: 3330 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3330 + endOffset: 3443 +- name: 'Multiple Clients: Balancing Focus, Risk, and Side Gigs' + startOffset: 3443 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3443 + endOffset: 3549 +- name: 'Freelance Cooperative: Slack Group for Collaboration & Referrals' + startOffset: 3549 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3549 + endOffset: 3700 +- name: 'Contact & Resources: LinkedIn, GitHub, Slack Invite and Next Steps' + startOffset: 3700 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3700 + endOffset: 3676 + transcript: - header: 'Guest Overview: Adrian’s Move to Freelancing' - line: This week we'll talk about freelancing in data engineering. We have a special @@ -1274,143 +1404,6 @@ transcript: sec: 3776 time: '1:02:56' who: Adrian -intro: 'How do you price freelance data engineering work, win steady clients, and - pick the right tools for messy production problems? In this episode, Adrian Brudaru - — an economist-turned-business analyst who moved to Berlin, left corporate/startup - cycles to freelance for five years, and now co-founds a data company releasing open - source tooling — walks through a practical playbook for freelance data engineers. -

We cover pricing models (hourly rates, negotiation, occupancy and income - variability), client acquisition (networking, repeat business, recruiters vs. direct - contracts, Upwork pros and cons), and scoping techniques (spikes, scope documents, - managing expectations). Adrian also digs into technical topics: legacy cleanup, - Airflow work, and a data loading tool for volatile schemas and automatic unpacking. - Along the way, he explains building a reusable portfolio, transitioning from freelancing - to product or investing, working remotely vs. on-site, and how to create opportunities - in local markets like Berlin.

Listen to learn concrete approaches to freelance - data engineering pricing, client acquisition strategies, scoping projects, and practical - tools to handle unstable schemas — so you can evaluate projects, set rates, and - grow a sustainable freelance practice.' -description: 'Master freelance data engineering: pricing, client acquisition & tools - to negotiate rates, scope projects, build reusable portfolios and win repeat clients.' -dateadded: '2022-06-18' -duration: PT01H01M16S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=0 - endOffset: 100 -- name: 'Guest Overview: Adrian’s Move to Freelancing' - startOffset: 100 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=100 - endOffset: 132 -- name: 'Career Journey: Economics & Marketing to Data Engineering' - startOffset: 132 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=132 - endOffset: 214 -- name: 'Becoming a Freelancer: Leaving Corporate Life & First Contract' - startOffset: 214 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=214 - endOffset: 333 -- name: 'Hiring Process Differences: Freelance vs Corporate Engagements' - startOffset: 333 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=333 - endOffset: 426 -- name: Income Variability & Occupancy Rate Explained - startOffset: 426 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=426 - endOffset: 567 -- name: 'Early Challenges: Impostor Syndrome and Team Fit' - startOffset: 567 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=567 - endOffset: 696 -- name: 'First Projects: Legacy Cleanup, Airflow, Data Science & ICO Work' - startOffset: 696 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=696 - endOffset: 908 -- name: 'Shift to Product: Founding a Company and Building a Prototype' - startOffset: 908 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=908 - endOffset: 1092 -- name: 'Pricing Models: Hourly Rates, Negotiation, and Market Ranges' - startOffset: 1092 - 
url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1092 - endOffset: 1277 -- name: 'Skill Growth: Learning on the Job and Generalist Advantage' - startOffset: 1277 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1277 - endOffset: 1399 -- name: 'Intermediaries: Recruitment Agencies vs Direct Client Work' - startOffset: 1399 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1399 - endOffset: 1665 -- name: 'Client Acquisition: Networking Strategies and Repeat Business' - startOffset: 1665 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1665 - endOffset: 1903 -- name: 'Scoping Work: Spikes, Scope Documents and Managing Expectations' - startOffset: 1903 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1903 - endOffset: 2101 -- name: 'Networking Tactics: In-Person Meetings and Relationship Building' - startOffset: 2101 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2101 - endOffset: 2385 -- name: 'Freelance Platforms: Upwork Pros, Cons, and Time Valuation' - startOffset: 2385 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2385 - endOffset: 2457 -- name: 'Work Location: Choosing Remote or On‑Site Projects' - startOffset: 2457 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2457 - endOffset: 2492 -- name: 'Data Loading Tool: Handling Volatile Schemas & Automatic Unpacking' - startOffset: 2492 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2492 - endOffset: 2668 -- name: 'Transition Paths: From Freelancing to Product or Investing' - startOffset: 2668 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2668 - endOffset: 2777 -- name: 'Reusable Portfolio: Building Demonstrable, Reusable Assets' - startOffset: 2777 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2777 - endOffset: 2937 -- name: 'Personality Fit: Freelancing as an Introvert' - startOffset: 2937 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2937 - endOffset: 2973 -- name: 'Work Flexibility: Feasibility of Working Three Months a Year' - startOffset: 2973 - url: 
https://www.youtube.com/watch?v=9DTTrN-khCk&t=2973 - endOffset: 3087 -- name: 'Project Selection: Following Interest vs Strategic Skill Building' - startOffset: 3087 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3087 - endOffset: 3142 -- name: 'Assessing Fit: Traits That Predict Freelance Success' - startOffset: 3142 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3142 - endOffset: 3283 -- name: 'Local Market: Berlin Opportunities and How to Create Them' - startOffset: 3283 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3283 - endOffset: 3330 -- name: 'Client Expectations: Proactivity, Ownership, and Outcomes' - startOffset: 3330 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3330 - endOffset: 3443 -- name: 'Multiple Clients: Balancing Focus, Risk, and Side Gigs' - startOffset: 3443 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3443 - endOffset: 3549 -- name: 'Freelance Cooperative: Slack Group for Collaboration & Referrals' - startOffset: 3549 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3549 - endOffset: 3700 -- name: 'Contact & Resources: LinkedIn, GitHub, Slack Invite and Next Steps' - startOffset: 3700 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3700 - endOffset: 3676 --- Links: diff --git a/_podcast/s04e08-freelancing.md b/_podcast/freelancing-in-machine-learning.md similarity index 97% rename from _podcast/s04e08-freelancing.md rename to _podcast/freelancing-in-machine-learning.md index 361aba68..719355b1 100644 --- a/_podcast/s04e08-freelancing.md +++ b/_podcast/freelancing-in-machine-learning.md @@ -1,20 +1,11 @@ --- -title: Freelancing in Machine Learning +title: 'Freelancing in Machine Learning: Pricing, Client Acquisition & Proposals' short: Freelancing in Machine Learning +season: 4 +episode: 8 guests: - mikiobraun image: images/podcast/s04e08-freelancing.jpg -topics: -- freelance -- consulting -- machine learning -- career growth -- entrepreneurship -- remote work -- business strategy -date: 2025-11-07 
-season: 4 -episode: 8 ids: youtube: HfF791e0HR8 anchor: Freelancing-in-Machine-Learning---Mikio-Braun-e166n7r @@ -23,6 +14,140 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Freelancing-in-Machine-Learning---Mikio-Braun-e166n7r spotify: https://open.spotify.com/episode/2oE13mUEa9k4AO5qogYdqv apple: https://podcasts.apple.com/us/podcast/freelancing-in-machine-learning-mikio-braun/id1541710331?i=1000532612872 + +description: 'Learn freelancing in machine learning: pricing, client acquisition, and proposals to win ML consulting gigs, scale sustainably, and secure steady income' +intro: 'How do you move from academic research or in‑house ML engineering to a sustainable freelance career in machine learning — getting clients, pricing your work, and delivering production systems? In this episode, Mikio Braun, who transitioned from TU Berlin into ML roles at Zalando and GetYourGuide and now consults on machine learning production, infrastructure, and teams, walks through that path step by step.

We cover the practical parts of freelancing in machine learning: launching first clients, sourcing leads through network and referrals, and demand generation with LinkedIn, talks, and podcasts; pre‑sales tactics like free intro calls, problem discovery, and clear proposals; pricing models and rate‑setting strategies; financial planning, capacity management, and avoiding burnout; plus specialization, productizing consulting, and scaling options (agency, product, or return to employment). The episode also addresses administrative essentials for freelancers in Germany (registration, VAT, payments), accounting choices, professional liability, and how to compete in a global remote market. Listen for concrete advice on client‑finding, scope discipline, and deliverables so you can evaluate whether freelancing in machine learning is the right next step and how to start with a safety net.' +topics: +- freelance +- consulting +- machine learning +- career growth +- entrepreneurship +- strategy +dateadded: 2021-08-20 +date: 2025-11-07 + +duration: PT01H01M48S + +quotableClips: +- name: Episode Introduction & Topic Overview (Freelancing in Machine Learning) + startOffset: 0 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=0 + endOffset: 119 +- name: Guest Background — Academic Research to Industry Roles (TU Berlin → Zalando + → GetYourGuide) + startOffset: 119 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=119 + endOffset: 373 +- name: Consulting Scope — Advising on ML Production, Infrastructure, and Teams + startOffset: 373 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=373 + endOffset: 473 +- name: Freelance Launch — First Clients and Early Momentum + startOffset: 473 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=473 + endOffset: 527 +- name: Client Lead Sources — Network, Referrals, and Direct Outreach + startOffset: 527 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=527 + endOffset: 608 +- name: Personal Branding & Demand Generation (LinkedIn, 
Talks, Podcasts) + startOffset: 608 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=608 + endOffset: 928 +- name: Networking Tactics — Coffee Chats, Lunchclub, and Meetups + startOffset: 928 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=928 + endOffset: 1149 +- name: Intro Calls & Pre‑sales — Free Meetings, Qualification, and Trust Building + startOffset: 1149 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1149 + endOffset: 1297 +- name: Problem Discovery — Diagnosing Needs vs. Prescribed Solutions + startOffset: 1297 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1297 + endOffset: 1338 +- name: Proposal Essentials — Written Summaries, Scope Alignment, and Signoff + startOffset: 1338 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1338 + endOffset: 1432 +- name: Pricing Models — Hourly, Fixed‑Price, and Value‑Based Tradeoffs + startOffset: 1432 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1432 + endOffset: 1777 +- name: Rate Setting Strategies — Negotiation, Risk, and Client Concerns + startOffset: 1777 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1777 + endOffset: 1912 +- name: Financial Planning — Vacation, Risk Buffer, and Expected Income + startOffset: 1912 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1912 + endOffset: 2038 +- name: Workload Management — Capacity Planning, Calendars, and Burnout Prevention + startOffset: 2038 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2038 + endOffset: 2171 +- name: Specialization Strategy — Niches, Productizing Consulting, and Predictability + startOffset: 2171 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2171 + endOffset: 2326 +- name: Client Workflow — Managing Multiple Clients and Daily Rhythms + startOffset: 2326 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2326 + endOffset: 2424 +- name: Income Comparison — Freelance Earnings vs. 
Full‑Time Salary + startOffset: 2424 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2424 + endOffset: 2479 +- name: Freelance Tradeoffs — Freedom, Overhead, and Side Projects + startOffset: 2479 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2479 + endOffset: 2660 +- name: Capacity Decisions — Accepting, Delaying, or Declining New Projects + startOffset: 2660 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2660 + endOffset: 2715 +- name: Scaling Paths — Building an Agency, Launching a Product, or Rejoining Employment + startOffset: 2715 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2715 + endOffset: 2907 +- name: Deliverables & Outcomes — Mentoring, Workshops, Prototypes, and Team Results + startOffset: 2907 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2907 + endOffset: 3017 +- name: Skill Growth as a Freelancer — Stretch Assignments and Learning Safely + startOffset: 3017 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3017 + endOffset: 3165 +- name: Client‑Finding Lessons — Clarity in Writing and Scope Discipline + startOffset: 3165 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3165 + endOffset: 3210 +- name: Administrative Setup in Germany — Freelance Registration, VAT, and Payments + startOffset: 3210 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3210 + endOffset: 3261 +- name: Accounting Choices — DIY Taxes vs. 
Hiring a Tax Advisor + startOffset: 3261 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3261 + endOffset: 3422 +- name: Professional Liability — Insurance, GDPR, and Contractual Safeguards + startOffset: 3422 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3422 + endOffset: 3539 +- name: Global Market Dynamics — Remote Work, Competition, and Differentiation + startOffset: 3539 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3539 + endOffset: 3662 +- name: Starter Advice — Trying Freelancing with a Safety Net + startOffset: 3662 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3662 + endOffset: 3792 +- name: Closing Remarks & Contact Information (LinkedIn, Twitter @mikiobraun) + startOffset: 3792 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3792 + endOffset: 3708 + transcript: - line: This week, we'll talk about freelancing and consulting. And we have a special guest today — Mikio. Actually, Mikio was my teacher six years ago. I was studying @@ -1270,144 +1395,6 @@ transcript: sec: 3827 time: '1:03:47' who: Alexey -intro: 'How do you move from academic research or in‑house ML engineering to a sustainable - freelance career in machine learning — getting clients, pricing your work, and delivering - production systems? In this episode, Mikio Braun, who transitioned from TU Berlin - into ML roles at Zalando and GetYourGuide and now consults on machine learning production, - infrastructure, and teams, walks through that path step by step.

We cover - the practical parts of freelancing in machine learning: launching first clients, - sourcing leads through network and referrals, and demand generation with LinkedIn, - talks, and podcasts; pre‑sales tactics like free intro calls, problem discovery, - and clear proposals; pricing models and rate‑setting strategies; financial planning, - capacity management, and avoiding burnout; plus specialization, productizing consulting, - and scaling options (agency, product, or return to employment). The episode also - addresses administrative essentials for freelancers in Germany (registration, VAT, - payments), accounting choices, professional liability, and how to compete in a global - remote market. Listen for concrete advice on client‑finding, scope discipline, and - deliverables so you can evaluate whether freelancing in machine learning is the - right next step and how to start with a safety net.' -description: 'Learn freelancing in machine learning: pricing, client acquisition, - and proposals to win ML consulting gigs, scale sustainably, and secure steady income' -dateadded: '2021-08-20' -duration: PT01H01M48S -quotableClips: -- name: Episode Introduction & Topic Overview (Freelancing in Machine Learning) - startOffset: 0 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=0 - endOffset: 119 -- name: Guest Background — Academic Research to Industry Roles (TU Berlin → Zalando - → GetYourGuide) - startOffset: 119 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=119 - endOffset: 373 -- name: Consulting Scope — Advising on ML Production, Infrastructure, and Teams - startOffset: 373 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=373 - endOffset: 473 -- name: Freelance Launch — First Clients and Early Momentum - startOffset: 473 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=473 - endOffset: 527 -- name: Client Lead Sources — Network, Referrals, and Direct Outreach - startOffset: 527 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=527 - 
endOffset: 608 -- name: Personal Branding & Demand Generation (LinkedIn, Talks, Podcasts) - startOffset: 608 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=608 - endOffset: 928 -- name: Networking Tactics — Coffee Chats, Lunchclub, and Meetups - startOffset: 928 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=928 - endOffset: 1149 -- name: Intro Calls & Pre‑sales — Free Meetings, Qualification, and Trust Building - startOffset: 1149 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1149 - endOffset: 1297 -- name: Problem Discovery — Diagnosing Needs vs. Prescribed Solutions - startOffset: 1297 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1297 - endOffset: 1338 -- name: Proposal Essentials — Written Summaries, Scope Alignment, and Signoff - startOffset: 1338 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1338 - endOffset: 1432 -- name: Pricing Models — Hourly, Fixed‑Price, and Value‑Based Tradeoffs - startOffset: 1432 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1432 - endOffset: 1777 -- name: Rate Setting Strategies — Negotiation, Risk, and Client Concerns - startOffset: 1777 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1777 - endOffset: 1912 -- name: Financial Planning — Vacation, Risk Buffer, and Expected Income - startOffset: 1912 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1912 - endOffset: 2038 -- name: Workload Management — Capacity Planning, Calendars, and Burnout Prevention - startOffset: 2038 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2038 - endOffset: 2171 -- name: Specialization Strategy — Niches, Productizing Consulting, and Predictability - startOffset: 2171 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2171 - endOffset: 2326 -- name: Client Workflow — Managing Multiple Clients and Daily Rhythms - startOffset: 2326 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2326 - endOffset: 2424 -- name: Income Comparison — Freelance Earnings vs. 
Full‑Time Salary - startOffset: 2424 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2424 - endOffset: 2479 -- name: Freelance Tradeoffs — Freedom, Overhead, and Side Projects - startOffset: 2479 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2479 - endOffset: 2660 -- name: Capacity Decisions — Accepting, Delaying, or Declining New Projects - startOffset: 2660 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2660 - endOffset: 2715 -- name: Scaling Paths — Building an Agency, Launching a Product, or Rejoining Employment - startOffset: 2715 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2715 - endOffset: 2907 -- name: Deliverables & Outcomes — Mentoring, Workshops, Prototypes, and Team Results - startOffset: 2907 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2907 - endOffset: 3017 -- name: Skill Growth as a Freelancer — Stretch Assignments and Learning Safely - startOffset: 3017 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3017 - endOffset: 3165 -- name: Client‑Finding Lessons — Clarity in Writing and Scope Discipline - startOffset: 3165 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3165 - endOffset: 3210 -- name: Administrative Setup in Germany — Freelance Registration, VAT, and Payments - startOffset: 3210 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3210 - endOffset: 3261 -- name: Accounting Choices — DIY Taxes vs. 
Hiring a Tax Advisor - startOffset: 3261 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3261 - endOffset: 3422 -- name: Professional Liability — Insurance, GDPR, and Contractual Safeguards - startOffset: 3422 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3422 - endOffset: 3539 -- name: Global Market Dynamics — Remote Work, Competition, and Differentiation - startOffset: 3539 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3539 - endOffset: 3662 -- name: Starter Advice — Trying Freelancing with a Safety Net - startOffset: 3662 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3662 - endOffset: 3792 -- name: Closing Remarks & Contact Information (LinkedIn, Twitter @mikiobraun) - startOffset: 3792 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3792 - endOffset: 3708 --- Books: diff --git a/_podcast/s12e09-staff-ai-engineer.md b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md similarity index 97% rename from _podcast/s12e09-staff-ai-engineer.md rename to _podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md index 2fc82655..9e887262 100644 --- a/_podcast/s12e09-staff-ai-engineer.md +++ b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md @@ -1,19 +1,139 @@ --- +title: 'Transitioning from Academia to Industry as a Staff AI Engineer: Interview Prep, MLOps & Onboarding' +short: Transitioning from Academia to Industry as a Staff AI Engineer +season: 12 episode: 9 guests: - tatianagabruseva +image: images/podcast/s12e09-staff-ai-engineer.jpg ids: anchor: Staff-AI-Engineer---Tatiana-Gabruseva-e1v3on7 youtube: _xr1_xb736E -image: images/podcast/s12e09-staff-ai-engineer.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Staff-AI-Engineer---Tatiana-Gabruseva-e1v3on7 apple: https://podcasts.apple.com/us/podcast/staff-ai-engineer-tatiana-gabruseva/id1541710331?i=1000600246792 spotify: https://open.spotify.com/episode/4o52jMRR2cctCD8LuFFLdD?si=tBO_9KkiSWySHu7jaM-McQ 
youtube: https://www.youtube.com/watch?v=_xr1_xb736E -season: 12 -short: Staff AI Engineer -title: 'Staff AI Engineer: From Academia to Industry — Interview Prep, MLOps & Onboarding' + +description: Discover Staff AI Engineer interview prep, MLOps & onboarding tactics to transition from academia—coding strategies, system design, mentorship for impact +intro: 'How do you transition from academia into a Staff AI Engineer role while nailing interview prep, MLOps, and onboarding? In this episode, Tatiana Gabruseva — a computer vision/deep learning engineer, Kaggle Competitions Master, and Senior ML Engineer at Cork University Hospital — walks through her shift from physics and healthcare research into industry engineering leadership.

We cover practical, concrete topics listeners can use: the onboarding shock she experienced at LinkedIn and how to prioritize learning; ramping up production stacks (Scala, Spark, Kubernetes); the Staff AI Engineer remit of opinion-setting, cross-functional influence, and strategy; and staff engineer archetypes (deep specialist, cross-team advisor, hands-on mentor). Tatiana explains how to translate academic leadership, grants, and research into industry roadmaps, how mentorship accelerates onboarding, and how to convince employers with applied projects and collaborations.

For candidates she shares interview prep tactics — LeetCode coding plans, ML and system design prep, mock interviews, networking and referrals, and reframing rejections — plus real-world involvement in MLOps, ETL pipelines, and heavy code review. Tune in to learn actionable steps for moving from academia to a staff engineering role and succeeding in interviews, onboarding, and production ML.' +topics: +- machine learning +- career transition +- MLOps +- staff AI engineer +- career growth +dateadded: 2023-02-18 + +duration: PT00H59M23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=0 + endOffset: 71 +- name: Episode kickoff and guest reintroduction + startOffset: 71 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=71 + endOffset: 113 +- name: 'Guest background: physics → healthcare → machine learning' + startOffset: 113 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=113 + endOffset: 204 +- name: Onboarding shock at LinkedIn and industry mindset shift + startOffset: 204 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=204 + endOffset: 343 +- name: Ramping up technical stack as a tech lead (Scala, Spark, Kubernetes) + startOffset: 343 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=343 + endOffset: 450 +- name: 'Staff AI Engineer role: opinion, strategy, and cross-functional influence' + startOffset: 450 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=450 + endOffset: 664 +- name: 'Staff engineer archetypes: deep specialist, cross-team advisor, hands-on + mentor' + startOffset: 664 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=664 + endOffset: 881 +- name: Transferring academic skills to industry leadership and roadmapping + startOffset: 881 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=881 + endOffset: 1007 +- name: 'Onboarding priorities: common mistakes and faster learning' + startOffset: 1007 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1007 + 
endOffset: 1065 +- name: Mentorship importance for onboarding and career growth + startOffset: 1065 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1065 + endOffset: 1148 +- name: 'Skipping mid-level roles: landing a staff position from academia' + startOffset: 1148 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1148 + endOffset: 1286 +- name: Translating research leadership and grants experience to industry impact + startOffset: 1286 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1286 + endOffset: 1530 +- name: 'Convincing employers: framing applied projects and industry collaborations' + startOffset: 1530 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1530 + endOffset: 1705 +- name: 'Interview journey: early failures, coding gaps, and commitment to prep' + startOffset: 1705 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1705 + endOffset: 1781 +- name: Referrals and networking influence on hiring outcomes + startOffset: 1781 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1781 + endOffset: 1928 +- name: Reframing rejections as learning opportunities + startOffset: 1928 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1928 + endOffset: 2080 +- name: 'Coding interview strategy: LeetCode plan, timeline, and persistence' + startOffset: 2080 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2080 + endOffset: 2384 +- name: 'ML design interviews: physics-style decomposition, blogs, and mock practice' + startOffset: 2384 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2384 + endOffset: 2616 +- name: 'System design prep: Grokking, mock interviews, and quick study tactics' + startOffset: 2616 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2616 + endOffset: 2923 +- name: Mock interviews and building a mentor network + startOffset: 2923 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2923 + endOffset: 3070 +- name: Staff involvement in MLOps, ETL, pipelines, and data team collaboration + startOffset: 3070 + url: 
https://www.youtube.com/watch?v=_xr1_xb736E&t=3070 + endOffset: 3139 +- name: Managing heavy code review load and context switching across projects + startOffset: 3139 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3139 + endOffset: 3253 +- name: Advice for academics aiming for staff roles in industry + startOffset: 3253 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3253 + endOffset: 3460 +- name: 'Excitement of AI work: generative models, R&D freedom, and measurable impact' + startOffset: 3460 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3460 + endOffset: 3585 +- name: 'Recommended books: communication, staff engineering, and leadership' + startOffset: 3585 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3585 + endOffset: 3634 +- name: Episode closing and final thanks + startOffset: 3634 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3634 + endOffset: 3563 + transcript: - header: Podcast Introduction - header: Episode kickoff and guest reintroduction @@ -1083,133 +1203,6 @@ transcript: sec: 3634 time: '1:00:34' who: Alexey -description: Discover Staff AI Engineer interview prep, MLOps & onboarding tactics - to transition from academia—coding strategies, system design, mentorship for impact. -intro: 'How do you transition from academia into a Staff AI Engineer role while nailing - interview prep, MLOps, and onboarding? In this episode, Tatiana Gabruseva — a computer - vision/deep learning engineer, Kaggle Competitions Master, and Senior ML Engineer - at Cork University Hospital — walks through her shift from physics and healthcare - research into industry engineering leadership.

We cover practical, concrete - topics listeners can use: the onboarding shock she experienced at LinkedIn and how - to prioritize learning; ramping up production stacks (Scala, Spark, Kubernetes); - the Staff AI Engineer remit of opinion-setting, cross-functional influence, and - strategy; and staff engineer archetypes (deep specialist, cross-team advisor, hands-on - mentor). Tatiana explains how to translate academic leadership, grants, and research - into industry roadmaps, how mentorship accelerates onboarding, and how to convince - employers with applied projects and collaborations.

For candidates she - shares interview prep tactics — LeetCode coding plans, ML and system design prep, - mock interviews, networking and referrals, and reframing rejections — plus real-world - involvement in MLOps, ETL pipelines, and heavy code review. Tune in to learn actionable - steps for moving from academia to a staff engineering role and succeeding in interviews, - onboarding, and production ML.' -dateadded: '2023-02-18' -duration: PT00H59M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=0 - endOffset: 71 -- name: Episode kickoff and guest reintroduction - startOffset: 71 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=71 - endOffset: 113 -- name: 'Guest background: physics → healthcare → machine learning' - startOffset: 113 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=113 - endOffset: 204 -- name: Onboarding shock at LinkedIn and industry mindset shift - startOffset: 204 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=204 - endOffset: 343 -- name: Ramping up technical stack as a tech lead (Scala, Spark, Kubernetes) - startOffset: 343 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=343 - endOffset: 450 -- name: 'Staff AI Engineer role: opinion, strategy, and cross-functional influence' - startOffset: 450 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=450 - endOffset: 664 -- name: 'Staff engineer archetypes: deep specialist, cross-team advisor, hands-on - mentor' - startOffset: 664 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=664 - endOffset: 881 -- name: Transferring academic skills to industry leadership and roadmapping - startOffset: 881 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=881 - endOffset: 1007 -- name: 'Onboarding priorities: common mistakes and faster learning' - startOffset: 1007 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1007 - endOffset: 1065 -- name: Mentorship importance for onboarding and career growth - startOffset: 
1065 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1065 - endOffset: 1148 -- name: 'Skipping mid-level roles: landing a staff position from academia' - startOffset: 1148 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1148 - endOffset: 1286 -- name: Translating research leadership and grants experience to industry impact - startOffset: 1286 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1286 - endOffset: 1530 -- name: 'Convincing employers: framing applied projects and industry collaborations' - startOffset: 1530 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1530 - endOffset: 1705 -- name: 'Interview journey: early failures, coding gaps, and commitment to prep' - startOffset: 1705 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1705 - endOffset: 1781 -- name: Referrals and networking influence on hiring outcomes - startOffset: 1781 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1781 - endOffset: 1928 -- name: Reframing rejections as learning opportunities - startOffset: 1928 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1928 - endOffset: 2080 -- name: 'Coding interview strategy: LeetCode plan, timeline, and persistence' - startOffset: 2080 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2080 - endOffset: 2384 -- name: 'ML design interviews: physics-style decomposition, blogs, and mock practice' - startOffset: 2384 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2384 - endOffset: 2616 -- name: 'System design prep: Grokking, mock interviews, and quick study tactics' - startOffset: 2616 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2616 - endOffset: 2923 -- name: Mock interviews and building a mentor network - startOffset: 2923 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2923 - endOffset: 3070 -- name: Staff involvement in MLOps, ETL, pipelines, and data team collaboration - startOffset: 3070 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3070 - endOffset: 3139 -- name: Managing heavy code 
review load and context switching across projects - startOffset: 3139 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3139 - endOffset: 3253 -- name: Advice for academics aiming for staff roles in industry - startOffset: 3253 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3253 - endOffset: 3460 -- name: 'Excitement of AI work: generative models, R&D freedom, and measurable impact' - startOffset: 3460 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3460 - endOffset: 3585 -- name: 'Recommended books: communication, staff engineering, and leadership' - startOffset: 3585 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3585 - endOffset: 3634 -- name: Episode closing and final thanks - startOffset: 3634 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3634 - endOffset: 3563 --- Links: diff --git a/_podcast/s15e08-from-data-manager-to-data-architect.md b/_podcast/from-iot-data-engineering-to-leading-data-architect.md similarity index 96% rename from _podcast/s15e08-from-data-manager-to-data-architect.md rename to _podcast/from-iot-data-engineering-to-leading-data-architect.md index 72c1c6d3..8c3949c8 100644 --- a/_podcast/s15e08-from-data-manager-to-data-architect.md +++ b/_podcast/from-iot-data-engineering-to-leading-data-architect.md @@ -1,19 +1,123 @@ --- +title: 'From Hands-On IoT Data Engineering to Leading Data Architecture: Pipelines, Cloud Adaptation & Analytics Modeling' +short: From Data Manager to Data Architect +season: 15 episode: 8 guests: - loicmagnien +image: images/podcast/s15e08-from-data-manager-to-data-architect.jpg ids: anchor: atatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 youtube: qWG--iYO2uc -image: images/podcast/s15e08-from-data-manager-to-data-architect.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 apple: 
https://podcasts.apple.com/us/podcast/from-data-manager-to-data-architect-lo%C3%AFc-magnien/id1541710331?i=1000629678056 spotify: https://open.spotify.com/episode/7twXPni1q2RJQU2jjbCGty?si=KNCEy-0ZRrWDVchFsDCHjQ youtube: https://www.youtube.com/watch?v=qWG--iYO2uc -season: 15 -short: From Data Manager to Data Architect -title: Build & Scale End-to-End IoT Data Pipelines, Lakehouse & Core Data Modeling +description: "A real-world journey from hands-on IoT data engineering to leading data architecture: covering pipelines, cloud adaptation, analytics modeling, lakehouse design, and the senior responsibilities of a data lead." +intro: "What does it take to evolve from hands-on IoT data engineering to leading data architecture — building scalable pipelines, adapting to cloud platforms, and designing analytics models that serve entire organizations? In this episode, Loïc Magnien, Lead Data at Mylight150 with a decade spanning database management, data engineering, product ownership and architecture, shares his real-world journey from managing sensor data to architecting enterprise-scale data systems.

We explore the progression from tactical IoT work — sensor data aggregation, structural health monitoring, ETL automation for logger ingestion — to strategic architecture responsibilities including cloud fundamentals (Python, Azure), lakehouse design with bronze-silver-gold layering, and core data modeling that aligns cross-functional teams. Loïc breaks down practical patterns for building reusable ingestion and transformation templates, designing dimensions and facts that serve multiple consumers, maintaining data quality expectations across layers, and balancing hands-on engineering with stakeholder engagement. The conversation covers hiring considerations for data teams, scaling responsibilities from individual contributor to lead, leveraging tools like DBT and LLMs for technology scouting, and making pragmatic tradeoffs between reusable components and project-specific solutions.

Listen to discover actionable guidance on architecture outcomes, agile delivery through proofs of concept, and building core models that drive business alignment — plus insights on the senior leadership skills needed to succeed as a data architect in IoT and analytics environments." +topics: +- data engineering +- career transition +- MLOps +dateadded: 2023-10-01 + +duration: PT01H27S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=0 + endOffset: 105 +- name: 'Career overview: From data manager to data lead' + startOffset: 105 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=105 + endOffset: 204 +- name: 'Early role: Sensor data aggregation & structural health monitoring' + startOffset: 204 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=204 + endOffset: 344 +- name: 'Data management vs analyst: responsibilities and data discovery' + startOffset: 344 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=344 + endOffset: 441 +- name: 'Automation to data engineering: ETL, scripting, and process automation' + startOffset: 441 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=441 + endOffset: 561 +- name: 'End-to-end IoT pipelines: loggers, ingestion, and reporting' + startOffset: 561 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=561 + endOffset: 687 +- name: 'Domain expertise: civil engineering aiding data diagnosis' + startOffset: 687 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=687 + endOffset: 891 +- name: 'Adapting to cloud & IoT: learning Python, Azure, and cloud fundamentals' + startOffset: 891 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=891 + endOffset: 1261 +- name: 'Hiring mindset: evaluating experience, scale, and cloud adaptability' + startOffset: 1261 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1261 + endOffset: 1367 +- name: 'Data architect role: seniority, end-to-end ownership, and modeling' + startOffset: 1367 + url: 
https://www.youtube.com/watch?v=qWG--iYO2uc&t=1367 + endOffset: 1640 +- name: 'Architecture outcome: team alignment and optimized data processes' + startOffset: 1640 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1640 + endOffset: 1796 +- name: 'Lakehouse layering: bronze, silver, gold and data quality expectations' + startOffset: 1796 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1796 + endOffset: 1978 +- name: 'Analytics modeling: dimensions, facts, metrics, and stakeholder discovery' + startOffset: 1978 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1978 + endOffset: 2160 +- name: 'Core model strategy: supporting multiple consumers and departments' + startOffset: 2160 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2160 + endOffset: 2230 +- name: 'Role balance: hands-on engineering vs stakeholder engagement over time' + startOffset: 2230 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2230 + endOffset: 2551 +- name: 'Empowerment & prioritization: scaling teams and aligning with business goals' + startOffset: 2551 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2551 + endOffset: 2653 +- name: 'Staying technical: one-on-ones, demos, and hands-on proofs of concept' + startOffset: 2653 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2653 + endOffset: 3045 +- name: 'Technology scouting: DBT, LLMs, newsletters and community curation' + startOffset: 3045 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3045 + endOffset: 3208 +- name: 'Agile delivery: draft specs, proof of concept pipelines, and iteration' + startOffset: 3208 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3208 + endOffset: 3432 +- name: 'Reusable templates: ingestion, transformation, and datamart patterns' + startOffset: 3432 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3432 + endOffset: 3574 +- name: 'Design tradeoffs: reusable components vs project-specific solutions' + startOffset: 3574 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3574 
+ endOffset: 3651 +- name: 'Follow-up: guest contact and LinkedIn connection' + startOffset: 3651 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3651 + endOffset: 3691 +- name: Episode recap & closing + startOffset: 3691 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3691 + endOffset: 3627 + transcript: - header: Podcast Introduction - header: 'Career overview: From data manager to data lead' @@ -937,118 +1041,6 @@ transcript: sec: 3732 time: '1:02:12' who: Loïc -description: Master end-to-end IoT data pipelines, lakehouse & data modeling, learn - ETL, ingestion patterns and core model strategies to scale analytics and speed delivery. -intro: How do you build and scale end-to-end IoT data pipelines and a lakehouse that - supports reliable core data modeling across teams? In this episode, Loïc Magnien, - Lead Data at Mylight150 with a decade in database management, data engineering, - product ownership and architecture, walks through practical patterns for IoT pipelines, - lakehouse design and analytics modeling. We cover sensor data aggregation and structural - health monitoring, ETL and automation for ingestion from loggers, cloud fundamentals - (Python, Azure), and the move from data management to data architect responsibilities. - Loïc explains lakehouse layering (bronze, silver, gold) and data quality expectations, - how to design dimensions, facts and metrics to serve multiple consumers, and strategies - for reusable ingestion, transformation and datamart templates. He also discusses - hiring and team scale, balancing hands-on engineering with stakeholder engagement, - using DBT and LLMs for technology scouting, and pragmatic tradeoffs between reusable - components and project-specific solutions. Listen to learn actionable guidance on - architecture outcomes, agile delivery with proofs of concept, and building core - models that align teams and business goals. 
-dateadded: '2023-10-01' -duration: PT01H27S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=0 - endOffset: 105 -- name: 'Career overview: From data manager to data lead' - startOffset: 105 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=105 - endOffset: 204 -- name: 'Early role: Sensor data aggregation & structural health monitoring' - startOffset: 204 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=204 - endOffset: 344 -- name: 'Data management vs analyst: responsibilities and data discovery' - startOffset: 344 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=344 - endOffset: 441 -- name: 'Automation to data engineering: ETL, scripting, and process automation' - startOffset: 441 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=441 - endOffset: 561 -- name: 'End-to-end IoT pipelines: loggers, ingestion, and reporting' - startOffset: 561 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=561 - endOffset: 687 -- name: 'Domain expertise: civil engineering aiding data diagnosis' - startOffset: 687 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=687 - endOffset: 891 -- name: 'Adapting to cloud & IoT: learning Python, Azure, and cloud fundamentals' - startOffset: 891 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=891 - endOffset: 1261 -- name: 'Hiring mindset: evaluating experience, scale, and cloud adaptability' - startOffset: 1261 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1261 - endOffset: 1367 -- name: 'Data architect role: seniority, end-to-end ownership, and modeling' - startOffset: 1367 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1367 - endOffset: 1640 -- name: 'Architecture outcome: team alignment and optimized data processes' - startOffset: 1640 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1640 - endOffset: 1796 -- name: 'Lakehouse layering: bronze, silver, gold and data quality expectations' - startOffset: 1796 - url: 
https://www.youtube.com/watch?v=qWG--iYO2uc&t=1796 - endOffset: 1978 -- name: 'Analytics modeling: dimensions, facts, metrics, and stakeholder discovery' - startOffset: 1978 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1978 - endOffset: 2160 -- name: 'Core model strategy: supporting multiple consumers and departments' - startOffset: 2160 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2160 - endOffset: 2230 -- name: 'Role balance: hands-on engineering vs stakeholder engagement over time' - startOffset: 2230 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2230 - endOffset: 2551 -- name: 'Empowerment & prioritization: scaling teams and aligning with business goals' - startOffset: 2551 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2551 - endOffset: 2653 -- name: 'Staying technical: one-on-ones, demos, and hands-on proofs of concept' - startOffset: 2653 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2653 - endOffset: 3045 -- name: 'Technology scouting: DBT, LLMs, newsletters and community curation' - startOffset: 3045 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3045 - endOffset: 3208 -- name: 'Agile delivery: draft specs, proof of concept pipelines, and iteration' - startOffset: 3208 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3208 - endOffset: 3432 -- name: 'Reusable templates: ingestion, transformation, and datamart patterns' - startOffset: 3432 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3432 - endOffset: 3574 -- name: 'Design tradeoffs: reusable components vs project-specific solutions' - startOffset: 3574 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3574 - endOffset: 3651 -- name: 'Follow-up: guest contact and LinkedIn connection' - startOffset: 3651 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3651 - endOffset: 3691 -- name: Episode recap & closing - startOffset: 3691 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3691 - endOffset: 3627 --- Links: diff --git 
a/_podcast/s11e07-from-digital-marketing-to-analytics-engineering.md b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md similarity index 96% rename from _podcast/s11e07-from-digital-marketing-to-analytics-engineering.md rename to _podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md index 7fd8798c..52160a62 100644 --- a/_podcast/s11e07-from-digital-marketing-to-analytics-engineering.md +++ b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md @@ -1,19 +1,145 @@ --- +title: 'Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook' +short: From Digital Marketing to Analytics Engineering +season: 11 episode: 7 guests: - nikolamaksimovic +image: images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg ids: anchor: From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s youtube: GawJ7mG5ElQ -image: images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s apple: https://podcasts.apple.com/us/podcast/from-digital-marketing-to-analytics-engineering-nikola/id1541710331?i=1000586740912 spotify: https://open.spotify.com/episode/5VwS6ijaToirTzR7Xd5Phw?si=OsOVLOzBSt2sIgvbRS3krg youtube: https://www.youtube.com/watch?v=GawJ7mG5ElQ -season: 11 -short: From Digital Marketing to Analytics Engineering -title: 'Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook' + +description: "Discover DBT, SQL & data modeling tactics for pivoting into analytics engineering: learn migration, tooling, A/B testing, and a career playbook to get hired." +intro: "How do you transition from digital marketing into analytics engineering—and master DBT, SQL, and data modeling in the process? 
In this episode, Nikola Maksimovic shares his complete career transformation journey, from startup marketing roles in London and Berlin to growth marketing at Ecosia, and ultimately his pandemic-driven pivot into BI and analytics engineering. Nikola reveals the step-by-step learning path that worked for him—SQL fundamentals, hands-on BI projects, strategic conversations with internal data teams—plus the essential technical skills that got him hired: advanced SQL, data pipeline understanding, and Python foundations.

You'll get an inside look at real analytics engineering work: spearheading a company-wide DBT migration, navigating data modeling decisions (wide vs narrow tables, incremental strategies), and working with modern data stacks including Snowplow, DBT, Looker/LookML, Redshift, Airflow, Airbyte, and Redash. We also explore A/B testing frameworks, product analytics implementation, and the nuanced differences between analytics engineer and data analyst roles. Nikola shares his proven transition playbook (Excel → SQL → dashboards → meaningful projects), networking tactics that opened doors, mentorship approaches, and the communities and resources that accelerated his learning.

Whether you're in marketing, operations, or any non-technical role considering a move into data, this episode provides a concrete roadmap with actionable steps, realistic timelines, and insider insights to help you successfully pivot into analytics engineering." +topics: +- data science +- analytics engineering +- career transition +- tools +dateadded: 2022-11-19 + +duration: PT00H54M34S + +quotableClips: +- name: 'Episode Overview: Switching from Marketing to Analytics Engineering' + startOffset: 0 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=0 + endOffset: 32 +- name: 'Early Career & Startup Experience: London, Berlin, Movinga' + startOffset: 32 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=32 + endOffset: 64 +- name: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' + startOffset: 64 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=64 + endOffset: 173 +- name: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' + startOffset: 173 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=173 + endOffset: 438 +- name: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' + startOffset: 438 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=438 + endOffset: 525 +- name: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' + startOffset: 525 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=525 + endOffset: 593 +- name: 'Internal Pathway: Conversations with BI Team and Required Skills' + startOffset: 593 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=593 + endOffset: 662 +- name: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' + startOffset: 662 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=662 + endOffset: 770 +- name: 'Transition Phase: Balancing Marketing Work and BI Projects' + startOffset: 770 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=770 + endOffset: 854 +- name: 'Current Responsibilities: Analytics Engineering, Product Support & A/B 
Testing' + startOffset: 854 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=854 + endOffset: 1114 +- name: 'Data Modeling in Practice: DBT Migration and Transformation Layers' + startOffset: 1114 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1114 + endOffset: 1234 +- name: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, + Redash' + startOffset: 1234 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1234 + endOffset: 1328 +- name: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' + startOffset: 1328 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1328 + endOffset: 1392 +- name: 'Looker & LookML Experience: Reporting and Dashboard Building' + startOffset: 1392 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1392 + endOffset: 1491 +- name: 'Infrastructure Choices: Self‑Hosted Tooling vs DBT Cloud' + startOffset: 1491 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1491 + endOffset: 1506 +- name: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational + Fit' + startOffset: 1506 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1506 + endOffset: 1720 +- name: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' + startOffset: 1720 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1720 + endOffset: 1828 +- name: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' + startOffset: 1828 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1828 + endOffset: 2026 +- name: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' + startOffset: 2026 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2026 + endOffset: 2130 +- name: 'Nontraditional Background: Classics to Data — Just‑In‑Time Learning and Udemy + SQL' + startOffset: 2130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2130 + endOffset: 2307 +- name: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' + 
startOffset: 2307 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2307 + endOffset: 2376 +- name: 'Domain Knowledge Advantage: Marketing Funnel, User Journey & Empathy' + startOffset: 2376 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2376 + endOffset: 2510 +- name: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' + startOffset: 2510 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2510 + endOffset: 2709 +- name: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' + startOffset: 2709 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2709 + endOffset: 3023 +- name: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' + startOffset: 3023 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3023 + endOffset: 3130 +- name: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally + Optimistic)' + startOffset: 3130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3130 + endOffset: 3226 +- name: 'Contact & Wrap‑Up: Finding Nikola on LinkedIn and Episode Close' + startOffset: 3226 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3226 + endOffset: 3274 + transcript: - header: 'Episode Overview: Switching from Marketing to Analytics Engineering' - line: This week, we'll talk about switching careers from marketing to analytics @@ -963,139 +1089,6 @@ transcript: sec: 3274 time: '54:34' who: Nikola -description: 'Discover DBT, SQL & data modeling tactics for pivoting into analytics - engineering: learn migration, tooling, A/B testing, and a career playbook to get - hired.' -intro: 'How do you move from marketing into analytics engineering—and learn DBT, SQL, - and data modeling along the way? In this episode, Nikola Maksimovic walks through - that exact career pivot, from early startup roles in London and Berlin to growth - marketing at Ecosia and a pandemic‑era shift toward BI and analytics engineering. 
- Nikola outlines the practical learning path—SQL courses, small BI projects, conversations - with internal BI teams—and the core technical skills you’ll need: advanced SQL, - data pipeline familiarity, and Python basics.

We dig into real-world analytics - engineering work: leading a DBT migration, data modeling tradeoffs (wide vs narrow - tables, incrementalization), tooling stacks like Snowplow, DBT, Looker/LookML, Redshift, - Airflow, Airbyte and Redash, plus A/B testing and product analytics use cases. Nikola - also shares a transition playbook (Excel → SQL → dashboards → projects), mentorship - and networking strategies, and recommended reading and communities.

Listen - to get a practical career playbook and actionable guidance on SQL, DBT, data modeling, - and the organizational fit between analytics engineer and data analyst. Find Nikola - on LinkedIn (nikola-maksimovic-40188183).' -dateadded: '2022-11-19' -duration: PT00H54M34S -quotableClips: -- name: 'Episode Overview: Switching from Marketing to Analytics Engineering' - startOffset: 0 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=0 - endOffset: 32 -- name: 'Early Career & Startup Experience: London, Berlin, Movinga' - startOffset: 32 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=32 - endOffset: 64 -- name: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' - startOffset: 64 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=64 - endOffset: 173 -- name: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' - startOffset: 173 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=173 - endOffset: 438 -- name: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' - startOffset: 438 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=438 - endOffset: 525 -- name: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' - startOffset: 525 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=525 - endOffset: 593 -- name: 'Internal Pathway: Conversations with BI Team and Required Skills' - startOffset: 593 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=593 - endOffset: 662 -- name: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' - startOffset: 662 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=662 - endOffset: 770 -- name: 'Transition Phase: Balancing Marketing Work and BI Projects' - startOffset: 770 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=770 - endOffset: 854 -- name: 'Current Responsibilities: Analytics Engineering, Product Support & A/B Testing' - startOffset: 854 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=854 - endOffset: 1114 -- name: 'Data 
Modeling in Practice: DBT Migration and Transformation Layers' - startOffset: 1114 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1114 - endOffset: 1234 -- name: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, - Redash' - startOffset: 1234 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1234 - endOffset: 1328 -- name: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' - startOffset: 1328 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1328 - endOffset: 1392 -- name: 'Looker & LookML Experience: Reporting and Dashboard Building' - startOffset: 1392 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1392 - endOffset: 1491 -- name: 'Infrastructure Choices: Self‑Hosted Tooling vs DBT Cloud' - startOffset: 1491 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1491 - endOffset: 1506 -- name: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational - Fit' - startOffset: 1506 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1506 - endOffset: 1720 -- name: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' - startOffset: 1720 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1720 - endOffset: 1828 -- name: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' - startOffset: 1828 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1828 - endOffset: 2026 -- name: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' - startOffset: 2026 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2026 - endOffset: 2130 -- name: 'Nontraditional Background: Classics to Data — Just‑In‑Time Learning and Udemy - SQL' - startOffset: 2130 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2130 - endOffset: 2307 -- name: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' - startOffset: 2307 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2307 - endOffset: 2376 -- name: 'Domain Knowledge 
Advantage: Marketing Funnel, User Journey & Empathy' - startOffset: 2376 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2376 - endOffset: 2510 -- name: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' - startOffset: 2510 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2510 - endOffset: 2709 -- name: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' - startOffset: 2709 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2709 - endOffset: 3023 -- name: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' - startOffset: 3023 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3023 - endOffset: 3130 -- name: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally - Optimistic)' - startOffset: 3130 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3130 - endOffset: 3226 -- name: 'Contact & Wrap‑Up: Finding Nikola on LinkedIn and Episode Close' - startOffset: 3226 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3226 - endOffset: 3274 --- Links: diff --git a/_podcast/s07e09-from-math-teacher-to-analytics-engineer.md b/_podcast/from-math-graduate-to-data-analytics.md similarity index 97% rename from _podcast/s07e09-from-math-teacher-to-analytics-engineer.md rename to _podcast/from-math-graduate-to-data-analytics.md index 3dda99eb..edfecb9d 100644 --- a/_podcast/s07e09-from-math-teacher-to-analytics-engineer.md +++ b/_podcast/from-math-graduate-to-data-analytics.md @@ -1,38 +1,152 @@ --- +title: 'How to Break into Data Analytics: Networking, Portfolio, SQL & Interview Prep' +short: From Math Teacher to Analytics Engineer +season: 7 episode: 9 guests: - juanpablo -intro: 'How do you actually break into data analytics — and what combination of networking, - portfolio work, SQL skills, and interview prep gets you hired? 
In this episode, Juan - Pablo Murillo, an AI and data professional now at Google with prior roles as an - Amazon Business Intelligence Engineer and data scientist at T‑Mobile, walks through - a practical path from math grad to analytics roles.

We cover the full playbook: - where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, - EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present - projects for hiring managers. Juan addresses bootcamp trade‑offs, networking wins - from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, - finding contract or pro bono work, and resume/STAR interview prep. He also discusses - role realities for BI and analytics engineering and employer branding to build credibility. -

Listen for actionable steps and specific tactics—how to structure three - portfolio projects, message templates for outreach, and interview preparation tips—to - help you break into data analytics, improve SQL interview readiness, and turn public - work into job opportunities.' -topics: -- career switch -- data analytics -- career growth +image: images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg ids: anchor: From-Math-Teacher-to-Analytics-Engineer---Juan-Pablo-e1fplc1 youtube: qh6-HDhw2xY -image: images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Math-Teacher-to-Analytics-Engineer---Juan-Pablo-e1fplc1 apple: https://podcasts.apple.com/us/podcast/from-math-teacher-to-analytics-engineer-juan-pablo/id1541710331?i=1000554506607 spotify: https://open.spotify.com/episode/153XI6DvtNWHYzSAv2UTqw youtube: https://www.youtube.com/watch?v=qh6-HDhw2xY -season: 7 -short: From Math Teacher to Analytics Engineer -title: 'How to Break into Data Analytics: Networking, Portfolio, SQL & Interview Prep' + +description: 'Discover data analytics: build a portfolio, master SQL & networking, interview prep, cold outreach and project READMEs to land job offers faster.' +intro: 'How do you actually break into data analytics — and what combination of networking, portfolio work, SQL skills, and interview prep gets you hired? In this episode, Juan Pablo Murillo, an AI and data professional now at Google with prior roles as an Amazon Business Intelligence Engineer and data scientist at T‑Mobile, walks through a practical path from math grad to analytics roles.

We cover the full playbook: where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present projects for hiring managers. Juan addresses bootcamp trade‑offs, networking wins from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, finding contract or pro bono work, and resume/STAR interview prep. He also discusses role realities for BI and analytics engineering and employer branding to build credibility.

Listen for actionable steps and specific tactics—how to structure three portfolio projects, message templates for outreach, and interview preparation tips—to help you break into data analytics, improve SQL interview readiness, and turn public work into job opportunities.' +topics: +- career transition +- data analytics +- career growth +dateadded: 2022-03-19 + +duration: PT01H03M20S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=0 + endOffset: 111 +- name: 'Background & Motivation: Math Graduate to Data Analytics' + startOffset: 111 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=111 + endOffset: 197 +- name: 'Early Roles & Mentoring: Consulting, T-Mobile, Amazon Path' + startOffset: 197 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=197 + endOffset: 246 +- name: 'Community & Resources: Amplifying Learning Platforms' + startOffset: 246 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=246 + endOffset: 325 +- name: Math Foundations & Machine Learning Relevance + startOffset: 325 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=325 + endOffset: 504 +- name: 'Transition Path: Biostatistics, R, SAS and Discovering SQL' + startOffset: 504 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=504 + endOffset: 787 +- name: 'Bootcamp Trade-offs: Cost, Network and a Nine-Month Job Search' + startOffset: 787 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=787 + endOffset: 972 +- name: 'Networking Wins: Meetups Leading to First Offer' + startOffset: 972 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=972 + endOffset: 1110 +- name: 'Building Credibility: Employer Brand & Social Proof' + startOffset: 1110 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1110 + endOffset: 1217 +- name: 'Uncrowded Doors: Alternative Job‑Hunting Strategies' + startOffset: 1217 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1217 + endOffset: 1286 +- name: 'LinkedIn Tactics: Active Posting, 
Commenting & Visibility' + startOffset: 1286 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1286 + endOffset: 1427 +- name: 'Resume Readiness: Quick Sharing and On-the-Spot Opportunities' + startOffset: 1427 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1427 + endOffset: 1463 +- name: 'Portfolio Essentials: rpubs, EDA, Visualizations & Basic ML' + startOffset: 1463 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1463 + endOffset: 1599 +- name: 'Portfolio Strategy: Three Projects and Publicizing Work' + startOffset: 1599 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1599 + endOffset: 1699 +- name: 'Meetup Tactics: Spotting and Approaching Hiring Managers' + startOffset: 1699 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1699 + endOffset: 1947 +- name: 'Cold Outreach & DIY Internships: 200 Messages and Trial Offers' + startOffset: 1947 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1947 + endOffset: 2106 +- name: 'Finding Contract Work: Dice, Recruiter Calls and Freelance Tradeoffs' + startOffset: 2106 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2106 + endOffset: 2292 +- name: 'Pro Bono & Nonprofit Projects: Catchafire for Real Experience' + startOffset: 2292 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2292 + endOffset: 2512 +- name: 'Messaging Strategy: Personalization, Alumni Hooks & Templates' + startOffset: 2512 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2512 + endOffset: 2666 +- name: 'Consistency & Visibility: Posting Frequency and the Algorithm' + startOffset: 2666 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2666 + endOffset: 2718 +- name: 'Portfolio Hosting Options: Zyro, GitHub, WordPress, Hashnode' + startOffset: 2718 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2718 + endOffset: 2899 +- name: 'Project Presentation: Clean README, Docs and Repo Organization' + startOffset: 2899 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2899 + endOffset: 2974 +- name: 'Skills Roadmap 
& Interview Prep: SQL, Python, Visualization' + startOffset: 2974 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2974 + endOffset: 3171 +- name: 'Role Realities: BI / Analytics Engineer Work at Amazon' + startOffset: 3171 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3171 + endOffset: 3362 +- name: 'Career Advice: Consistency, Soft Skills and STAR Format' + startOffset: 3362 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3362 + endOffset: 3475 +- name: 'Online Networking: Hopin Random Date & Virtual Meetups' + startOffset: 3475 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3475 + endOffset: 3599 +- name: 'Communicating Impact: Summarizing Projects for Hiring Managers' + startOffset: 3599 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3599 + endOffset: 3666 +- name: 'Project Hygiene: Version Control and Shared Repositories' + startOffset: 3666 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3666 + endOffset: 3751 +- name: Personal Branding & Contact Info + startOffset: 3751 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3751 + endOffset: 3793 +- name: Episode Closing + startOffset: 3793 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3793 + endOffset: 3800 + transcript: - header: Podcast Introduction - line: This week, we'll talk about transitioning to analytics. And we have a special @@ -1054,131 +1168,6 @@ transcript: sec: 3802 time: '1:03:22' who: Juan Pablo -description: 'Discover data analytics: build a portfolio, master SQL & networking, - interview prep, cold outreach and project READMEs to land job offers faster.' 
-dateadded: '2022-03-19' -duration: PT01H03M20S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=0 - endOffset: 111 -- name: 'Background & Motivation: Math Graduate to Data Analytics' - startOffset: 111 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=111 - endOffset: 197 -- name: 'Early Roles & Mentoring: Consulting, T-Mobile, Amazon Path' - startOffset: 197 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=197 - endOffset: 246 -- name: 'Community & Resources: Amplifying Learning Platforms' - startOffset: 246 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=246 - endOffset: 325 -- name: Math Foundations & Machine Learning Relevance - startOffset: 325 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=325 - endOffset: 504 -- name: 'Transition Path: Biostatistics, R, SAS and Discovering SQL' - startOffset: 504 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=504 - endOffset: 787 -- name: 'Bootcamp Trade-offs: Cost, Network and a Nine-Month Job Search' - startOffset: 787 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=787 - endOffset: 972 -- name: 'Networking Wins: Meetups Leading to First Offer' - startOffset: 972 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=972 - endOffset: 1110 -- name: 'Building Credibility: Employer Brand & Social Proof' - startOffset: 1110 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1110 - endOffset: 1217 -- name: 'Uncrowded Doors: Alternative Job‑Hunting Strategies' - startOffset: 1217 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1217 - endOffset: 1286 -- name: 'LinkedIn Tactics: Active Posting, Commenting & Visibility' - startOffset: 1286 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1286 - endOffset: 1427 -- name: 'Resume Readiness: Quick Sharing and On-the-Spot Opportunities' - startOffset: 1427 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1427 - endOffset: 1463 -- name: 'Portfolio Essentials: rpubs, EDA, 
Visualizations & Basic ML' - startOffset: 1463 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1463 - endOffset: 1599 -- name: 'Portfolio Strategy: Three Projects and Publicizing Work' - startOffset: 1599 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1599 - endOffset: 1699 -- name: 'Meetup Tactics: Spotting and Approaching Hiring Managers' - startOffset: 1699 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1699 - endOffset: 1947 -- name: 'Cold Outreach & DIY Internships: 200 Messages and Trial Offers' - startOffset: 1947 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1947 - endOffset: 2106 -- name: 'Finding Contract Work: Dice, Recruiter Calls and Freelance Tradeoffs' - startOffset: 2106 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2106 - endOffset: 2292 -- name: 'Pro Bono & Nonprofit Projects: Catchafire for Real Experience' - startOffset: 2292 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2292 - endOffset: 2512 -- name: 'Messaging Strategy: Personalization, Alumni Hooks & Templates' - startOffset: 2512 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2512 - endOffset: 2666 -- name: 'Consistency & Visibility: Posting Frequency and the Algorithm' - startOffset: 2666 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2666 - endOffset: 2718 -- name: 'Portfolio Hosting Options: Zyro, GitHub, WordPress, Hashnode' - startOffset: 2718 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2718 - endOffset: 2899 -- name: 'Project Presentation: Clean README, Docs and Repo Organization' - startOffset: 2899 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2899 - endOffset: 2974 -- name: 'Skills Roadmap & Interview Prep: SQL, Python, Visualization' - startOffset: 2974 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2974 - endOffset: 3171 -- name: 'Role Realities: BI / Analytics Engineer Work at Amazon' - startOffset: 3171 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3171 - endOffset: 3362 -- name: 'Career Advice: 
Consistency, Soft Skills and STAR Format' - startOffset: 3362 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3362 - endOffset: 3475 -- name: 'Online Networking: Hopin Random Date & Virtual Meetups' - startOffset: 3475 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3475 - endOffset: 3599 -- name: 'Communicating Impact: Summarizing Projects for Hiring Managers' - startOffset: 3599 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3599 - endOffset: 3666 -- name: 'Project Hygiene: Version Control and Shared Repositories' - startOffset: 3666 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3666 - endOffset: 3751 -- name: Personal Branding & Contact Info - startOffset: 3751 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3751 - endOffset: 3793 -- name: Episode Closing - startOffset: 3793 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3793 - endOffset: 3800 --- Links: diff --git a/_podcast/s03e06-from-physics-to-machine-learning.md b/_podcast/from-physics-to-computer-vision-career-transition.md similarity index 97% rename from _podcast/s03e06-from-physics-to-machine-learning.md rename to _podcast/from-physics-to-computer-vision-career-transition.md index aadef128..cce3d2a6 100644 --- a/_podcast/s03e06-from-physics-to-machine-learning.md +++ b/_podcast/from-physics-to-computer-vision-career-transition.md @@ -1,12 +1,11 @@ --- -title: 'Switch to Computer Vision & Deep Learning: Roadmap, Kaggle Projects, Mentors - & Interview Prep' +title: 'Switch to Computer Vision & Deep Learning: Roadmap, Kaggle Projects, Mentors & Interview Prep' short: From Physics to Machine Learning +season: 3 +episode: 6 guests: - tatianagabruseva image: images/podcast/s03e06-from-physics-to-machine-learning.jpg -season: 3 -episode: 6 ids: youtube: wJPi6Ip9PX0 anchor: From-Physics-to-Machine-Learning---Tatiana-Gabruseva-e10r4pl @@ -15,6 +14,127 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/From-Physics-to-Machine-Learning---Tatiana-Gabruseva-e10r4pl spotify: 
https://open.spotify.com/episode/4Kk7xXfD5t2VHnLDHpdW1y apple: https://podcasts.apple.com/us/podcast/from-physics-to-machine-learning-tatiana-gabruseva/id1541710331?i=1000521740775 + +description: 'Master computer vision & deep learning with a clear roadmap: Kaggle projects, mentorship strategies and interview prep to land roles and build deployed models.' +intro: How do you switch into computer vision and deep learning from a non‑industry background — and build a portfolio that lands interviews? In this episode, Tatiana Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master now working as a Senior ML Engineer at Cork University Hospital, maps a practical career-change roadmap. Drawing on her move from a physics PhD during maternity leave, Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), hands‑on projects (Kaggle competitions, internships, Omdena‑style collaborations, end‑to‑end pet projects with data collection, labeling, deployment and Docker), and where to start Kaggle with minimal Python.

You’ll hear tactical advice on mentorship — finding and nurturing long‑term mentors — plus networking, team building for competitions and papers, and overcoming impostor syndrome with mock interviews and LeetCode practice. She also shares prioritization strategies (Pareto, outsourcing), mental rehearsal techniques, boundary setting, and self‑care to avoid burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, and connect with the data science community to accelerate a switch into computer vision and deep learning +topics: +- career transition +- physics +- deep learning +- machine learning +- career growth +- academia +- mentorship +dateadded: 2021-05-14 + +duration: PT01H06M13S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=0 + endOffset: 117 +- name: 'Career origin: physics PhD to computer vision deep learning' + startOffset: 117 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=117 + endOffset: 152 +- name: 'Transition catalyst: maternity leave, online courses and internship' + startOffset: 152 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=152 + endOffset: 260 +- name: 'Career-change summary: sharing a Twitter thread of practical lessons' + startOffset: 260 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=260 + endOffset: 347 +- name: 'Network makeover: building supportive data science circles' + startOffset: 347 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=347 + endOffset: 470 +- name: Overcoming fears and age stereotypes in career change + startOffset: 470 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=470 + endOffset: 533 +- name: 'Eliminating distractions: focused time management during maternity leave' + startOffset: 533 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=533 + endOffset: 649 +- name: 'Impostor syndrome remedy: interviews and mock interviewing practice' + startOffset: 649 + url: 
https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=649 + endOffset: 892 +- name: 'Selective attention: focusing on positive signals and mentors' + startOffset: 892 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=892 + endOffset: 956 +- name: 'Team building: finding teammates for Kaggle competitions and papers' + startOffset: 956 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=956 + endOffset: 1264 +- name: 'Prioritization: Pareto principle, outsourcing and avoiding perfectionism' + startOffset: 1264 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1264 + endOffset: 1425 +- name: 'Mental rehearsal: initial creation, visualization and Sankalpa technique' + startOffset: 1425 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1425 + endOffset: 1688 +- name: 'Mentorship strategies: finding and nurturing long-term mentors' + startOffset: 1688 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1688 + endOffset: 1902 +- name: 'Boundary setting: learning to say no and protect your time' + startOffset: 1902 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1902 + endOffset: 2065 +- name: 'Embracing failure: treating setbacks as growth opportunities' + startOffset: 2065 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2065 + endOffset: 2250 +- name: 'Self-care tactics: sleep, support systems and avoiding burnout' + startOffset: 2250 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2250 + endOffset: 2554 +- name: 'Kaggle vs internships and Omdena-style projects: pros and cons' + startOffset: 2554 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2554 + endOffset: 2800 +- name: 'End-to-end pet projects: data collection, labeling, deployment and Docker' + startOffset: 2800 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2800 + endOffset: 2969 +- name: 'Learning roadmap: Python, ML/DL courses, SQL, algorithms and system design' + startOffset: 2969 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2969 + endOffset: 3220 +- name: 'Starting Kaggle 
with minimal Python: practical beginner advice' + startOffset: 3220 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3220 + endOffset: 3284 +- name: 'Improving focus: meditation, analytical practice and achieving flow' + startOffset: 3284 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3284 + endOffset: 3476 +- name: 'Astroinformatics overview: ML applications in astronomy' + startOffset: 3476 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3476 + endOffset: 3569 +- name: 'Physics background advantage: math, problem solving and modeling' + startOffset: 3569 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3569 + endOffset: 3753 +- name: 'Leaving academia: lab constraints, maternity leaves and cloud credits' + startOffset: 3753 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3753 + endOffset: 3874 +- name: 'Interview preparation: LeetCode, mock interviews and system design prep' + startOffset: 3874 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3874 + endOffset: 4058 +- name: 'Where to connect: LinkedIn, Twitter and DataTalks.Club follow-up' + startOffset: 4058 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=4058 + endOffset: 3973 + transcript: - header: Podcast Introduction - header: 'Career origin: physics PhD to computer vision deep learning' @@ -1111,132 +1231,6 @@ transcript: sec: 4090 time: '1:08:10' who: Alexey -description: 'Master computer vision & deep learning with a clear roadmap: Kaggle - projects, mentorship strategies and interview prep to land roles and build deployed - models.' -intro: How do you switch into computer vision and deep learning from a non‑industry - background — and build a portfolio that lands interviews? In this episode, Tatiana - Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master - now working as a Senior ML Engineer at Cork University Hospital, maps a practical - career-change roadmap. 
Drawing on her move from a physics PhD during maternity leave, - Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), - hands‑on projects (Kaggle competitions, internships, Omdena‑style collaborations, - end‑to‑end pet projects with data collection, labeling, deployment and Docker), - and where to start Kaggle with minimal Python.

You’ll hear tactical advice - on mentorship — finding and nurturing long‑term mentors — plus networking, team - building for competitions and papers, and overcoming impostor syndrome with mock - interviews and LeetCode practice. She also shares prioritization strategies (Pareto, - outsourcing), mental rehearsal techniques, boundary setting, and self‑care to avoid - burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, - and connect with the data science community to accelerate a switch into computer - vision and deep learning. -dateadded: '2021-05-14' -duration: PT01H06M13S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=0 - endOffset: 117 -- name: 'Career origin: physics PhD to computer vision deep learning' - startOffset: 117 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=117 - endOffset: 152 -- name: 'Transition catalyst: maternity leave, online courses and internship' - startOffset: 152 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=152 - endOffset: 260 -- name: 'Career-change summary: sharing a Twitter thread of practical lessons' - startOffset: 260 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=260 - endOffset: 347 -- name: 'Network makeover: building supportive data science circles' - startOffset: 347 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=347 - endOffset: 470 -- name: Overcoming fears and age stereotypes in career change - startOffset: 470 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=470 - endOffset: 533 -- name: 'Eliminating distractions: focused time management during maternity leave' - startOffset: 533 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=533 - endOffset: 649 -- name: 'Impostor syndrome remedy: interviews and mock interviewing practice' - startOffset: 649 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=649 - endOffset: 892 -- name: 'Selective attention: focusing on positive signals and mentors' 
- startOffset: 892 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=892 - endOffset: 956 -- name: 'Team building: finding teammates for Kaggle competitions and papers' - startOffset: 956 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=956 - endOffset: 1264 -- name: 'Prioritization: Pareto principle, outsourcing and avoiding perfectionism' - startOffset: 1264 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1264 - endOffset: 1425 -- name: 'Mental rehearsal: initial creation, visualization and Sankalpa technique' - startOffset: 1425 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1425 - endOffset: 1688 -- name: 'Mentorship strategies: finding and nurturing long-term mentors' - startOffset: 1688 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1688 - endOffset: 1902 -- name: 'Boundary setting: learning to say no and protect your time' - startOffset: 1902 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1902 - endOffset: 2065 -- name: 'Embracing failure: treating setbacks as growth opportunities' - startOffset: 2065 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2065 - endOffset: 2250 -- name: 'Self-care tactics: sleep, support systems and avoiding burnout' - startOffset: 2250 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2250 - endOffset: 2554 -- name: 'Kaggle vs internships and Omdena-style projects: pros and cons' - startOffset: 2554 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2554 - endOffset: 2800 -- name: 'End-to-end pet projects: data collection, labeling, deployment and Docker' - startOffset: 2800 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2800 - endOffset: 2969 -- name: 'Learning roadmap: Python, ML/DL courses, SQL, algorithms and system design' - startOffset: 2969 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2969 - endOffset: 3220 -- name: 'Starting Kaggle with minimal Python: practical beginner advice' - startOffset: 3220 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3220 - endOffset: 
3284 -- name: 'Improving focus: meditation, analytical practice and achieving flow' - startOffset: 3284 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3284 - endOffset: 3476 -- name: 'Astroinformatics overview: ML applications in astronomy' - startOffset: 3476 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3476 - endOffset: 3569 -- name: 'Physics background advantage: math, problem solving and modeling' - startOffset: 3569 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3569 - endOffset: 3753 -- name: 'Leaving academia: lab constraints, maternity leaves and cloud credits' - startOffset: 3753 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3753 - endOffset: 3874 -- name: 'Interview preparation: LeetCode, mock interviews and system design prep' - startOffset: 3874 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3874 - endOffset: 4058 -- name: 'Where to connect: LinkedIn, Twitter and DataTalks.Club follow-up' - startOffset: 4058 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=4058 - endOffset: 3973 --- Links: diff --git a/_podcast/s04e01-from-swe-to-ml.md b/_podcast/from-software-engineer-to-machine-learning.md similarity index 98% rename from _podcast/s04e01-from-swe-to-ml.md rename to _podcast/from-software-engineer-to-machine-learning.md index 0cf84087..8f79d185 100644 --- a/_podcast/s04e01-from-swe-to-ml.md +++ b/_podcast/from-software-engineer-to-machine-learning.md @@ -1,12 +1,11 @@ --- -title: 'From Software Engineering to Machine Learning: 7 Lessons, Tools, MLOps & Project - Roadmap' +title: 'From Software Engineering to Machine Learning: 7 Lessons, Tools, MLOps & Project Roadmap' short: From Software Engineering to Machine Learning +season: 4 +episode: 1 guests: - svpino image: images/podcast/s04e01-from-swe-to-ml.jpg -season: 4 -episode: 1 ids: youtube: xVYOdRrN7hw anchor: From-Software-Engineering-to-Machine-Learning---Santiago-Valdarrama-e139s63 @@ -15,6 +14,135 @@ links: anchor: 
https://anchor.fm/datatalksclub/episodes/From-Software-Engineering-to-Machine-Learning---Santiago-Valdarrama-e139s63 spotify: https://open.spotify.com/episode/0PHDZPGyXgyDM9HH7QzVdZ apple: https://podcasts.apple.com/us/podcast/from-software-engineering-to-machine-learning-santiago/id1541710331?i=1000526870384 + +description: 'Learn practical machine learning for software engineers: 7 lessons, Python tools, MLOps & a project roadmap to build, deploy and monitor real ML systems.' +intro: 'How do you move from software engineering into practical machine learning without getting stuck on theory or math? In this episode, Santiago Valdarrama — Director of Computer Vision and a computer scientist with two decades of software experience — walks through a pragmatic roadmap for software engineers transitioning to machine learning.

We cover seven practical lessons for getting started (start projects, think long-term, teach and join communities, build real projects, prioritize coding, analyze problems first, and favor pragmatism), core ML tooling (Python, NumPy, Pandas, Matplotlib, scikit-learn), and recommended learning resources (Google ML Crash Course, Kaggle, Deep Learning with Python, Hands-On Machine Learning). Santiago compares problem-based vs top-down learning, outlines a course roadmap for engineers, and explains ML engineering skills: data pipelines, modeling, deployment, monitoring, plus MLOps fundamentals like APIs, Docker, and cloud providers.

Listen to gain an actionable project roadmap, tools checklist, and concrete strategies to conquer math anxiety and ship ML systems — practical guidance for engineers who want to build, deploy, and maintain real machine learning solutions.' +dateadded: 2021-06-25 + +duration: PT00H59M24S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=0 + endOffset: 159 +- name: 'Guest Overview: Santiago — Director of Computer Vision' + startOffset: 159 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=159 + endOffset: 208 +- name: Adding Machine Learning to a Software Engineering Skillset + startOffset: 208 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=208 + endOffset: 291 +- name: 'Personal & Academic Background: Cuba, Bachelor’s, Georgia Tech MS' + startOffset: 291 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=291 + endOffset: 393 +- name: 'Software Engineers’ Advantage: Coding as a Core ML Skill' + startOffset: 393 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=393 + endOffset: 492 +- name: 'Overcoming Math Anxiety: Practical, Problem-First Learning' + startOffset: 492 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=492 + endOffset: 780 +- name: 'Communicating ML Simply: Teaching and Writing for Understanding' + startOffset: 780 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=780 + endOffset: 971 +- name: Seven Practical Lessons for Starting a Machine Learning Career + startOffset: 971 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=971 + endOffset: 1045 +- name: 'Lesson 1 — Take Action: Start Projects Instead of Overpreparing' + startOffset: 1045 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1045 + endOffset: 1149 +- name: 'Lesson 2 — Learning as a Marathon: Long-Term Growth in ML' + startOffset: 1149 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1149 + endOffset: 1238 +- name: 'Lesson 3 — Community & Teaching: Accelerating Progress Together' + startOffset: 
1238 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1238 + endOffset: 1338 +- name: 'Lesson 4 — Apply Knowledge: Build and Share Real Projects' + startOffset: 1338 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1338 + endOffset: 1500 +- name: 'Lesson 5 — Math vs Coding: Coding Often Determines Success' + startOffset: 1500 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1500 + endOffset: 1599 +- name: 'Lesson 6 — Problem Analysis First: Design Solutions Before Code' + startOffset: 1599 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1599 + endOffset: 1745 +- name: 'Lesson 7 — Pragmatism Over Purism: Deliver Value Without Knowing Every Detail' + startOffset: 1745 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1745 + endOffset: 1990 +- name: 'Core ML Tooling: Python, NumPy, Pandas, Matplotlib, scikit-learn' + startOffset: 1990 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1990 + endOffset: 2179 +- name: 'Learning Approaches: Problem-Based vs Top-Down (Theory First)' + startOffset: 2179 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2179 + endOffset: 2328 +- name: 'Recommended Courses & Tutorials: Google ML Crash Course, Kaggle' + startOffset: 2328 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2328 + endOffset: 2469 +- name: 'Essential Books: Deep Learning with Python; Hands-On Machine Learning' + startOffset: 2469 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2469 + endOffset: 2528 +- name: Course Roadmap for Software Engineers Transitioning to ML + startOffset: 2528 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2528 + endOffset: 2641 +- name: 'Improving Coding Skills: Learn Python by Building Solutions' + startOffset: 2641 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2641 + endOffset: 2727 +- name: 'Build Projects Without ML: Automation Examples (Selenium)' + startOffset: 2727 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2727 + endOffset: 2799 +- name: 'ML Engineering Skills: Data Pipeline, 
Modeling, Deployment, Monitoring' + startOffset: 2799 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2799 + endOffset: 2963 +- name: 'Deployment & MLOps Fundamentals: APIs, Docker, Cloud Providers' + startOffset: 2963 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2963 + endOffset: 3081 +- name: 'Learning Cloud Pragmatically: Learn What the Project Demands' + startOffset: 3081 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3081 + endOffset: 3139 +- name: 'Machine Learning vs Data Science: Roles, Tools, and Focus' + startOffset: 3139 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3139 + endOffset: 3310 +- name: 'Getting Started: Andrew Ng Coursera vs Hands-On Project Work' + startOffset: 3310 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3310 + endOffset: 3397 +- name: 'Conquering Math: Intuition, Translate Formulas to Code' + startOffset: 3397 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3397 + endOffset: 3594 +- name: 'Episode Resources: Santiago’s Twitter, Course Links' + startOffset: 3594 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3594 + endOffset: 3639 +- name: Closing Remarks & Conference Announcements + startOffset: 3639 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3639 + endOffset: 3564 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Santiago — Director of Computer Vision' @@ -1113,146 +1241,6 @@ transcript: sec: 3723 time: '1:02:03' who: Alexey -description: 'Learn practical machine learning for software engineers: 7 lessons, - Python tools, MLOps & a project roadmap to build, deploy and monitor real ML systems.' -intro: 'How do you move from software engineering into practical machine learning - without getting stuck on theory or math? In this episode, Santiago Valdarrama — Director - of Computer Vision and a computer scientist with two decades of software experience - — walks through a pragmatic roadmap for software engineers transitioning to machine - learning.

We cover seven practical lessons for getting started (start projects, - think long-term, teach and join communities, build real projects, prioritize coding, - analyze problems first, and favor pragmatism), core ML tooling (Python, NumPy, Pandas, - Matplotlib, scikit-learn), and recommended learning resources (Google ML Crash Course, - Kaggle, Deep Learning with Python, Hands-On Machine Learning). Santiago compares - problem-based vs top-down learning, outlines a course roadmap for engineers, and - explains ML engineering skills: data pipelines, modeling, deployment, monitoring, - plus MLOps fundamentals like APIs, Docker, and cloud providers.

Listen - to gain an actionable project roadmap, tools checklist, and concrete strategies - to conquer math anxiety and ship ML systems — practical guidance for engineers who - want to build, deploy, and maintain real machine learning solutions.' -dateadded: '2021-06-25' -duration: PT00H59M24S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=0 - endOffset: 159 -- name: 'Guest Overview: Santiago — Director of Computer Vision' - startOffset: 159 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=159 - endOffset: 208 -- name: Adding Machine Learning to a Software Engineering Skillset - startOffset: 208 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=208 - endOffset: 291 -- name: 'Personal & Academic Background: Cuba, Bachelor’s, Georgia Tech MS' - startOffset: 291 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=291 - endOffset: 393 -- name: 'Software Engineers’ Advantage: Coding as a Core ML Skill' - startOffset: 393 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=393 - endOffset: 492 -- name: 'Overcoming Math Anxiety: Practical, Problem-First Learning' - startOffset: 492 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=492 - endOffset: 780 -- name: 'Communicating ML Simply: Teaching and Writing for Understanding' - startOffset: 780 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=780 - endOffset: 971 -- name: Seven Practical Lessons for Starting a Machine Learning Career - startOffset: 971 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=971 - endOffset: 1045 -- name: 'Lesson 1 — Take Action: Start Projects Instead of Overpreparing' - startOffset: 1045 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1045 - endOffset: 1149 -- name: 'Lesson 2 — Learning as a Marathon: Long-Term Growth in ML' - startOffset: 1149 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1149 - endOffset: 1238 -- name: 'Lesson 3 — Community & Teaching: Accelerating Progress Together' - 
startOffset: 1238 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1238 - endOffset: 1338 -- name: 'Lesson 4 — Apply Knowledge: Build and Share Real Projects' - startOffset: 1338 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1338 - endOffset: 1500 -- name: 'Lesson 5 — Math vs Coding: Coding Often Determines Success' - startOffset: 1500 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1500 - endOffset: 1599 -- name: 'Lesson 6 — Problem Analysis First: Design Solutions Before Code' - startOffset: 1599 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1599 - endOffset: 1745 -- name: 'Lesson 7 — Pragmatism Over Purism: Deliver Value Without Knowing Every Detail' - startOffset: 1745 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1745 - endOffset: 1990 -- name: 'Core ML Tooling: Python, NumPy, Pandas, Matplotlib, scikit-learn' - startOffset: 1990 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1990 - endOffset: 2179 -- name: 'Learning Approaches: Problem-Based vs Top-Down (Theory First)' - startOffset: 2179 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2179 - endOffset: 2328 -- name: 'Recommended Courses & Tutorials: Google ML Crash Course, Kaggle' - startOffset: 2328 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2328 - endOffset: 2469 -- name: 'Essential Books: Deep Learning with Python; Hands-On Machine Learning' - startOffset: 2469 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2469 - endOffset: 2528 -- name: Course Roadmap for Software Engineers Transitioning to ML - startOffset: 2528 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2528 - endOffset: 2641 -- name: 'Improving Coding Skills: Learn Python by Building Solutions' - startOffset: 2641 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2641 - endOffset: 2727 -- name: 'Build Projects Without ML: Automation Examples (Selenium)' - startOffset: 2727 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2727 - endOffset: 2799 -- name: 'ML Engineering Skills: 
Data Pipeline, Modeling, Deployment, Monitoring' - startOffset: 2799 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2799 - endOffset: 2963 -- name: 'Deployment & MLOps Fundamentals: APIs, Docker, Cloud Providers' - startOffset: 2963 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2963 - endOffset: 3081 -- name: 'Learning Cloud Pragmatically: Learn What the Project Demands' - startOffset: 3081 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3081 - endOffset: 3139 -- name: 'Machine Learning vs Data Science: Roles, Tools, and Focus' - startOffset: 3139 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3139 - endOffset: 3310 -- name: 'Getting Started: Andrew Ng Coursera vs Hands-On Project Work' - startOffset: 3310 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3310 - endOffset: 3397 -- name: 'Conquering Math: Intuition, Translate Formulas to Code' - startOffset: 3397 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3397 - endOffset: 3594 -- name: 'Episode Resources: Santiago’s Twitter, Course Links' - startOffset: 3594 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3594 - endOffset: 3639 -- name: Closing Remarks & Conference Announcements - startOffset: 3639 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3639 - endOffset: 3564 --- Links: diff --git a/_podcast/s07e08-from-data-science-to-data-engineering.md b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md similarity index 99% rename from _podcast/s07e08-from-data-science-to-data-engineering.md rename to _podcast/from-software-engineering-data-science-to-data-engineering-leadership.md index 1c581057..84a900f7 100644 --- a/_podcast/s07e08-from-data-science-to-data-engineering.md +++ b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md @@ -1,33 +1,131 @@ --- +title: 'How to Become a Data Engineer: Skills, MLOps, Pipelines, SQL, CI/CD & Cloud' +short: From Data Science to Data Engineering +season: 7 episode: 8 guests: 
- ellenkonig -description: 'Master data engineering, MLOps and pipelines: learn CI/CD, cloud cost - control and SQL/Python skills to switch careers and accelerate growth now.' -intro: In this episode, Ellen König—Head of Engineering at alcemy—shares her journey - from software and data science to data engineering leadership. She explains why - many professionals make the switch, the skills that matter most (from DevOps and - CI/CD to collaboration), and how to prepare through side projects and software fundamentals. -

Ellen also breaks down key tools like Git, Docker, and Airflow, discusses - the realities of cloud costs and team structures, and offers practical advice for - anyone planning a transition into data engineering. -date: 2025-11-07 +image: images/podcast/s07e08-from-data-science-to-data-engineering.jpg ids: anchor: From-Data-Science-to-Data-Engineering---Ellen-Knig-e1fgfbm youtube: 3TTu-hYzxeg -image: images/podcast/s07e08-from-data-science-to-data-engineering.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Data-Science-to-Data-Engineering---Ellen-Knig-e1fgfbm apple: https://podcasts.apple.com/us/podcast/from-data-science-to-data-engineering-ellen-k%C3%B6nig/id1541710331?i=1000553736781 spotify: https://open.spotify.com/episode/4R9F5B4f8vf5r5yQEmwYiu youtube: https://www.youtube.com/watch?v=3TTu-hYzxeg -season: 7 -short: From Data Science to Data Engineering -title: 'How to Become a Data Engineer: Skills, MLOps, Pipelines, SQL, CI/CD & Cloud' + +description: 'Master data engineering, MLOps and pipelines: learn CI/CD, cloud cost control and SQL/Python skills to switch careers and accelerate growth now.' +intro: In this episode, Ellen König—Head of Engineering at alcemy—shares her journey from software and data science to data engineering leadership. She explains why many professionals make the switch, the skills that matter most (from DevOps and CI/CD to collaboration), and how to prepare through side projects and software fundamentals.

Ellen also breaks down key tools like Git, Docker, and Airflow, discusses the realities of cloud costs and team structures, and offers practical advice for anyone planning a transition into data engineering topics: - data science - data engineering -- career switch +- career transition +- MLOps +- tools +dateadded: 2022-03-14 +date: 2025-11-07 + +duration: PT00H59M45S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=0 + endOffset: 111 +- name: 'Career Narrative: From Backend Developer to Data Engineering Lead' + startOffset: 111 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=111 + endOffset: 392 +- name: 'Motivation to Switch: Blackbox Models, Code Quality, and Professional Fit' + startOffset: 392 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=392 + endOffset: 581 +- name: 'Role Overlap: Data Science Tasks That Are Data Engineering Work' + startOffset: 581 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=581 + endOffset: 722 +- name: 'Data Intuition: How Data Is Produced, Structured, and Biased' + startOffset: 722 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=722 + endOffset: 835 +- name: 'Transferable Strengths: Pipelines, Stakeholder Communication, Exploration' + startOffset: 835 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=835 + endOffset: 902 +- name: 'Core Upskills: Collaborative Coding, CI/CD and DevOps Practices' + startOffset: 902 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=902 + endOffset: 1054 +- name: 'MLOps vs Research: When Data Scientists Need Production Engineering Skills' + startOffset: 1054 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1054 + endOffset: 1176 +- name: 'Learning Pathways: On-the-Job Mentorship, Bootcamps, and Courses' + startOffset: 1176 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1176 + endOffset: 1285 +- name: 'Experiment First: Side Projects and Small Work Assignments Before Switching' + 
startOffset: 1285 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1285 + endOffset: 1421 +- name: 'Software Foundations: Take General Dev Courses (Web, Mobile) to Learn Engineering' + startOffset: 1421 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1421 + endOffset: 1580 +- name: 'Essential Course Components: Git, Docker, Testing, CLI, Clean Code' + startOffset: 1580 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1580 + endOffset: 1734 +- name: 'Language Guidance: SQL & Python for Analytics; Java/Scala for Streaming' + startOffset: 1734 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1734 + endOffset: 1963 +- name: 'Market Dynamics: Strong Demand for Data Engineers and Expectation Gaps' + startOffset: 1963 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1963 + endOffset: 2140 +- name: 'Teamwork Shift: Adapting to Pair Programming and Close Collaboration' + startOffset: 2140 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2140 + endOffset: 2300 +- name: 'Organizational Models: Embedded Data Engineers vs Central Platform Teams' + startOffset: 2300 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2300 + endOffset: 2370 +- name: 'Intersection Roles: Analytics Engineer, Data-Science-Engineers, MLOps' + startOffset: 2370 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2370 + endOffset: 2489 +- name: 'Project Recipes: Build Scrapers, ETL Pipelines, Schedulers (Airflow)' + startOffset: 2489 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2489 + endOffset: 2640 +- name: 'Portfolio Example: Domain-Focused Pipelines with Real Data & Automation' + startOffset: 2640 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2640 + endOffset: 2962 +- name: 'Cloud Cost Control: Billing Exploration, Budgets, and Alerting' + startOffset: 2962 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2962 + endOffset: 3166 +- name: 'Entry Strategy: When to Apply for Entry-Level Roles vs Internships' + startOffset: 3166 + url: 
https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3166 + endOffset: 3346 +- name: 'Career Acceleration: Benefits of Consultancies and Large Companies' + startOffset: 3346 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3346 + endOffset: 3516 +- name: 'Cloud Choice: Practical Differences, Local Demand, and Free Tiers' + startOffset: 3516 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3516 + endOffset: 3621 +- name: Closing Remarks & How to Contact Ellen + startOffset: 3621 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3621 + endOffset: 3585 + transcript: - header: Episode Introduction & Guest Overview - header: Episode Introduction & Guest Overview @@ -1731,103 +1829,4 @@ transcript: sec: 3659 time: '1:00:59' who: Ellen -dateadded: '2022-03-14' -duration: PT00H59M45S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=0 - endOffset: 111 -- name: 'Career Narrative: From Backend Developer to Data Engineering Lead' - startOffset: 111 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=111 - endOffset: 392 -- name: 'Motivation to Switch: Blackbox Models, Code Quality, and Professional Fit' - startOffset: 392 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=392 - endOffset: 581 -- name: 'Role Overlap: Data Science Tasks That Are Data Engineering Work' - startOffset: 581 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=581 - endOffset: 722 -- name: 'Data Intuition: How Data Is Produced, Structured, and Biased' - startOffset: 722 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=722 - endOffset: 835 -- name: 'Transferable Strengths: Pipelines, Stakeholder Communication, Exploration' - startOffset: 835 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=835 - endOffset: 902 -- name: 'Core Upskills: Collaborative Coding, CI/CD and DevOps Practices' - startOffset: 902 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=902 - endOffset: 1054 -- name: 'MLOps vs Research: 
When Data Scientists Need Production Engineering Skills' - startOffset: 1054 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1054 - endOffset: 1176 -- name: 'Learning Pathways: On-the-Job Mentorship, Bootcamps, and Courses' - startOffset: 1176 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1176 - endOffset: 1285 -- name: 'Experiment First: Side Projects and Small Work Assignments Before Switching' - startOffset: 1285 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1285 - endOffset: 1421 -- name: 'Software Foundations: Take General Dev Courses (Web, Mobile) to Learn Engineering' - startOffset: 1421 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1421 - endOffset: 1580 -- name: 'Essential Course Components: Git, Docker, Testing, CLI, Clean Code' - startOffset: 1580 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1580 - endOffset: 1734 -- name: 'Language Guidance: SQL & Python for Analytics; Java/Scala for Streaming' - startOffset: 1734 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1734 - endOffset: 1963 -- name: 'Market Dynamics: Strong Demand for Data Engineers and Expectation Gaps' - startOffset: 1963 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1963 - endOffset: 2140 -- name: 'Teamwork Shift: Adapting to Pair Programming and Close Collaboration' - startOffset: 2140 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2140 - endOffset: 2300 -- name: 'Organizational Models: Embedded Data Engineers vs Central Platform Teams' - startOffset: 2300 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2300 - endOffset: 2370 -- name: 'Intersection Roles: Analytics Engineer, Data-Science-Engineers, MLOps' - startOffset: 2370 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2370 - endOffset: 2489 -- name: 'Project Recipes: Build Scrapers, ETL Pipelines, Schedulers (Airflow)' - startOffset: 2489 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2489 - endOffset: 2640 -- name: 'Portfolio Example: Domain-Focused Pipelines with 
Real Data & Automation' - startOffset: 2640 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2640 - endOffset: 2962 -- name: 'Cloud Cost Control: Billing Exploration, Budgets, and Alerting' - startOffset: 2962 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2962 - endOffset: 3166 -- name: 'Entry Strategy: When to Apply for Entry-Level Roles vs Internships' - startOffset: 3166 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3166 - endOffset: 3346 -- name: 'Career Acceleration: Benefits of Consultancies and Large Companies' - startOffset: 3346 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3346 - endOffset: 3516 -- name: 'Cloud Choice: Practical Differences, Local Demand, and Free Tiers' - startOffset: 3516 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3516 - endOffset: 3621 -- name: Closing Remarks & How to Contact Ellen - startOffset: 3621 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3621 - endOffset: 3585 --- diff --git a/_podcast/s12e01-from-software-engineer-to-data-science-manager.md b/_podcast/from-software-engineering-to-leading-data-science-teams.md similarity index 97% rename from _podcast/s12e01-from-software-engineer-to-data-science-manager.md rename to _podcast/from-software-engineering-to-leading-data-science-teams.md index 45c6ceff..20086eaf 100644 --- a/_podcast/s12e01-from-software-engineer-to-data-science-manager.md +++ b/_podcast/from-software-engineering-to-leading-data-science-teams.md @@ -1,20 +1,152 @@ --- +title: 'Transitioning from Software Engineer to Data Science Manager: Search, ML & Leadership' +short: From Software Engineer to Data Science Manager +season: 12 episode: 1 guests: - sadatanwar +image: images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg ids: anchor: From-Software-Engineer-to-Data-Science-Manager---Sadat-Anwar-e1rqkdf youtube: xyTfqIWeKf8 -image: images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg links: anchor: 
https://anchor.fm/datatalksclub/episodes/From-Software-Engineer-to-Data-Science-Manager---Sadat-Anwar-e1rqkdf apple: https://podcasts.apple.com/us/podcast/from-software-engineer-to-data-science-manager-sadat-anwar/id1541710331?i=1000589637838 spotify: https://open.spotify.com/episode/3vOUwe4WtNQFXHRgTcyMtg?si=87o3XW_EQZ-n68nhAeV8Xw youtube: https://www.youtube.com/watch?v=xyTfqIWeKf8 -season: 12 -short: From Software Engineer to Data Science Manager -title: 'Transitioning from Software Engineer to Data Science Manager: Search, ML & - Leadership' + +description: 'Learn to transition into a Data Science Manager: master search engineering, machine learning and leadership to hire, scale teams and measure business impact.' +intro: How do you move from hands-on software engineering into leading data science teams while staying effective on search and machine learning projects? In this episode Sadat Anwar — a people‑centric Data Science Manager and former software engineer fluent in Java, Scala and Python — maps his path from an electronics and informatics background to research in computer vision at Fraunhofer and production search work at OLX.

We cover practical search engineering topics (Solr autoscaling, decoupling search from a monolith, Kotlin services with Python ML satellites), early ML projects and experimentation strategies (master’s thesis on neural nets, 20% time wins, “act before you think”), and engineering safety nets like feature flags, backups and monitoring. Sadat also walks through the promotion/hiring process, documenting leadership evidence, people management challenges (conflict resolution, hiring, motivation loss when stepping away from code), and transitioning into data science management with NLP, trust & safety and fraud detection responsibilities.

Listen for concrete advice on measuring managerial impact, leveraging EM experience to lead data science teams, and tactical steps for engineers aiming to become data science managers in search and ML domains +topics: +- career transition +- software engineering +- data science +- machine learning +- leadership +- team building +dateadded: 2022-12-10 + +duration: PT01H28S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=0 + endOffset: 65 +- name: 'Episode Overview: From Software Engineer to Data Science Manager' + startOffset: 65 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=65 + endOffset: 97 +- name: 'Early Career & Education: Aspiring Doctor, Electronics Bachelor, Informatics + Master' + startOffset: 97 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=97 + endOffset: 192 +- name: 'Fraunhofer Research Assistant: Computer Vision and Car Dent Detection' + startOffset: 192 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=192 + endOffset: 391 +- name: 'Search Engineering at OLX: First Day Firefighting and Team Onboarding' + startOffset: 391 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=391 + endOffset: 522 +- name: 'Solr Autoscaling: Root Cause, CPU Load, and Scheduled Scaling Fixes' + startOffset: 522 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=522 + endOffset: 637 +- name: 'Decoupling Search from Monolith: Proposal, Implementation, and Experiment + Velocity' + startOffset: 637 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=637 + endOffset: 870 +- name: 'Tech Stack for Search: Kotlin Services, Python Satellites, and ML Integrations' + startOffset: 870 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=870 + endOffset: 913 +- name: 'First ML Project: Master’s Thesis on Neural Networks for Energy Forecasting' + startOffset: 913 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=913 + endOffset: 1138 +- name: '20% Time Success: Spellchecker Attempt, Word2Vec, 
and Recommendation System + Wins' + startOffset: 1138 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1138 + endOffset: 1247 +- name: 'Learning Approach: "Act Before You Think" and Building Practical ML Experience' + startOffset: 1247 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1247 + endOffset: 1318 +- name: 'Safety Nets for Experimentation: Feature Flags, Backups, Monitoring, Experimentation' + startOffset: 1318 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1318 + endOffset: 1511 +- name: 'Path to Management: Opportunity, Promotion, and Timing' + startOffset: 1511 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1511 + endOffset: 1588 +- name: 'Internal Hiring Process: Panel Interviews, Feedback, and Internal Candidate + Dynamics' + startOffset: 1588 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1588 + endOffset: 1825 +- name: 'People Management Skills: Conflict Resolution, Hiring, and Business Metrics' + startOffset: 1825 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1825 + endOffset: 2026 +- name: 'The Brag List: Documenting Leadership Evidence for Interviews' + startOffset: 2026 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2026 + endOffset: 2176 +- name: 'Transition Pain Points: Dopamine Loss, Dropping Hands‑On Coding, and Withdrawal' + startOffset: 2176 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2176 + endOffset: 2448 +- name: 'Managing Momentum: Project Ownership, Milestones, and Team Coordination' + startOffset: 2448 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2448 + endOffset: 2613 +- name: 'Transition to Data Science Management: Case Study Interview and Motivation' + startOffset: 2613 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2613 + endOffset: 2678 +- name: 'Trust & Safety Work: Chat Moderation, NLP Challenges, and Fraud Detection' + startOffset: 2678 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2678 + endOffset: 2901 +- name: 'Managerial Domain Knowledge: When ML/NLP 
Expertise Helps vs Coordination + Role' + startOffset: 2901 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2901 + endOffset: 3044 +- name: 'Role Shift: Greater Product Involvement and Stakeholder Influence' + startOffset: 3044 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3044 + endOffset: 3172 +- name: Leveraging EM Experience to Lead Data Science Teams + startOffset: 3172 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3172 + endOffset: 3236 +- name: 'Transitioning from Data Engineering/Analytics to Data Science Manager: Advice' + startOffset: 3236 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3236 + endOffset: 3454 +- name: 'Measuring Managerial Impact: Influence, Business Value, and Team Health Metrics' + startOffset: 3454 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3454 + endOffset: 3516 +- name: 'Recommended Reading: The Manager''s Path and No Rules Rules (Team Culture)' + startOffset: 3516 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3516 + endOffset: 3620 +- name: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' + startOffset: 3620 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3620 + endOffset: 3672 +- name: Podcast Closing and Final Remarks + startOffset: 3672 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3672 + endOffset: 3628 + transcript: - header: Podcast Introduction - header: 'Episode Overview: From Software Engineer to Data Science Manager' @@ -1396,144 +1528,6 @@ transcript: sec: 3693 time: '1:01:33' who: Sadat -description: 'Learn to transition into a Data Science Manager: master search engineering, - machine learning and leadership to hire, scale teams and measure business impact.' -intro: How do you move from hands-on software engineering into leading data science - teams while staying effective on search and machine learning projects? 
In this episode - Sadat Anwar — a people‑centric Data Science Manager and former software engineer - fluent in Java, Scala and Python — maps his path from an electronics and informatics - background to research in computer vision at Fraunhofer and production search work - at OLX.

We cover practical search engineering topics (Solr autoscaling, - decoupling search from a monolith, Kotlin services with Python ML satellites), early - ML projects and experimentation strategies (master’s thesis on neural nets, 20% - time wins, “act before you think”), and engineering safety nets like feature flags, - backups and monitoring. Sadat also walks through the promotion/hiring process, documenting - leadership evidence, people management challenges (conflict resolution, hiring, - motivation loss when stepping away from code), and transitioning into data science - management with NLP, trust & safety and fraud detection responsibilities.

- Listen for concrete advice on measuring managerial impact, leveraging EM experience - to lead data science teams, and tactical steps for engineers aiming to become data - science managers in search and ML domains. -dateadded: '2022-12-10' -duration: PT01H28S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=0 - endOffset: 65 -- name: 'Episode Overview: From Software Engineer to Data Science Manager' - startOffset: 65 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=65 - endOffset: 97 -- name: 'Early Career & Education: Aspiring Doctor, Electronics Bachelor, Informatics - Master' - startOffset: 97 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=97 - endOffset: 192 -- name: 'Fraunhofer Research Assistant: Computer Vision and Car Dent Detection' - startOffset: 192 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=192 - endOffset: 391 -- name: 'Search Engineering at OLX: First Day Firefighting and Team Onboarding' - startOffset: 391 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=391 - endOffset: 522 -- name: 'Solr Autoscaling: Root Cause, CPU Load, and Scheduled Scaling Fixes' - startOffset: 522 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=522 - endOffset: 637 -- name: 'Decoupling Search from Monolith: Proposal, Implementation, and Experiment - Velocity' - startOffset: 637 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=637 - endOffset: 870 -- name: 'Tech Stack for Search: Kotlin Services, Python Satellites, and ML Integrations' - startOffset: 870 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=870 - endOffset: 913 -- name: 'First ML Project: Master’s Thesis on Neural Networks for Energy Forecasting' - startOffset: 913 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=913 - endOffset: 1138 -- name: '20% Time Success: Spellchecker Attempt, Word2Vec, and Recommendation System - Wins' - startOffset: 1138 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1138 - 
endOffset: 1247 -- name: 'Learning Approach: "Act Before You Think" and Building Practical ML Experience' - startOffset: 1247 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1247 - endOffset: 1318 -- name: 'Safety Nets for Experimentation: Feature Flags, Backups, Monitoring, Experimentation' - startOffset: 1318 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1318 - endOffset: 1511 -- name: 'Path to Management: Opportunity, Promotion, and Timing' - startOffset: 1511 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1511 - endOffset: 1588 -- name: 'Internal Hiring Process: Panel Interviews, Feedback, and Internal Candidate - Dynamics' - startOffset: 1588 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1588 - endOffset: 1825 -- name: 'People Management Skills: Conflict Resolution, Hiring, and Business Metrics' - startOffset: 1825 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1825 - endOffset: 2026 -- name: 'The Brag List: Documenting Leadership Evidence for Interviews' - startOffset: 2026 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2026 - endOffset: 2176 -- name: 'Transition Pain Points: Dopamine Loss, Dropping Hands‑On Coding, and Withdrawal' - startOffset: 2176 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2176 - endOffset: 2448 -- name: 'Managing Momentum: Project Ownership, Milestones, and Team Coordination' - startOffset: 2448 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2448 - endOffset: 2613 -- name: 'Transition to Data Science Management: Case Study Interview and Motivation' - startOffset: 2613 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2613 - endOffset: 2678 -- name: 'Trust & Safety Work: Chat Moderation, NLP Challenges, and Fraud Detection' - startOffset: 2678 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2678 - endOffset: 2901 -- name: 'Managerial Domain Knowledge: When ML/NLP Expertise Helps vs Coordination - Role' - startOffset: 2901 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2901 
- endOffset: 3044 -- name: 'Role Shift: Greater Product Involvement and Stakeholder Influence' - startOffset: 3044 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3044 - endOffset: 3172 -- name: Leveraging EM Experience to Lead Data Science Teams - startOffset: 3172 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3172 - endOffset: 3236 -- name: 'Transitioning from Data Engineering/Analytics to Data Science Manager: Advice' - startOffset: 3236 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3236 - endOffset: 3454 -- name: 'Measuring Managerial Impact: Influence, Business Value, and Team Health Metrics' - startOffset: 3454 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3454 - endOffset: 3516 -- name: 'Recommended Reading: The Manager''s Path and No Rules Rules (Team Culture)' - startOffset: 3516 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3516 - endOffset: 3620 -- name: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' - startOffset: 3620 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3620 - endOffset: 3672 -- name: Podcast Closing and Final Remarks - startOffset: 3672 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3672 - endOffset: 3628 --- Links: diff --git a/_podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md b/_podcast/from-startup-engineering-to-freelance-data-science.md similarity index 97% rename from _podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md rename to _podcast/from-startup-engineering-to-freelance-data-science.md index dd5ee653..8de130ba 100644 --- a/_podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md +++ b/_podcast/from-startup-engineering-to-freelance-data-science.md @@ -1,29 +1,151 @@ --- +title: 'Freelance Data Scientist Playbook: MLOps, Model Monitoring, Upwork & Startup Skills' +short: Lessons Learned from Freelancing and Working in a Start-up +season: 14 episode: 5 guests: - antonisstellas -date: 
2025-11-07 -topics: -- Freelance -- Startups -- Entrepreneurship -- Career Growth -- Remote Work -- Consulting -- Self-Employment +image: images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg ids: anchor: ow/datatalksclub/episodes/Lessons-Learned-from-Freelancing-and-Working-in-a-Start-up---Antonis-Stellas-e25g94r youtube: -Gj7SaI-QW4 -image: images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Lessons-Learned-from-Freelancing-and-Working-in-a-Start-up---Antonis-Stellas-e25g94r apple: https://podcasts.apple.com/us/podcast/lessons-learned-from-freelancing-and-working-in-a/id1541710331?i=1000616311575 spotify: https://open.spotify.com/episode/4ehGduC0p734UtwPr5HANq?si=rEC_XP-4RSKYh0TtSQBtlw youtube: https://www.youtube.com/watch?v=-Gj7SaI-QW4 -season: 14 -short: Lessons Learned from Freelancing and Working in a Start-up -title: 'Freelance Data Scientist Playbook: MLOps, Model Monitoring, Upwork & Startup - Skills' + +description: 'Discover MLOps, model monitoring & Upwork tips to build a freelance data scientist portfolio: pricing, onboarding, tools to land clients including MLflow' +intro: How do you transition from startup engineering to a sustainable freelance data science practice while handling MLOps, model monitoring, and client work on Upwork? In this episode, Antonis Stellas — a freelance data scientist at Nanometrisis with a background in applied mathematics, physics and a professional doctorate working on industry consultancy — lays out a practical playbook.

Antonis walks through startup-honed skills (cross-functional roles, lean build-measure-learn, communication and business know-how), concrete MLOps tooling and patterns (MLflow, Prefect, Grafana), and model monitoring essentials like data drift, concept drift and using Evidently AI. He shares a hands-on course project (semiconductor prediction), streaming examples (YouTube metrics into BigQuery/Looker), Kafka/Confluent emphasis, and an open-source Evidently how-to from Hacktoberfest.

For freelancers, Antonis details starting on Upwork — profile building, proposal iteration, pricing strategy, onboarding workflows, invoicing and balancing startup commitments. Listen to get actionable guidance on building a portfolio, selecting projects, monitoring production models, and practical steps to find and retain clients as a freelance data scientist +topics: +- freelance +- startups +- career growth +- remote work +- MLOps +dateadded: 2023-06-10 +date: 2025-11-07 + +duration: PT00H58M11S + +quotableClips: +- name: 'Podcast Introduction: guest Antonis and episode themes' + startOffset: 0 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=0 + endOffset: 148 +- name: 'Early Education: applied mathematics, physics and nanotechnology' + startOffset: 148 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=148 + endOffset: 230 +- name: 'Professional Doctorate: industry projects and consultancy in the Netherlands' + startOffset: 230 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=230 + endOffset: 335 +- name: 'Nanometrisis Focus: nanoscale inspection for chips, razors and cosmetics' + startOffset: 335 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=335 + endOffset: 499 +- name: 'Career Choice: choosing a startup over a corporation' + startOffset: 499 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=499 + endOffset: 716 +- name: 'Role Variety in Startups: cross-functional responsibilities' + startOffset: 716 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=716 + endOffset: 870 +- name: 'Small-Team Dynamics: working in a four-person startup' + startOffset: 870 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=870 + endOffset: 949 +- name: 'Skills Acquired: communication, business knowledge and self-organization' + startOffset: 949 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=949 + endOffset: 1059 +- name: 'Lean Methodology: build-measure-learn applied to products and ML' + startOffset: 1059 + url: 
https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1059 + endOffset: 1260 +- name: 'Model Monitoring: data drift, concept drift and Evidently AI' + startOffset: 1260 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1260 + endOffset: 1314 +- name: 'Community Onboarding: discovering and joining DataTalks.Club' + startOffset: 1314 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1314 + endOffset: 1512 +- name: 'MLOps Course Project: semiconductor prediction with MLflow, Prefect, Grafana' + startOffset: 1512 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1512 + endOffset: 1603 +- name: 'Course Recommendations: do exercises, be patient, complete final project' + startOffset: 1603 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1603 + endOffset: 1723 +- name: 'Open Source Contribution: creating an Evidently how-to during Hacktoberfest' + startOffset: 1723 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1723 + endOffset: 1833 +- name: 'Starting on Upwork: goals, platform mechanics and client discovery' + startOffset: 1833 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1833 + endOffset: 1917 +- name: 'Project Types on Upwork: ML, analytics, LLMs and variable durations' + startOffset: 1917 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1917 + endOffset: 2059 +- name: 'Profile Building: portfolios, attachments and iterative improvements' + startOffset: 2059 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2059 + endOffset: 2229 +- name: 'Learning from Rejection: refining proposals and specializing skills' + startOffset: 2229 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2229 + endOffset: 2355 +- name: 'Motivation for Freelancing: learning, extra income and persistence' + startOffset: 2355 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2355 + endOffset: 2439 +- name: 'Pricing Approach: hourly rates, client type and valuing your time' + startOffset: 2439 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2439 + endOffset: 2553 
+- name: 'Onboarding Workflow: data inspection, milestones and client alignment' + startOffset: 2553 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2553 + endOffset: 2718 +- name: 'Financial Setup: registering as a freelancer and invoicing considerations' + startOffset: 2718 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2718 + endOffset: 2848 +- name: 'Balancing Commitments: wearing many hats across startup and freelance work' + startOffset: 2848 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2848 + endOffset: 2980 +- name: 'Client Acquisition Tips: focus, upskilling and leveraging community resources' + startOffset: 2980 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2980 + endOffset: 3102 +- name: 'Data Engineering Course: streaming emphasis and Kafka/Confluent usage' + startOffset: 3102 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3102 + endOffset: 3217 +- name: 'Example Project: streaming YouTube metrics into BigQuery and Looker' + startOffset: 3217 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3217 + endOffset: 3401 +- name: 'Portfolio Advice: choose projects you enjoy and prioritize exploration' + startOffset: 3401 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3401 + endOffset: 3491 +- name: 'Recommended Reading: The Lean Startup, Lean Analytics, Designing ML Systems' + startOffset: 3491 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3491 + endOffset: 3583 +- name: Closing Remarks and invitation to the community + startOffset: 3583 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3583 + endOffset: 3491 + transcript: - header: 'Podcast Introduction: guest Antonis and episode themes' - header: 'Podcast Introduction: guest Antonis and episode themes' @@ -1026,143 +1148,6 @@ transcript: sec: 3583 time: '59:43' who: Alexey -intro: How do you transition from startup engineering to a sustainable freelance data - science practice while handling MLOps, model monitoring, and client work on Upwork? 
- In this episode, Antonis Stellas — a freelance data scientist at Nanometrisis with - a background in applied mathematics, physics and a professional doctorate working - on industry consultancy — lays out a practical playbook.

Antonis walks - through startup-honed skills (cross-functional roles, lean build-measure-learn, - communication and business know-how), concrete MLOps tooling and patterns (MLflow, - Prefect, Grafana), and model monitoring essentials like data drift, concept drift - and using Evidently AI. He shares a hands-on course project (semiconductor prediction), - streaming examples (YouTube metrics into BigQuery/Looker), Kafka/Confluent emphasis, - and an open-source Evidently how-to from Hacktoberfest.

For freelancers, - Antonis details starting on Upwork — profile building, proposal iteration, pricing - strategy, onboarding workflows, invoicing and balancing startup commitments. Listen - to get actionable guidance on building a portfolio, selecting projects, monitoring - production models, and practical steps to find and retain clients as a freelance - data scientist. -description: 'Discover MLOps, model monitoring & Upwork tips to build a freelance - data scientist portfolio: pricing, onboarding, tools to land clients including MLflow' -dateadded: '2023-06-10' -duration: PT00H58M11S -quotableClips: -- name: 'Podcast Introduction: guest Antonis and episode themes' - startOffset: 0 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=0 - endOffset: 148 -- name: 'Early Education: applied mathematics, physics and nanotechnology' - startOffset: 148 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=148 - endOffset: 230 -- name: 'Professional Doctorate: industry projects and consultancy in the Netherlands' - startOffset: 230 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=230 - endOffset: 335 -- name: 'Nanometrisis Focus: nanoscale inspection for chips, razors and cosmetics' - startOffset: 335 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=335 - endOffset: 499 -- name: 'Career Choice: choosing a startup over a corporation' - startOffset: 499 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=499 - endOffset: 716 -- name: 'Role Variety in Startups: cross-functional responsibilities' - startOffset: 716 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=716 - endOffset: 870 -- name: 'Small-Team Dynamics: working in a four-person startup' - startOffset: 870 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=870 - endOffset: 949 -- name: 'Skills Acquired: communication, business knowledge and self-organization' - startOffset: 949 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=949 - endOffset: 1059 -- name: 'Lean Methodology: build-measure-learn 
applied to products and ML' - startOffset: 1059 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1059 - endOffset: 1260 -- name: 'Model Monitoring: data drift, concept drift and Evidently AI' - startOffset: 1260 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1260 - endOffset: 1314 -- name: 'Community Onboarding: discovering and joining DataTalks.Club' - startOffset: 1314 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1314 - endOffset: 1512 -- name: 'MLOps Course Project: semiconductor prediction with MLflow, Prefect, Grafana' - startOffset: 1512 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1512 - endOffset: 1603 -- name: 'Course Recommendations: do exercises, be patient, complete final project' - startOffset: 1603 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1603 - endOffset: 1723 -- name: 'Open Source Contribution: creating an Evidently how-to during Hacktoberfest' - startOffset: 1723 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1723 - endOffset: 1833 -- name: 'Starting on Upwork: goals, platform mechanics and client discovery' - startOffset: 1833 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1833 - endOffset: 1917 -- name: 'Project Types on Upwork: ML, analytics, LLMs and variable durations' - startOffset: 1917 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1917 - endOffset: 2059 -- name: 'Profile Building: portfolios, attachments and iterative improvements' - startOffset: 2059 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2059 - endOffset: 2229 -- name: 'Learning from Rejection: refining proposals and specializing skills' - startOffset: 2229 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2229 - endOffset: 2355 -- name: 'Motivation for Freelancing: learning, extra income and persistence' - startOffset: 2355 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2355 - endOffset: 2439 -- name: 'Pricing Approach: hourly rates, client type and valuing your time' - startOffset: 2439 - url: 
https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2439 - endOffset: 2553 -- name: 'Onboarding Workflow: data inspection, milestones and client alignment' - startOffset: 2553 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2553 - endOffset: 2718 -- name: 'Financial Setup: registering as a freelancer and invoicing considerations' - startOffset: 2718 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2718 - endOffset: 2848 -- name: 'Balancing Commitments: wearing many hats across startup and freelance work' - startOffset: 2848 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2848 - endOffset: 2980 -- name: 'Client Acquisition Tips: focus, upskilling and leveraging community resources' - startOffset: 2980 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2980 - endOffset: 3102 -- name: 'Data Engineering Course: streaming emphasis and Kafka/Confluent usage' - startOffset: 3102 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3102 - endOffset: 3217 -- name: 'Example Project: streaming YouTube metrics into BigQuery and Looker' - startOffset: 3217 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3217 - endOffset: 3401 -- name: 'Portfolio Advice: choose projects you enjoy and prioritize exploration' - startOffset: 3401 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3401 - endOffset: 3491 -- name: 'Recommended Reading: The Lean Startup, Lean Analytics, Designing ML Systems' - startOffset: 3491 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3491 - endOffset: 3583 -- name: Closing Remarks and invitation to the community - startOffset: 3583 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3583 - endOffset: 3491 --- Links: diff --git a/_podcast/s08e09-from-academia-to-data-analytics-and-engineering.md b/_podcast/get-data-analytics-and-data-engineering-job.md similarity index 98% rename from _podcast/s08e09-from-academia-to-data-analytics-and-engineering.md rename to _podcast/get-data-analytics-and-data-engineering-job.md index bc01a49f..e9412593 
100644 --- a/_podcast/s08e09-from-academia-to-data-analytics-and-engineering.md +++ b/_podcast/get-data-analytics-and-data-engineering-job.md @@ -1,40 +1,120 @@ --- +title: 'How I Landed a Data Engineering Job: Bootcamp, Docker, Airflow, AWS & Interview Tips' +short: From Academia to Data Analytics and Engineering +season: 8 episode: 9 guests: - gloriaquiceno -intro: How do you go from neuroscience research to a data engineering role — and what - practical steps and skills actually get you hired? In this episode, Gloria Quiceno, - Senior Analytics Engineer at ICE, walks through her transition from neuroscience - labs to rebuilding enterprise data platforms (including a BI rebuild that saved - €250K), and the concrete tools and tactics that landed her a data engineering job. - We cover her early lab automation and scripting experience, learning MATLAB/R and - Python, a first industry role as a business data analyst, and a four-month job-search - timeline from bootcamp graduation to offer. Gloria explains building reproducible - pipelines with Docker, orchestrating workflows with Airflow and AWS (including Step - Functions), designing ETL/ELT and Snowflake integrations, capstone projects like - a Twitter data pipeline, and volunteer ML practice with Omdena. She also shares - application strategy (tracking ~130 applications), handling live coding and take-home - interviews, GDPR recruitment rights, salary negotiation, and portfolio advice to - stand out. Listen to get actionable interview tips, portfolio project ideas, and - hands-on guidance for landing a data engineering job using bootcamps, Docker, Airflow, - and AWS. 
+image: images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg ids: anchor: From-Academia-to-Data-Analytics-and-Engineering---Gloria-Quiceno-e1ikrd8 youtube: 0wANfIvum4U -image: images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Academia-to-Data-Analytics-and-Engineering---Gloria-Quiceno-e1ikrd8 apple: https://podcasts.apple.com/us/podcast/from-academia-to-data-analytics-and-engineering/id1541710331?i=1000562898040 spotify: https://open.spotify.com/episode/1kDpXugcmDdVJ6qUAiNnHQ?si=aa62cc4dce5f41b2 youtube: https://www.youtube.com/watch?v=0wANfIvum4U -season: 8 -short: From Academia to Data Analytics and Engineering -title: 'How I Landed a Data Engineering Job: Bootcamp, Docker, Airflow, AWS & Interview - Tips' + +description: Learn data engineering with Docker and Airflow—bootcamp ROI, interview tips, portfolio tips and salary tactics to land a data engineering role +intro: How do you go from neuroscience research to a data engineering role — and what practical steps and skills actually get you hired? In this episode, Gloria Quiceno, Senior Analytics Engineer at ICE, walks through her transition from neuroscience labs to rebuilding enterprise data platforms (including a BI rebuild that saved €250K), and the concrete tools and tactics that landed her a data engineering job. We cover her early lab automation and scripting experience, learning MATLAB/R and Python, a first industry role as a business data analyst, and a four-month job-search timeline from bootcamp graduation to offer. Gloria explains building reproducible pipelines with Docker, orchestrating workflows with Airflow and AWS (including Step Functions), designing ETL/ELT and Snowflake integrations, capstone projects like a Twitter data pipeline, and volunteer ML practice with Omdena. 
She also shares application strategy (tracking ~130 applications), handling live coding and take-home interviews, GDPR recruitment rights, salary negotiation, and portfolio advice to stand out. Listen to get actionable interview tips, portfolio project ideas, and hands-on guidance for landing a data engineering job using bootcamps, Docker, Airflow, and AWS topics: - career switch - data engineering - career growth +dateadded: 2022-05-21 + +duration: PT00H57M59S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=0 + endOffset: 118 +- name: 'Background: Transition from Neuroscience Research to Industry' + startOffset: 118 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=118 + endOffset: 161 +- name: 'Lab Automation & Scripting: Igor, C-style Code and Data Collection' + startOffset: 161 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=161 + endOffset: 393 +- name: 'Learning Curve: MATLAB, R and Falling in Love with Programming' + startOffset: 393 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=393 + endOffset: 466 +- name: 'First Industry Role: Business Data Analyst Duties and SQL Reporting' + startOffset: 466 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=466 + endOffset: 713 +- name: 'Company Overview: Music Metadata, Royalties and Data Workflows' + startOffset: 713 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=713 + endOffset: 974 +- name: 'Job Search Timeline: Bootcamp Graduation to Offer in Four Months' + startOffset: 974 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=974 + endOffset: 1101 +- name: 'Volunteer Experience: Omdena Project for Practical ML Practice' + startOffset: 1101 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1101 + endOffset: 1192 +- name: 'Role Focus: Choosing Data Engineering Tasks and Automation Work' + startOffset: 1192 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1192 + endOffset: 1285 +- name: 'Reproducibility: Docker for 
Collaborative Scripts and AWS Runs' + startOffset: 1285 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1285 + endOffset: 1377 +- name: 'Application Strategy: Tracking ~130 Applications and Organization' + startOffset: 1377 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1377 + endOffset: 1675 +- name: 'Interview Hurdles: Live Coding Pressure and Take-Home Challenges' + startOffset: 1675 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1675 + endOffset: 1819 +- name: 'Applicant Rights: GDPR and Deleting Recruitment Records' + startOffset: 1819 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1819 + endOffset: 1889 +- name: 'Salary Negotiation: Market Research, Confidence and Tactics' + startOffset: 1889 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1889 + endOffset: 2180 +- name: 'Bootcamp ROI: Learning Python, Docker, Airflow and Networking' + startOffset: 2180 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2180 + endOffset: 2245 +- name: 'Retrospective Tips: Earlier Career Coaching and More Networking' + startOffset: 2245 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2245 + endOffset: 2729 +- name: 'Cloud Platforms: AWS vs GCP Experience and Managing Credits' + startOffset: 2729 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2729 + endOffset: 3015 +- name: 'Capstone Project: Twitter Data Pipeline, Docker Containers and Slack Bot' + startOffset: 3015 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3015 + endOffset: 3102 +- name: 'Portfolio Strategy: Custom Projects to Stand Out to Employers' + startOffset: 3102 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3102 + endOffset: 3214 +- name: 'Data Quality: Detecting Bots, Cleaning Twitter Data and Sentiment Bias' + startOffset: 3214 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3214 + endOffset: 3408 +- name: 'Cohort Diversity: Varied Backgrounds and Cross-disciplinary Strengths' + startOffset: 3408 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3408 
+ endOffset: 3517 +- name: 'Closing Thoughts: Practical Advice for Transitioning to Data Engineering' + startOffset: 3517 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3517 + endOffset: 3479 + transcript: - header: Podcast Introduction - header: 'Background: Transition from Neuroscience Research to Industry' @@ -1289,97 +1369,4 @@ transcript: sec: 3597 time: '59:57' who: Alexey -description: Learn data engineering with Docker and Airflow—bootcamp ROI, interview - tips, portfolio tips and salary tactics to land a data engineering role. -dateadded: '2022-05-21' -duration: PT00H57M59S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=0 - endOffset: 118 -- name: 'Background: Transition from Neuroscience Research to Industry' - startOffset: 118 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=118 - endOffset: 161 -- name: 'Lab Automation & Scripting: Igor, C-style Code and Data Collection' - startOffset: 161 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=161 - endOffset: 393 -- name: 'Learning Curve: MATLAB, R and Falling in Love with Programming' - startOffset: 393 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=393 - endOffset: 466 -- name: 'First Industry Role: Business Data Analyst Duties and SQL Reporting' - startOffset: 466 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=466 - endOffset: 713 -- name: 'Company Overview: Music Metadata, Royalties and Data Workflows' - startOffset: 713 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=713 - endOffset: 974 -- name: 'Job Search Timeline: Bootcamp Graduation to Offer in Four Months' - startOffset: 974 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=974 - endOffset: 1101 -- name: 'Volunteer Experience: Omdena Project for Practical ML Practice' - startOffset: 1101 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1101 - endOffset: 1192 -- name: 'Role Focus: Choosing Data Engineering Tasks and Automation Work' - 
startOffset: 1192 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1192 - endOffset: 1285 -- name: 'Reproducibility: Docker for Collaborative Scripts and AWS Runs' - startOffset: 1285 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1285 - endOffset: 1377 -- name: 'Application Strategy: Tracking ~130 Applications and Organization' - startOffset: 1377 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1377 - endOffset: 1675 -- name: 'Interview Hurdles: Live Coding Pressure and Take-Home Challenges' - startOffset: 1675 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1675 - endOffset: 1819 -- name: 'Applicant Rights: GDPR and Deleting Recruitment Records' - startOffset: 1819 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1819 - endOffset: 1889 -- name: 'Salary Negotiation: Market Research, Confidence and Tactics' - startOffset: 1889 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1889 - endOffset: 2180 -- name: 'Bootcamp ROI: Learning Python, Docker, Airflow and Networking' - startOffset: 2180 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2180 - endOffset: 2245 -- name: 'Retrospective Tips: Earlier Career Coaching and More Networking' - startOffset: 2245 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2245 - endOffset: 2729 -- name: 'Cloud Platforms: AWS vs GCP Experience and Managing Credits' - startOffset: 2729 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2729 - endOffset: 3015 -- name: 'Capstone Project: Twitter Data Pipeline, Docker Containers and Slack Bot' - startOffset: 3015 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3015 - endOffset: 3102 -- name: 'Portfolio Strategy: Custom Projects to Stand Out to Employers' - startOffset: 3102 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3102 - endOffset: 3214 -- name: 'Data Quality: Detecting Bots, Cleaning Twitter Data and Sentiment Bias' - startOffset: 3214 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3214 - endOffset: 3408 -- name: 'Cohort 
Diversity: Varied Backgrounds and Cross-disciplinary Strengths' - startOffset: 3408 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3408 - endOffset: 3517 -- name: 'Closing Thoughts: Practical Advice for Transitioning to Data Engineering' - startOffset: 3517 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3517 - endOffset: 3479 --- diff --git a/_podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).md b/_podcast/get-data-engineering-job-prep-and-interview.md similarity index 96% rename from _podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).md rename to _podcast/get-data-engineering-job-prep-and-interview.md index f40962c6..08dee9ca 100644 --- a/_podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).md +++ b/_podcast/get-data-engineering-job-prep-and-interview.md @@ -1,20 +1,141 @@ --- +title: 'Data Engineering Job Prep & Interview Guide: Python, SQL, Portfolio & Job Search Tips' +short: Getting a Data Engineering Job (Summary and Q&A) +season: 9 episode: 3 guests: - jeffkatz -date: 2025-11-07 +image: images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg ids: anchor: Getting-a-Data-Engineering-Job-Summary-and-QA---Jeff-Katz-e1jljmd youtube: asnt7xlyZXQ -image: images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Getting-a-Data-Engineering-Job-Summary-and-QA---Jeff-Katz-e1jljmd apple: https://podcasts.apple.com/us/podcast/getting-a-data-engineering-job-summary-and-q-a-jeff-katz/id1541710331?i=1000566005592 spotify: https://open.spotify.com/episode/1SaZ0QXAIhcdH1gfaNoN4Z?si=OvvNFdTpSu2MCCDOWdYgJQ youtube: https://www.youtube.com/watch?v=asnt7xlyZXQ -season: 9 -short: Getting a Data Engineering Job (Summary and Q&A) -title: Getting a Data Engineering Job (Summary and Q&A) + +description: 'Master data engineering job prep: Python, SQL tips, portfolio strategy, interview formats and job search tactics to land offers faster. 
quick wins' +intro: 'How do you actually get a data engineering job today — and which skills hiring teams care about most? In this episode, Jeff Katz, a Machine Learning Engineer at AppFolio and longtime instructor/founder of Jigsaw Labs and Flatiron School curriculum lead, distills a webinar on hiring demand into practical advice for job seekers. Drawing on applied AI and data engineering experience plus open-source contributions, Jeff walks through the core data engineering skills employers expect: deep Python and SQL, Docker, Airflow, and data warehouse fundamentals.

You’ll hear concrete guidance on portfolio strategy (personal projects and open source), code quality and OOP patterns, the application funnel (LinkedIn → resume → interviews), behavioral and technical interview formats (SQL LeetCode, Python problems, take-home projects), and essential database concepts (views, OLTP vs OLAP). The episode also covers learning resources, transitioning from BI, certification vs skills trade-offs, remote work realities, and how to leverage non-coding experience. Listen to learn a practical roadmap for interviews, portfolio building, and job search tactics to increase your chances of landing a data engineering role.' +topics: +- data engineering +- job search +- tools +dateadded: 2022-06-10 +date: 2025-11-07 + +duration: PT00H48M02S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=0 + endOffset: 36 +- name: 'Webinar Recap: Hiring Demand and Skill Gaps' + startOffset: 36 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=36 + endOffset: 80 +- name: 'Core Skills & Tools: Python, SQL, Docker, Airflow, Data Warehouses' + startOffset: 80 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=80 + endOffset: 109 +- name: 'Python & SQL Depth: Project Volume and Emphasis' + startOffset: 109 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=109 + endOffset: 142 +- name: 'Code Quality & OOP: Small Functions, Classes, Tests' + startOffset: 142 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=142 + endOffset: 166 +- name: 'Portfolio Strategy: Personal Projects and Open Source Contributions' + startOffset: 166 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=166 + endOffset: 218 +- name: 'Application Funnel: LinkedIn, Resume, and Interview Stages' + startOffset: 218 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=218 + endOffset: 315 +- name: 'Behavioral Interview Best Practices: Positivity, Structure, Motivation' + startOffset: 315 + url: 
https://www.youtube.com/watch?v=asnt7xlyZXQ&t=315 + endOffset: 466 +- name: 'Technical Interview Formats: SQL LeetCode, Python Problems, Take-Home Projects' + startOffset: 466 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=466 + endOffset: 581 +- name: 'Core Database Concepts: Views, Materialized Views, OLTP vs OLAP' + startOffset: 581 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=581 + endOffset: 684 +- name: 'Learning Resources: Python Books, Flask Mega-Tutorial, SQL Platforms' + startOffset: 684 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=684 + endOffset: 851 +- name: 'BI to Data Engineering Transition: Upskilling Within Your Role' + startOffset: 851 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=851 + endOffset: 953 +- name: 'Job Search Strategy: Apply Broadly and Avoid Self-Filtering' + startOffset: 953 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=953 + endOffset: 1008 +- name: Leveraging Non-Coding Experience and Domain Expertise + startOffset: 1008 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1008 + endOffset: 1197 +- name: 'Role Differentiation: Data Analyst vs Data Engineer' + startOffset: 1197 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1197 + endOffset: 1316 +- name: 'Certifications vs Skills: When Certificates Help and When They Don’t' + startOffset: 1316 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1316 + endOffset: 1393 +- name: 'Master’s Degree Trade-offs: Research Depth vs Applied Learning' + startOffset: 1393 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1393 + endOffset: 1666 +- name: 'Remote Work Reality: Timezones, Legal Constraints, and Standout Candidates' + startOffset: 1666 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1666 + endOffset: 1806 +- name: 'Teaching & Coaching on Resume: Communication and Mentorship Value' + startOffset: 1806 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1806 + endOffset: 1942 +- name: 'OOP Relevance: Patterns for Airflow and 
Maintainable Code' + startOffset: 1942 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1942 + endOffset: 1983 +- name: 'Language Choices: Python Focus; Java/Scala and Spark Considerations' + startOffset: 1983 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1983 + endOffset: 2109 +- name: 'Interview Load: Typical Number and Style of Technical Questions' + startOffset: 2109 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2109 + endOffset: 2269 +- name: 'Cloud Certification Prep: Learning Fundamentals vs Credential Hunting' + startOffset: 2269 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2269 + endOffset: 2389 +- name: 'Commercial Experience Alternatives: Nonprofits, Contract Work, Internships' + startOffset: 2389 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2389 + endOffset: 2611 +- name: 'Mid-Career Switch: Sales Skills as an Asset in Tech Hiring' + startOffset: 2611 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2611 + endOffset: 2776 +- name: 'Solution Engineer Pathway: Pre-/Post-Sales Roles as Transition Options' + startOffset: 2776 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2776 + endOffset: 2846 +- name: Episode Wrap-Up and Further Resources + startOffset: 2846 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2846 + endOffset: 2882 + transcript: - header: Podcast Introduction - line: We had an amazing webinar about getting a data engineering job. It was a couple @@ -763,134 +884,6 @@ transcript: sec: 2882 time: '48:02' who: Jeff -intro: 'How do you actually get a data engineering job today — and which skills hiring - teams care about most? In this episode, Jeff Katz, a Machine Learning Engineer at - AppFolio and longtime instructor/founder of Jigsaw Labs and Flatiron School curriculum - lead, distills a webinar on hiring demand into practical advice for job seekers. 
- Drawing on applied AI and data engineering experience plus open-source contributions, - Jeff walks through the core data engineering skills employers expect: deep Python - and SQL, Docker, Airflow, and data warehouse fundamentals.

You’ll hear - concrete guidance on portfolio strategy (personal projects and open source), code - quality and OOP patterns, the application funnel (LinkedIn → resume → interviews), - behavioral and technical interview formats (SQL LeetCode, Python problems, take-home - projects), and essential database concepts (views, OLTP vs OLAP). The episode also - covers learning resources, transitioning from BI, certification vs skills trade-offs, - remote work realities, and how to leverage non-coding experience. Listen to learn - a practical roadmap for interviews, portfolio building, and job search tactics to - increase your chances of landing a data engineering role.' -description: 'Master data engineering job prep: Python, SQL tips, portfolio strategy, - interview formats and job search tactics to land offers faster. quick wins' -dateadded: '2022-06-10' -duration: PT00H48M02S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=0 - endOffset: 36 -- name: 'Webinar Recap: Hiring Demand and Skill Gaps' - startOffset: 36 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=36 - endOffset: 80 -- name: 'Core Skills & Tools: Python, SQL, Docker, Airflow, Data Warehouses' - startOffset: 80 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=80 - endOffset: 109 -- name: 'Python & SQL Depth: Project Volume and Emphasis' - startOffset: 109 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=109 - endOffset: 142 -- name: 'Code Quality & OOP: Small Functions, Classes, Tests' - startOffset: 142 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=142 - endOffset: 166 -- name: 'Portfolio Strategy: Personal Projects and Open Source Contributions' - startOffset: 166 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=166 - endOffset: 218 -- name: 'Application Funnel: LinkedIn, Resume, and Interview Stages' - startOffset: 218 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=218 - endOffset: 315 -- name: 'Behavioral 
Interview Best Practices: Positivity, Structure, Motivation' - startOffset: 315 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=315 - endOffset: 466 -- name: 'Technical Interview Formats: SQL LeetCode, Python Problems, Take-Home Projects' - startOffset: 466 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=466 - endOffset: 581 -- name: 'Core Database Concepts: Views, Materialized Views, OLTP vs OLAP' - startOffset: 581 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=581 - endOffset: 684 -- name: 'Learning Resources: Python Books, Flask Mega-Tutorial, SQL Platforms' - startOffset: 684 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=684 - endOffset: 851 -- name: 'BI to Data Engineering Transition: Upskilling Within Your Role' - startOffset: 851 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=851 - endOffset: 953 -- name: 'Job Search Strategy: Apply Broadly and Avoid Self-Filtering' - startOffset: 953 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=953 - endOffset: 1008 -- name: Leveraging Non-Coding Experience and Domain Expertise - startOffset: 1008 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1008 - endOffset: 1197 -- name: 'Role Differentiation: Data Analyst vs Data Engineer' - startOffset: 1197 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1197 - endOffset: 1316 -- name: 'Certifications vs Skills: When Certificates Help and When They Don’t' - startOffset: 1316 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1316 - endOffset: 1393 -- name: 'Master’s Degree Trade-offs: Research Depth vs Applied Learning' - startOffset: 1393 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1393 - endOffset: 1666 -- name: 'Remote Work Reality: Timezones, Legal Constraints, and Standout Candidates' - startOffset: 1666 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1666 - endOffset: 1806 -- name: 'Teaching & Coaching on Resume: Communication and Mentorship Value' - startOffset: 1806 - url: 
https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1806 - endOffset: 1942 -- name: 'OOP Relevance: Patterns for Airflow and Maintainable Code' - startOffset: 1942 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1942 - endOffset: 1983 -- name: 'Language Choices: Python Focus; Java/Scala and Spark Considerations' - startOffset: 1983 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1983 - endOffset: 2109 -- name: 'Interview Load: Typical Number and Style of Technical Questions' - startOffset: 2109 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2109 - endOffset: 2269 -- name: 'Cloud Certification Prep: Learning Fundamentals vs Credential Hunting' - startOffset: 2269 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2269 - endOffset: 2389 -- name: 'Commercial Experience Alternatives: Nonprofits, Contract Work, Internships' - startOffset: 2389 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2389 - endOffset: 2611 -- name: 'Mid-Career Switch: Sales Skills as an Asset in Tech Hiring' - startOffset: 2611 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2611 - endOffset: 2776 -- name: 'Solution Engineer Pathway: Pre-/Post-Sales Roles as Transition Options' - startOffset: 2776 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2776 - endOffset: 2846 -- name: Episode Wrap-Up and Further Resources - startOffset: 2846 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2846 - endOffset: 2882 --- Links: diff --git a/_podcast/s01e04-standing-out-as-a-data-scientist.md b/_podcast/get-data-scientist-job.md similarity index 97% rename from _podcast/s01e04-standing-out-as-a-data-scientist.md rename to _podcast/get-data-scientist-job.md index c26b4570..362053ce 100644 --- a/_podcast/s01e04-standing-out-as-a-data-scientist.md +++ b/_podcast/get-data-scientist-job.md @@ -1,11 +1,11 @@ --- title: 'Land Data Scientist Roles: Resumes, Portfolios, Interviews & Recruiter Workflow' short: Standing out as a Data Scientist +season: 1 +episode: 4 guests: - lukewhipps 
image: images/podcast/s01e04-standing-out-as-a-data-scientist.jpg -season: 1 -episode: 4 ids: youtube: Sb4CJlonB3c anchor: Standing-out-as-a-Data-Scientist---Luke-Whipps-envr7e @@ -14,6 +14,135 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Standing-out-as-a-Data-Scientist---Luke-Whipps-envr7e spotify: https://open.spotify.com/episode/2Yxay9HJmd6dvk34MHJ0K2 apple: https://podcasts.apple.com/us/podcast/standing-out-as-a-data-scientist-luke-whipps/id1541710331?i=1000502844994 + +description: Master data scientist resumes, portfolios & interviews—insider recruiter workflow, CV tips, portfolio impact, negotiation and outreach to land roles faster +intro: How do you actually land a data scientist role — from a resume that passes screening to a portfolio that wins interviews and an offer that closes? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast with 8+ years recruiting experience, walks through the recruiter workflow and practical steps data scientists can use to improve hiring outcomes.

We cover Luke’s six‑stage recruitment process (role definition to close), how to define data scientist roles across companies, and recruiter expectations for CV design, information hierarchy, and industry/use‑case alignment. Learn how to structure portfolios to link tech stack to concrete projects, craft a clear career narrative that demonstrates business impact, and prepare for interviews and negotiations. Junior candidates will get guidance on choosing an industry and showing purpose; academics learn how to productize research for industry. You’ll also hear tactical advice on tailored applications, LinkedIn outreach, candidate funnel sizes, salary signals, job‑title alignment, and acceptable tenure patterns.

Listen to gain actionable tips for resumes, portfolios, interviews, and working effectively with recruiters to increase your chances of landing a data scientist role +topics: +- data science +- career growth +- job search +dateadded: 2021-02-23 + +duration: PT01H08M47S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=0 + endOffset: 99 +- name: 'Guest Introduction: Luke Whipps, recruiter and podcast host' + startOffset: 99 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=99 + endOffset: 177 +- name: 'Recruiting background: a decade in data, analytics and AI' + startOffset: 177 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=177 + endOffset: 223 +- name: 'Neural AI origin: founding principles and non‑transactional recruiting' + startOffset: 223 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=223 + endOffset: 316 +- name: 'Community focus: podcasts, events and value‑driven talent work' + startOffset: 316 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=316 + endOffset: 422 +- name: 'Hiring challenges: why data scientist roles vary by company' + startOffset: 422 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=422 + endOffset: 455 +- name: 'Recruitment workflow: six‑stage process from definition to close' + startOffset: 455 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=455 + endOffset: 495 +- name: Role definition & market guidance for data science hires + startOffset: 495 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=495 + endOffset: 554 +- name: Shortlist, interview preparation, feedback and offer negotiation + startOffset: 554 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=554 + endOffset: 683 +- name: 'Candidate funnel sizes: longlists, headhunting and volume hiring' + startOffset: 683 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=683 + endOffset: 847 +- name: 'First impressions: CV design, formatting and professional clarity' + startOffset: 847 + 
url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=847 + endOffset: 975 +- name: Industry and use‑case alignment on resumes for better matches + startOffset: 975 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=975 + endOffset: 1190 +- name: 'Projects & portfolio: linking tech stack to concrete work' + startOffset: 1190 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1190 + endOffset: 1328 +- name: 'Career narrative: tenure, common themes and progression' + startOffset: 1328 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1328 + endOffset: 1504 +- name: Demonstrating business impact and real world use cases + startOffset: 1504 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1504 + endOffset: 1639 +- name: 'CV structure: clarity, audience fit and information hierarchy' + startOffset: 1639 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1639 + endOffset: 1810 +- name: 'Job‑hopping: red flags, ideal tenure and acceptable exceptions' + startOffset: 1810 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1810 + endOffset: 1942 +- name: 'Junior candidates: pick an industry, aim small and show purpose' + startOffset: 1942 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1942 + endOffset: 2274 +- name: 'Tailored applications: research job needs and map your skills' + startOffset: 2274 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2274 + endOffset: 2381 +- name: 'Targeted outreach tactics: emails, LinkedIn and creative approaches' + startOffset: 2381 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2381 + endOffset: 2666 +- name: 'Focus strategy: approach fewer companies and segment your market' + startOffset: 2666 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2666 + endOffset: 2785 +- name: 'Academia → industry: adopt a product mindset and productionize research' + startOffset: 2785 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2785 + endOffset: 3039 +- name: 'Motivation vs money: career focus, progression and 
tradeoffs' + startOffset: 3039 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3039 + endOffset: 3142 +- name: 'Salary signals: asking salary, market alignment and recruiter views' + startOffset: 3142 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3142 + endOffset: 3407 +- name: 'CV formats & length: country differences and the two‑page guideline' + startOffset: 3407 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3407 + endOffset: 3531 +- name: 'Job title alignment: adapt titles to industry norms without lying' + startOffset: 3531 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3531 + endOffset: 3615 +- name: 'Switching backgrounds: web development to machine learning skills' + startOffset: 3615 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3615 + endOffset: 3727 +- name: 'Disclosing other interviews: transparency, trust and recruiter differences' + startOffset: 3727 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3727 + endOffset: 4057 +- name: 'Episode summary: purpose‑driven candidates and standing out as a data scientist' + startOffset: 4057 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=4057 + endOffset: 4127 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Luke Whipps, recruiter and podcast host' @@ -1078,141 +1207,4 @@ transcript: sec: 4226 time: '1:10:26' who: Alexey -description: Master data scientist resumes, portfolios & interviews—insider recruiter - workflow, CV tips, portfolio impact, negotiation and outreach to land roles faster. -intro: How do you actually land a data scientist role — from a resume that passes - screening to a portfolio that wins interviews and an offer that closes? In this - episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast - with 8+ years recruiting experience, walks through the recruiter workflow and practical - steps data scientists can use to improve hiring outcomes.

We cover Luke’s - six‑stage recruitment process (role definition to close), how to define data scientist - roles across companies, and recruiter expectations for CV design, information hierarchy, - and industry/use‑case alignment. Learn how to structure portfolios to link tech - stack to concrete projects, craft a clear career narrative that demonstrates business - impact, and prepare for interviews and negotiations. Junior candidates will get - guidance on choosing an industry and showing purpose; academics learn how to productize - research for industry. You’ll also hear tactical advice on tailored applications, - LinkedIn outreach, candidate funnel sizes, salary signals, job‑title alignment, - and acceptable tenure patterns.

Listen to gain actionable tips for resumes, - portfolios, interviews, and working effectively with recruiters to increase your - chances of landing a data scientist role. -dateadded: '2021-02-23' -duration: PT01H08M47S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=0 - endOffset: 99 -- name: 'Guest Introduction: Luke Whipps, recruiter and podcast host' - startOffset: 99 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=99 - endOffset: 177 -- name: 'Recruiting background: a decade in data, analytics and AI' - startOffset: 177 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=177 - endOffset: 223 -- name: 'Neural AI origin: founding principles and non‑transactional recruiting' - startOffset: 223 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=223 - endOffset: 316 -- name: 'Community focus: podcasts, events and value‑driven talent work' - startOffset: 316 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=316 - endOffset: 422 -- name: 'Hiring challenges: why data scientist roles vary by company' - startOffset: 422 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=422 - endOffset: 455 -- name: 'Recruitment workflow: six‑stage process from definition to close' - startOffset: 455 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=455 - endOffset: 495 -- name: Role definition & market guidance for data science hires - startOffset: 495 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=495 - endOffset: 554 -- name: Shortlist, interview preparation, feedback and offer negotiation - startOffset: 554 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=554 - endOffset: 683 -- name: 'Candidate funnel sizes: longlists, headhunting and volume hiring' - startOffset: 683 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=683 - endOffset: 847 -- name: 'First impressions: CV design, formatting and professional clarity' - startOffset: 847 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=847 
- endOffset: 975 -- name: Industry and use‑case alignment on resumes for better matches - startOffset: 975 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=975 - endOffset: 1190 -- name: 'Projects & portfolio: linking tech stack to concrete work' - startOffset: 1190 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1190 - endOffset: 1328 -- name: 'Career narrative: tenure, common themes and progression' - startOffset: 1328 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1328 - endOffset: 1504 -- name: Demonstrating business impact and real world use cases - startOffset: 1504 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1504 - endOffset: 1639 -- name: 'CV structure: clarity, audience fit and information hierarchy' - startOffset: 1639 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1639 - endOffset: 1810 -- name: 'Job‑hopping: red flags, ideal tenure and acceptable exceptions' - startOffset: 1810 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1810 - endOffset: 1942 -- name: 'Junior candidates: pick an industry, aim small and show purpose' - startOffset: 1942 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1942 - endOffset: 2274 -- name: 'Tailored applications: research job needs and map your skills' - startOffset: 2274 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2274 - endOffset: 2381 -- name: 'Targeted outreach tactics: emails, LinkedIn and creative approaches' - startOffset: 2381 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2381 - endOffset: 2666 -- name: 'Focus strategy: approach fewer companies and segment your market' - startOffset: 2666 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2666 - endOffset: 2785 -- name: 'Academia → industry: adopt a product mindset and productionize research' - startOffset: 2785 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2785 - endOffset: 3039 -- name: 'Motivation vs money: career focus, progression and tradeoffs' - startOffset: 3039 - url: 
https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3039 - endOffset: 3142 -- name: 'Salary signals: asking salary, market alignment and recruiter views' - startOffset: 3142 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3142 - endOffset: 3407 -- name: 'CV formats & length: country differences and the two‑page guideline' - startOffset: 3407 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3407 - endOffset: 3531 -- name: 'Job title alignment: adapt titles to industry norms without lying' - startOffset: 3531 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3531 - endOffset: 3615 -- name: 'Switching backgrounds: web development to machine learning skills' - startOffset: 3615 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3615 - endOffset: 3727 -- name: 'Disclosing other interviews: transparency, trust and recruiter differences' - startOffset: 3727 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3727 - endOffset: 4057 -- name: 'Episode summary: purpose‑driven candidates and standing out as a data scientist' - startOffset: 4057 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=4057 - endOffset: 4127 --- diff --git a/_podcast/s07e04-career-coaching.md b/_podcast/get-junior-data-job-and-transferable-skills.md similarity index 97% rename from _podcast/s07e04-career-coaching.md rename to _podcast/get-junior-data-job-and-transferable-skills.md index 9c557729..7fcc6390 100644 --- a/_podcast/s07e04-career-coaching.md +++ b/_podcast/get-junior-data-job-and-transferable-skills.md @@ -1,39 +1,130 @@ --- +title: 'Land Junior Data Jobs: CVs, Interviews, Transferable Skills & Overcome Imposter Syndrome' +short: Career Coaching +season: 7 episode: 4 guests: - lindsaymcquade -short: Career Coaching -title: 'Land Junior Data Jobs: CVs, Interviews, Transferable Skills & Overcome Imposter - Syndrome' -intro: Struggling to land a junior data job—how do you turn non‑linear experience - into a recruiter‑ready CV, prepare for interviews, and push past imposter syndrome? 
- In this episode, Lindsay McQuade, a transformational coach with 20+ years across - management consulting, higher education and tech and former Senior Career & Development - Coach at SPICED Academy, guides listeners through practical steps for junior data - roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED - training rated 94% “very good/excellent”) to explain CV writing for data roles, - achievement‑based resumes, interview prep and negotiation. Topics include reframing - past experience into evidence, identifying transferable skills for data analyst/scientist/engineer - roles, tailoring applications by industry, the ikigai framework for career focus, - and Berlin’s junior data market trends. We also cover impostor syndrome—its triggers, - objective feedback strategies, and structured learning and T‑shaped skills to build - confidence. Tune in for clear job search strategy (balanced volume and targeted - applications), how to choose a career coach, and practical LinkedIn networking tips - to convert applications into interviews. -description: 'Master landing junior data jobs: craft achievement-based CVs, highlight - transferable skills, ace interviews and beat imposter syndrome with coach tips.' -topics: -- career growth +image: images/podcast/s07e04-career-coaching.jpg ids: anchor: Career-Coaching---Lindsay-McQuade-e1e8elk youtube: _U8GrYJvmJM -image: images/podcast/s07e04-career-coaching.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Career-Coaching---Lindsay-McQuade-e1e8elk apple: https://podcasts.apple.com/us/podcast/career-coaching-lindsay-mcquade/id1541710331?i=1000550822996 spotify: https://open.spotify.com/episode/3jMRuqU3ZEcSeoizuOU5q1 youtube: https://www.youtube.com/watch?v=_U8GrYJvmJM -season: 7 + +description: 'Master landing junior data jobs: craft achievement-based CVs, highlight transferable skills, ace interviews and beat imposter syndrome with coach tips.' 
+intro: Struggling to land a junior data job—how do you turn non‑linear experience into a recruiter‑ready CV, prepare for interviews, and push past imposter syndrome? In this episode, Lindsay McQuade, a transformational coach with 20+ years across management consulting, higher education and tech and former Senior Career & Development Coach at SPICED Academy, guides listeners through practical steps for junior data roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED training rated 94% “very good/excellent”) to explain CV writing for data roles, achievement‑based resumes, interview prep and negotiation. Topics include reframing past experience into evidence, identifying transferable skills for data analyst/scientist/engineer roles, tailoring applications by industry, the ikigai framework for career focus, and Berlin’s junior data market trends. We also cover impostor syndrome—its triggers, objective feedback strategies, and structured learning and T‑shaped skills to build confidence. 
Tune in for clear job search strategy (balanced volume and targeted applications), how to choose a career coach, and practical LinkedIn networking tips to convert applications into interviews +topics: +- career growth +dateadded: 2022-02-12 + +duration: PT00H58M39S + +quotableClips: +- name: Guest Introduction & Career Journey + startOffset: 68 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=68 + endOffset: 302 +- name: Spiced Academy Programs Overview (Full‑Stack & Data Science) + startOffset: 302 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=302 + endOffset: 360 +- name: 'Career Coaching Services: CVs, Interview Prep, Negotiation' + startOffset: 360 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=360 + endOffset: 560 +- name: Defining Ideal Job Environment & Career Experiments + startOffset: 560 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=560 + endOffset: 711 +- name: Reframing Past Experience into Recruiter‑Friendly Evidence + startOffset: 711 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=711 + endOffset: 782 +- name: Identifying Transferable Skills for Data Roles + startOffset: 782 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=782 + endOffset: 906 +- name: Achievement‑Based CV Writing vs Responsibility Lists + startOffset: 906 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=906 + endOffset: 1014 +- name: Coaching Access Model & Typical Student Engagement + startOffset: 1014 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1014 + endOffset: 1078 +- name: 'Career Coach Impact: Belief, Market Navigation, Paperwork' + startOffset: 1078 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1078 + endOffset: 1228 +- name: 'Marketplace Ambiguity: Data Scientist vs Analyst vs Engineer' + startOffset: 1228 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1228 + endOffset: 1428 +- name: 'Job Focus Importance: Tailoring Applications to Industry' + startOffset: 1428 + url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1428 + endOffset: 1539 +- name: Ikigai Framework for Finding Career Focus and Projects + startOffset: 1539 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1539 + endOffset: 1714 +- name: Junior Job Market Trends in Berlin (Analytics vs Engineering) + startOffset: 1714 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1714 + endOffset: 1837 +- name: 'Job Search Strategy: Balanced Tailored Applications & Volume' + startOffset: 1837 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1837 + endOffset: 2011 +- name: 'Finding a Career Coach: Credentials, Specialization, Location' + startOffset: 2011 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2011 + endOffset: 2091 +- name: 'Imposter Syndrome: Origins and Common Triggers' + startOffset: 2091 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2091 + endOffset: 2225 +- name: Objective Feedback vs Distorted Self‑Perception + startOffset: 2225 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2225 + endOffset: 2517 +- name: 'Coping Strategies: Accepting Failure and Learning Loops' + startOffset: 2517 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2517 + endOffset: 2695 +- name: 'Resources for Imposter Syndrome: Research, Coaches, Mentors' + startOffset: 2695 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2695 + endOffset: 2828 +- name: 'Building Confidence: Structured Learning Paths & T‑Shaped Skills' + startOffset: 2828 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2828 + endOffset: 3070 +- name: 'Managing Expectations: Junior Role Requirements & Progression' + startOffset: 3070 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3070 + endOffset: 3171 +- name: Internships vs Junior Roles and Creating Hidden Opportunities + startOffset: 3171 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3171 + endOffset: 3320 +- name: 'Working with Recruiters: When They Help Juniors & Seniors' + startOffset: 3320 + url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3320 + endOffset: 3510 +- name: 'LinkedIn Networking: Credible Informational Outreach' + startOffset: 3510 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3510 + endOffset: 3564 +- name: Episode Wrap‑Up & Final Career Coaching Takeaways + startOffset: 3564 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3564 + endOffset: 3519 + transcript: - header: Guest Introduction & Career Journey - line: This week, we'll talk about career coaching. We have a special guest today, @@ -1123,109 +1214,6 @@ transcript: sec: 3587 time: '59:47' who: Alexey -dateadded: '2022-02-12' -duration: PT00H58M39S -quotableClips: -- name: Guest Introduction & Career Journey - startOffset: 68 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=68 - endOffset: 302 -- name: Spiced Academy Programs Overview (Full‑Stack & Data Science) - startOffset: 302 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=302 - endOffset: 360 -- name: 'Career Coaching Services: CVs, Interview Prep, Negotiation' - startOffset: 360 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=360 - endOffset: 560 -- name: Defining Ideal Job Environment & Career Experiments - startOffset: 560 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=560 - endOffset: 711 -- name: Reframing Past Experience into Recruiter‑Friendly Evidence - startOffset: 711 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=711 - endOffset: 782 -- name: Identifying Transferable Skills for Data Roles - startOffset: 782 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=782 - endOffset: 906 -- name: Achievement‑Based CV Writing vs Responsibility Lists - startOffset: 906 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=906 - endOffset: 1014 -- name: Coaching Access Model & Typical Student Engagement - startOffset: 1014 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1014 - endOffset: 1078 -- name: 'Career Coach Impact: Belief, Market Navigation, Paperwork' - startOffset: 1078 - url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1078 - endOffset: 1228 -- name: 'Marketplace Ambiguity: Data Scientist vs Analyst vs Engineer' - startOffset: 1228 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1228 - endOffset: 1428 -- name: 'Job Focus Importance: Tailoring Applications to Industry' - startOffset: 1428 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1428 - endOffset: 1539 -- name: Ikigai Framework for Finding Career Focus and Projects - startOffset: 1539 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1539 - endOffset: 1714 -- name: Junior Job Market Trends in Berlin (Analytics vs Engineering) - startOffset: 1714 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1714 - endOffset: 1837 -- name: 'Job Search Strategy: Balanced Tailored Applications & Volume' - startOffset: 1837 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1837 - endOffset: 2011 -- name: 'Finding a Career Coach: Credentials, Specialization, Location' - startOffset: 2011 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2011 - endOffset: 2091 -- name: 'Imposter Syndrome: Origins and Common Triggers' - startOffset: 2091 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2091 - endOffset: 2225 -- name: Objective Feedback vs Distorted Self‑Perception - startOffset: 2225 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2225 - endOffset: 2517 -- name: 'Coping Strategies: Accepting Failure and Learning Loops' - startOffset: 2517 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2517 - endOffset: 2695 -- name: 'Resources for Imposter Syndrome: Research, Coaches, Mentors' - startOffset: 2695 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2695 - endOffset: 2828 -- name: 'Building Confidence: Structured Learning Paths & T‑Shaped Skills' - startOffset: 2828 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2828 - endOffset: 3070 -- name: 'Managing Expectations: Junior Role Requirements & Progression' - startOffset: 3070 - url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3070 - endOffset: 3171 -- name: Internships vs Junior Roles and Creating Hidden Opportunities - startOffset: 3171 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3171 - endOffset: 3320 -- name: 'Working with Recruiters: When They Help Juniors & Seniors' - startOffset: 3320 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3320 - endOffset: 3510 -- name: 'LinkedIn Networking: Credible Informational Outreach' - startOffset: 3510 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3510 - endOffset: 3564 -- name: Episode Wrap‑Up & Final Career Coaching Takeaways - startOffset: 3564 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3564 - endOffset: 3519 --- Links: diff --git a/_podcast/s11e02-data-science-career-development.md b/_podcast/hire-and-manage-data-science-teams-in-b2b-saas.md similarity index 97% rename from _podcast/s11e02-data-science-career-development.md rename to _podcast/hire-and-manage-data-science-teams-in-b2b-saas.md index 63c4d68c..89cc0b80 100644 --- a/_podcast/s11e02-data-science-career-development.md +++ b/_podcast/hire-and-manage-data-science-teams-in-b2b-saas.md @@ -1,19 +1,128 @@ --- +title: "How to Hire, Manage, and Grow a Data Science Team in B2B SaaS" +short: "How to Hire, Manage, and Grow a Data Science Team in B2B SaaS" +season: 11 episode: 2 guests: - katiebauer +image: images/podcast/s11e02-data-science-career-development.jpg ids: anchor: Data-Science-Career-Development---Katie-Bauer-e1oq96g youtube: i1NHRroQClQ -image: images/podcast/s11e02-data-science-career-development.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Science-Career-Development---Katie-Bauer-e1oq96g apple: https://podcasts.apple.com/us/podcast/data-science-career-development-katie-bauer/id1541710331?i=1000582680396 spotify: https://open.spotify.com/episode/0sm5qB1Cj4EJlbQ2giLtHR youtube: https://www.youtube.com/watch?v=i1NHRroQClQ -season: 11 -short: Data Science Career Development -title: 
'Data Science Career Guide: Hiring, Managing & Growing Teams in B2B SaaS' + +description: Learn hiring & management tactics for data science in B2B SaaS — hiring, onboarding, mentorship, and career growth strategies to build scalable analytics teams +intro: 'How do you hire, manage, and grow a high-impact data science team inside a B2B SaaS company? In this episode, Katie Bauer — Head of Data at GlossGenius and former data leader at Twitter and Reddit — walks through practical career frameworks and team-building strategies for product analysts, analytics engineers, marketing scientists, and data scientists. Katie traces her own trajectory from linguistics to analytics and explains what “craft” looks like in analytics: maintainability, documentation, and peer review. She breaks down IC versus management paths, how to mentor juniors through project-based learning, and when to buy versus build entry-level talent. You’ll hear concrete hiring and interview approaches for managers, how to assess strategy through case studies and trade-offs, and tips to help new hires succeed in their first month via proactive communication and async support channels. For leaders, Katie covers prioritization, raising data literacy, and fostering a data-driven culture. Listen to get actionable guidance on hiring data scientists, onboarding newcomers, developing senior talent, and scaling data teams in B2B SaaS.' 
+topics: +- data science +- career development +- career growth +- hiring +- management +- team building +- mentorship +dateadded: 2022-10-15 + +duration: PT00H58M40S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=0 + endOffset: 93 +- name: 'Introduction: Episode focus on data science career development (Katie Bauer)' + startOffset: 93 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=93 + endOffset: 147 +- name: 'Career trajectory: linguistics to data science; Reddit and Twitter experience' + startOffset: 147 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=147 + endOffset: 276 +- name: GlossGenius product and head of data responsibilities (B2B SaaS) + startOffset: 276 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=276 + endOffset: 382 +- name: 'Current hiring needs: product analysts, analytics engineers, marketing scientists' + startOffset: 382 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=382 + endOffset: 428 +- name: 'Data scientist role: broad definition and varied responsibilities' + startOffset: 428 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=428 + endOffset: 513 +- name: 'Data science manager: building teams, matrix orgs, and cross-functional work' + startOffset: 513 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=513 + endOffset: 718 +- name: 'Craft quality: maintainability, documentation, peer review for analytics' + startOffset: 718 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=718 + endOffset: 912 +- name: 'Career framework: junior vs senior and the “terminal” career level' + startOffset: 912 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=912 + endOffset: 1130 +- name: 'Senior growth: abstraction, leadership exposure, and delegation' + startOffset: 1130 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1130 + endOffset: 1554 +- name: 'IC vs management: trying people leadership and the IC–manager pendulum' + startOffset: 1554 + url: 
https://www.youtube.com/watch?v=i1NHRroQClQ&t=1554 + endOffset: 1810 +- name: 'Managing juniors: mentorship, skills training, and project-based learning' + startOffset: 1810 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1810 + endOffset: 2056 +- name: 'Stakeholder conversations: talking to PMs and senior leaders (prep & questions)' + startOffset: 2056 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2056 + endOffset: 2342 +- name: 'Junior development: practice, exposure, and avoiding early specialization' + startOffset: 2342 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2342 + endOffset: 2412 +- name: 'Hiring juniors: build vs buy, long-term org benefits, and succession' + startOffset: 2412 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2412 + endOffset: 2679 +- name: 'Hiring managers: evaluation criteria for data science manager roles' + startOffset: 2679 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2679 + endOffset: 2841 +- name: 'Strategy assessment: case studies, trade-offs, and measurement in interviews' + startOffset: 2841 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2841 + endOffset: 3021 +- name: 'Entry-level hiring tips: standing out, outreach, and interview preparation' + startOffset: 3021 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3021 + endOffset: 3163 +- name: 'Onboarding first month: proactive communication and asking for help' + startOffset: 3163 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3163 + endOffset: 3251 +- name: 'Support mechanisms: regular check-ins, rubber‑duck channels, async help' + startOffset: 3251 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3251 + endOffset: 3380 +- name: 'Head of data challenges: prioritization, data literacy, and culture building' + startOffset: 3380 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3380 + endOffset: 3549 +- name: 'Closing advice: careers as direction and guiding team growth' + startOffset: 3549 + url: 
https://www.youtube.com/watch?v=i1NHRroQClQ&t=3549 + endOffset: 3613 +- name: Episode wrap and contact information + startOffset: 3613 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3613 + endOffset: 3520 + transcript: - header: Podcast Introduction - header: 'Introduction: Episode focus on data science career development (Katie Bauer)' @@ -1199,118 +1308,6 @@ transcript: sec: 3613 time: '1:00:13' who: Alexey -description: Learn hiring & management tactics for data science in B2B SaaS — hiring, - onboarding, mentorship, and career growth strategies to build scalable analytics - teams. -intro: 'How do you hire, manage, and grow a high-impact data science team inside a - B2B SaaS company? In this episode, Katie Bauer — Head of Data at GlossGenius and - former data leader at Twitter and Reddit — walks through practical career frameworks - and team-building strategies for product analysts, analytics engineers, marketing - scientists, and data scientists. Katie traces her own trajectory from linguistics - to analytics and explains what “craft” looks like in analytics: maintainability, - documentation, and peer review. She breaks down IC versus management paths, how - to mentor juniors through project-based learning, and when to buy versus build entry-level - talent. You’ll hear concrete hiring and interview approaches for managers, how to - assess strategy through case studies and trade-offs, and tips to help new hires - succeed in their first month via proactive communication and async support channels. - For leaders, Katie covers prioritization, raising data literacy, and fostering a - data-driven culture. Listen to get actionable guidance on hiring data scientists, - onboarding newcomers, developing senior talent, and scaling data teams in B2B SaaS.' 
-dateadded: '2022-10-15' -duration: PT00H58M40S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=0 - endOffset: 93 -- name: 'Introduction: Episode focus on data science career development (Katie Bauer)' - startOffset: 93 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=93 - endOffset: 147 -- name: 'Career trajectory: linguistics to data science; Reddit and Twitter experience' - startOffset: 147 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=147 - endOffset: 276 -- name: GlossGenius product and head of data responsibilities (B2B SaaS) - startOffset: 276 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=276 - endOffset: 382 -- name: 'Current hiring needs: product analysts, analytics engineers, marketing scientists' - startOffset: 382 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=382 - endOffset: 428 -- name: 'Data scientist role: broad definition and varied responsibilities' - startOffset: 428 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=428 - endOffset: 513 -- name: 'Data science manager: building teams, matrix orgs, and cross-functional work' - startOffset: 513 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=513 - endOffset: 718 -- name: 'Craft quality: maintainability, documentation, peer review for analytics' - startOffset: 718 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=718 - endOffset: 912 -- name: 'Career framework: junior vs senior and the “terminal” career level' - startOffset: 912 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=912 - endOffset: 1130 -- name: 'Senior growth: abstraction, leadership exposure, and delegation' - startOffset: 1130 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1130 - endOffset: 1554 -- name: 'IC vs management: trying people leadership and the IC–manager pendulum' - startOffset: 1554 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1554 - endOffset: 1810 -- name: 'Managing juniors: mentorship, skills 
training, and project-based learning' - startOffset: 1810 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1810 - endOffset: 2056 -- name: 'Stakeholder conversations: talking to PMs and senior leaders (prep & questions)' - startOffset: 2056 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2056 - endOffset: 2342 -- name: 'Junior development: practice, exposure, and avoiding early specialization' - startOffset: 2342 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2342 - endOffset: 2412 -- name: 'Hiring juniors: build vs buy, long-term org benefits, and succession' - startOffset: 2412 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2412 - endOffset: 2679 -- name: 'Hiring managers: evaluation criteria for data science manager roles' - startOffset: 2679 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2679 - endOffset: 2841 -- name: 'Strategy assessment: case studies, trade-offs, and measurement in interviews' - startOffset: 2841 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2841 - endOffset: 3021 -- name: 'Entry-level hiring tips: standing out, outreach, and interview preparation' - startOffset: 3021 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3021 - endOffset: 3163 -- name: 'Onboarding first month: proactive communication and asking for help' - startOffset: 3163 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3163 - endOffset: 3251 -- name: 'Support mechanisms: regular check-ins, rubber‑duck channels, async help' - startOffset: 3251 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3251 - endOffset: 3380 -- name: 'Head of data challenges: prioritization, data literacy, and culture building' - startOffset: 3380 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3380 - endOffset: 3549 -- name: 'Closing advice: careers as direction and guiding team growth' - startOffset: 3549 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3549 - endOffset: 3613 -- name: Episode wrap and contact information - startOffset: 3613 - url: 
https://www.youtube.com/watch?v=i1NHRroQClQ&t=3613 - endOffset: 3520 --- Links: diff --git a/_podcast/s07e02-recruiting-data-professionals.md b/_podcast/hiring-data-scientists-and-analysts.md similarity index 98% rename from _podcast/s07e02-recruiting-data-professionals.md rename to _podcast/hiring-data-scientists-and-analysts.md index 8e0b299d..3fcc352d 100644 --- a/_podcast/s07e02-recruiting-data-professionals.md +++ b/_podcast/hiring-data-scientists-and-analysts.md @@ -1,12 +1,11 @@ --- -title: 'Hiring Data Scientists & Analysts: Talent Pipelines, Job Specs, CV Screening - & Salary Tips' +title: 'Hiring Data Scientists & Analysts: Talent Pipelines, Job Specs, CV Screening & Salary Tips' short: Recruiting Data Professionals +season: 7 +episode: 2 guests: - alicjanotowska image: images/podcast/s07e02-recruiting-data-professionals.jpg -season: 7 -episode: 2 ids: youtube: WSMDXsjKYx4 anchor: Recruiting-Data-Professionals---Alicja-Notowska-e1dj2qn @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Recruiting-Data-Professionals---Alicja-Notowska-e1dj2qn spotify: https://open.spotify.com/episode/4LFZX7IfpdYkQ6si4ed0OR apple: https://podcasts.apple.com/us/podcast/recruiting-data-professionals-alicja-notowska/id1541710331?i=1000549307220 + +description: 'Master hiring for data scientist & data analyst roles: craft job descriptions, build talent pipelines, screen CVs, negotiate salaries and land top hires.' +intro: How do you consistently find and hire the right data scientists and analysts in a competitive market? In this episode, Alicja Notowska — a talent acquisition specialist with 10+ years recruiting at Google, Zalando and now with embedded agency WeAreKeen — breaks down practical recruiting tactics for hiring data scientists and data analysts.

We cover the full interview funnel and end-to-end recruiting responsibilities, crafting job specs that emphasise problems over perks, and using inclusive language plus AI tools to attract diverse candidates. Alicja explains sourcing channels (LinkedIn, GitHub, conferences, academia), building talent pipelines and 360° recruitment, and keyword strategies for CV screening. You’ll hear guidance on education signals (BSc/MSc/PhD), CV best practices (clear responsibilities, dates, avoid buzzwords), recruiter screening interviews, and evaluating portfolio projects and online course work.

The episode also tackles salary conversations — bands, transparency and negotiation — managing hiring manager expectations, pathways for career changers, and offer etiquette. Listen to gain actionable tactics for job specs, CV screening, sourcing and salary negotiation when hiring data talent +dateadded: 2022-01-29 + +duration: PT01H04M55S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=0 + endOffset: 125 +- name: Guest Background & Recruiting Experience + startOffset: 125 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=125 + endOffset: 186 +- name: 'Current Role: Embedded Talent Agency & Client Lead' + startOffset: 186 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=186 + endOffset: 284 +- name: Typical Interview Funnel for Data Roles + startOffset: 284 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=284 + endOffset: 380 +- name: End-to-End Recruiting Responsibilities + startOffset: 380 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=380 + endOffset: 429 +- name: Collaborating with Hiring Managers & Crafting Job Specs + startOffset: 429 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=429 + endOffset: 542 +- name: 'Sourcing Channels: LinkedIn, GitHub, Conferences & Academia' + startOffset: 542 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=542 + endOffset: 694 +- name: Building Talent Pipelines & 360° Recruitment + startOffset: 694 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=694 + endOffset: 837 +- name: Managing Hiring Expectations & Market Reality + startOffset: 837 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=837 + endOffset: 1038 +- name: Using Talent Market Data to Negotiate Requirements + startOffset: 1038 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1038 + endOffset: 1108 +- name: 'Job Description Focus: Problems Over Perks' + startOffset: 1108 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1108 + endOffset: 1204 +- name: 
Inclusive JD Language & AI Tools for Attraction + startOffset: 1204 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1204 + endOffset: 1292 +- name: 'Screening Profiles: Experience, Education & Responsibilities' + startOffset: 1292 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1292 + endOffset: 1556 +- name: Keyword Strategy for Data Scientist Searches + startOffset: 1556 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1556 + endOffset: 1630 +- name: 'Education Signals: Bachelor’s, Master’s & PhD Expectations' + startOffset: 1630 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1630 + endOffset: 1721 +- name: 'CV Best Practices: Responsibilities, Dates & Clarity' + startOffset: 1721 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1721 + endOffset: 1960 +- name: Avoiding Buzzwords & Making CVs Interview-Ready + startOffset: 1960 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1960 + endOffset: 2168 +- name: 'Recruiter Screening Interviews: Behavioral & Motivation Checks' + startOffset: 2168 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2168 + endOffset: 2433 +- name: 'Salary Conversations: Bands, Transparency & Negotiation' + startOffset: 2433 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2433 + endOffset: 2705 +- name: Handling High Salary Requests & Market Research + startOffset: 2705 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2705 + endOffset: 2856 +- name: 'Pathways for Career Changers: Gaining Practical Experience' + startOffset: 2856 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2856 + endOffset: 3053 +- name: 'Cover Letters vs CVs: When They Matter' + startOffset: 3053 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3053 + endOffset: 3249 +- name: 'Data Analyst Hiring: Title Ambiguity & Similar Processes' + startOffset: 3249 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3249 + endOffset: 3570 +- name: Portfolio Projects & Online Courses on Your CV + startOffset: 3570 + url: 
https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3570 + endOffset: 3697 +- name: 'Making a Strong Impression: Clear Explanations & Examples' + startOffset: 3697 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3697 + endOffset: 3709 +- name: 'Offer Etiquette: Communication, Commitments & Withdrawals' + startOffset: 3709 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3709 + endOffset: 4004 +- name: Where to Find Alicja & Episode Wrap-up + startOffset: 4004 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=4004 + endOffset: 3895 + transcript: - header: Podcast Introduction - header: Guest Background & Recruiting Experience @@ -1216,132 +1332,4 @@ transcript: sec: 4020 time: '1:07:00' who: Alicja -description: 'Master hiring for data scientist & data analyst roles: craft job descriptions, - build talent pipelines, screen CVs, negotiate salaries and land top hires.' -intro: How do you consistently find and hire the right data scientists and analysts - in a competitive market? In this episode, Alicja Notowska — a talent acquisition - specialist with 10+ years recruiting at Google, Zalando and now with embedded agency - WeAreKeen — breaks down practical recruiting tactics for hiring data scientists - and data analysts.

We cover the full interview funnel and end-to-end recruiting - responsibilities, crafting job specs that emphasise problems over perks, and using - inclusive language plus AI tools to attract diverse candidates. Alicja explains - sourcing channels (LinkedIn, GitHub, conferences, academia), building talent pipelines - and 360° recruitment, and keyword strategies for CV screening. You’ll hear guidance - on education signals (BSc/MSc/PhD), CV best practices (clear responsibilities, dates, - avoid buzzwords), recruiter screening interviews, and evaluating portfolio projects - and online course work.

The episode also tackles salary conversations — - bands, transparency and negotiation — managing hiring manager expectations, pathways - for career changers, and offer etiquette. Listen to gain actionable tactics for - job specs, CV screening, sourcing and salary negotiation when hiring data talent. -dateadded: '2022-01-29' -duration: PT01H04M55S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=0 - endOffset: 125 -- name: Guest Background & Recruiting Experience - startOffset: 125 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=125 - endOffset: 186 -- name: 'Current Role: Embedded Talent Agency & Client Lead' - startOffset: 186 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=186 - endOffset: 284 -- name: Typical Interview Funnel for Data Roles - startOffset: 284 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=284 - endOffset: 380 -- name: End-to-End Recruiting Responsibilities - startOffset: 380 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=380 - endOffset: 429 -- name: Collaborating with Hiring Managers & Crafting Job Specs - startOffset: 429 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=429 - endOffset: 542 -- name: 'Sourcing Channels: LinkedIn, GitHub, Conferences & Academia' - startOffset: 542 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=542 - endOffset: 694 -- name: Building Talent Pipelines & 360° Recruitment - startOffset: 694 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=694 - endOffset: 837 -- name: Managing Hiring Expectations & Market Reality - startOffset: 837 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=837 - endOffset: 1038 -- name: Using Talent Market Data to Negotiate Requirements - startOffset: 1038 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1038 - endOffset: 1108 -- name: 'Job Description Focus: Problems Over Perks' - startOffset: 1108 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1108 - endOffset: 1204 -- name: 
Inclusive JD Language & AI Tools for Attraction - startOffset: 1204 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1204 - endOffset: 1292 -- name: 'Screening Profiles: Experience, Education & Responsibilities' - startOffset: 1292 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1292 - endOffset: 1556 -- name: Keyword Strategy for Data Scientist Searches - startOffset: 1556 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1556 - endOffset: 1630 -- name: 'Education Signals: Bachelor’s, Master’s & PhD Expectations' - startOffset: 1630 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1630 - endOffset: 1721 -- name: 'CV Best Practices: Responsibilities, Dates & Clarity' - startOffset: 1721 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1721 - endOffset: 1960 -- name: Avoiding Buzzwords & Making CVs Interview-Ready - startOffset: 1960 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1960 - endOffset: 2168 -- name: 'Recruiter Screening Interviews: Behavioral & Motivation Checks' - startOffset: 2168 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2168 - endOffset: 2433 -- name: 'Salary Conversations: Bands, Transparency & Negotiation' - startOffset: 2433 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2433 - endOffset: 2705 -- name: Handling High Salary Requests & Market Research - startOffset: 2705 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2705 - endOffset: 2856 -- name: 'Pathways for Career Changers: Gaining Practical Experience' - startOffset: 2856 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2856 - endOffset: 3053 -- name: 'Cover Letters vs CVs: When They Matter' - startOffset: 3053 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3053 - endOffset: 3249 -- name: 'Data Analyst Hiring: Title Ambiguity & Similar Processes' - startOffset: 3249 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3249 - endOffset: 3570 -- name: Portfolio Projects & Online Courses on Your CV - startOffset: 3570 - url: 
https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3570 - endOffset: 3697 -- name: 'Making a Strong Impression: Clear Explanations & Examples' - startOffset: 3697 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3697 - endOffset: 3709 -- name: 'Offer Etiquette: Communication, Commitments & Withdrawals' - startOffset: 3709 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3709 - endOffset: 4004 -- name: Where to Find Alicja & Episode Wrap-up - startOffset: 4004 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=4004 - endOffset: 3895 --- diff --git a/_podcast/s08e06-recruiting-data-engineers.md b/_podcast/hiring-for-data-engineering-jobs-in-europe.md similarity index 96% rename from _podcast/s08e06-recruiting-data-engineers.md rename to _podcast/hiring-for-data-engineering-jobs-in-europe.md index 00a4c2ee..987f9c32 100644 --- a/_podcast/s08e06-recruiting-data-engineers.md +++ b/_podcast/hiring-for-data-engineering-jobs-in-europe.md @@ -1,44 +1,145 @@ --- +title: 'Hiring Data Engineers in Europe: Nicolas Rassam on Interviews, Skills & Career Switches' +short: Recruiting Data Engineers +season: 8 episode: 6 guests: - nicolasrassam -date: 2025-11-07 -intro: How do you hire data engineers in Europe today — and what should candidates - and hiring managers actually focus on during interviews? In this episode, Nicolas - Rassam, a Senior Talent Acquisition Partner at Helsing with 10+ years scaling AI - and engineering teams at Onfido and Criteo, walks through the practical realities - of hiring data engineers across Europe's competitive, borderless market.

- We cover why data engineering matters now, differences in European hiring footprints, - and the rising demand for modern tooling. Nicolas breaks down common hiring challenges - — title ambiguity, experience mismatches, and recruiter technical literacy — and - explains how to evaluate transferable experience from software and BI roles. You'll - get concrete guidance on level expectations (junior → senior), typical interview - processes and assessments, resume essentials (SQL, Python, problem solving, outcomes), - cloud fundamentals, when infrastructure/DevOps skills matter, portfolio/GitHub storytelling, - and strategies for career switchers (internships, targeted projects). The episode - also addresses hiring without degrees, industry fit for regulated data, and how - targeted applications beat spray-and-pray. Listen to learn what to prepare for interviews, - how to position projects, and what hiring teams really look for when recruiting - data engineering talent in Europe. +image: images/podcast/s08e06-recruiting-data-engineers.jpg ids: anchor: Recruiting-Data-Engineers---Nicolas-Rassam-e1hnkl1 youtube: hylxiu4VGTo -image: images/podcast/s08e06-recruiting-data-engineers.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Recruiting-Data-Engineers---Nicolas-Rassam-e1hnkl1 apple: https://podcasts.apple.com/us/podcast/recruiting-data-engineers-nicolas-rassam/id1541710331?i=1000559128813 spotify: https://open.spotify.com/episode/5ldkzYiHFvJCKoEyfAlvDs?si=WFJzcZ7fRCi1dzwapNGfzA youtube: https://www.youtube.com/watch?v=hylxiu4VGTo -season: 8 -short: Recruiting Data Engineers -title: 'Hiring Data Engineers in Europe: Nicolas Rassam on Interviews, Skills & Career - Switches' -description: 'Learn hiring strategies for data engineering in Europe: interview prep, - resume tips (SQL/Python), career-switch paths and cloud fundamentals to win roles.' 
+ +description: 'Learn hiring strategies for data engineering in Europe: interview prep, resume tips (SQL/Python), career-switch paths and cloud fundamentals to win roles.' +intro: How do you hire data engineers in Europe today — and what should candidates and hiring managers actually focus on during interviews? In this episode, Nicolas Rassam, a Senior Talent Acquisition Partner at Helsing with 10+ years scaling AI and engineering teams at Onfido and Criteo, walks through the practical realities of hiring data engineers across Europe's competitive, borderless market.

We cover why data engineering matters now, differences in European hiring footprints, and the rising demand for modern tooling. Nicolas breaks down common hiring challenges — title ambiguity, experience mismatches, and recruiter technical literacy — and explains how to evaluate transferable experience from software and BI roles. You'll get concrete guidance on level expectations (junior → senior), typical interview processes and assessments, resume essentials (SQL, Python, problem solving, outcomes), cloud fundamentals, when infrastructure/DevOps skills matter, portfolio/GitHub storytelling, and strategies for career switchers (internships, targeted projects). The episode also addresses hiring without degrees, industry fit for regulated data, and how targeted applications beat spray-and-pray. Listen to learn what to prepare for interviews, how to position projects, and what hiring teams really look for when recruiting data engineering talent in Europe topics: - data engineering -- career switch +- career transition - career growth +dateadded: 2022-04-30 +date: 2025-11-07 + +duration: PT01H01M05S + +quotableClips: +- name: Episode Opening & Guest Welcome + startOffset: 0 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=0 + endOffset: 75 +- name: Guest Background and Career Path + startOffset: 75 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=75 + endOffset: 192 +- name: Onfido Role & European Hiring Footprint + startOffset: 192 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=192 + endOffset: 220 +- name: 'Roles Recruited: Data, ML & Research Spectrum' + startOffset: 220 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=220 + endOffset: 273 +- name: European Tech Market Differences + startOffset: 273 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=273 + endOffset: 375 +- name: Borderless Recruitment and Competition Dynamics + startOffset: 375 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=375 + endOffset: 419 +- name: 'Episode Focus: Why 
Data Engineering Matters Now' + startOffset: 419 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=419 + endOffset: 468 +- name: Tech vs Business Balance and Training Gaps + startOffset: 468 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=468 + endOffset: 666 +- name: Data Science Misconceptions and Data Quality Dependence + startOffset: 666 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=666 + endOffset: 794 +- name: Rising Demand for Data Engineering and Modern Tooling + startOffset: 794 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=794 + endOffset: 960 +- name: 'Recruiter Technical Literacy: Big-Picture Knowledge' + startOffset: 960 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=960 + endOffset: 1127 +- name: 'Hiring Challenges: Titles, Experience Mismatch, Demand' + startOffset: 1127 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1127 + endOffset: 1257 +- name: Evaluating Transferable Experience from Software/BI Roles + startOffset: 1257 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1257 + endOffset: 1375 +- name: 'Expectations by Level: Junior → Senior Responsibilities' + startOffset: 1375 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1375 + endOffset: 1598 +- name: Typical Interview Process and Level-Based Assessments + startOffset: 1598 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1598 + endOffset: 1839 +- name: 'Career Switchers: Internships, Projects, and Focused Skills' + startOffset: 1839 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1839 + endOffset: 1876 +- name: 'Resume Essentials: SQL, Python, Problems & Outcomes' + startOffset: 1876 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1876 + endOffset: 2107 +- name: 'Transition Strategy: Team Structure and Role Selection' + startOffset: 2107 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2107 + endOffset: 2381 +- name: 'Cloud Fundamentals: Tool-Agnostic Conceptual Knowledge' + startOffset: 2381 + url: 
https://www.youtube.com/watch?v=hylxiu4VGTo&t=2381 + endOffset: 2510 +- name: 'Infrastructure & DevOps Skills: When They Matter' + startOffset: 2510 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2510 + endOffset: 2675 +- name: 'Interview Prep: Research Company and Explain Projects Clearly' + startOffset: 2675 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2675 + endOffset: 2893 +- name: Targeted Applications vs. Spray-and-Pray Approach + startOffset: 2893 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2893 + endOffset: 3045 +- name: 'Hiring Without Degrees: Skills, Projects, Continuous Learning' + startOffset: 3045 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3045 + endOffset: 3265 +- name: 'Standout Project Examples: First Pipelines & Privacy Work' + startOffset: 3265 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3265 + endOffset: 3353 +- name: 'Portfolio & GitHub: Shareable Work and Storytelling' + startOffset: 3353 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3353 + endOffset: 3485 +- name: 'Industry Fit: Domain Knowledge for Regulated Data' + startOffset: 3485 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3485 + endOffset: 3659 +- name: 'Follow-up Resources: Webinars and Further Reading' + startOffset: 3659 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3659 + endOffset: 3698 +- name: Episode Close and Final Tips + startOffset: 3698 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3698 + endOffset: 3712 + transcript: - header: Episode Opening & Guest Welcome - line: This week, we'll talk about recruiting data engineers. 
We have a special guest @@ -150,8 +251,8 @@ transcript: sec: 419 time: '6:59' who: Alexey -- line: We're talking about hiring data engineers, but I also wanted to ask you - – you are recruiting for a wide range of positions ML engineers, data scientists, +- line: We're talking about hiring data engineers, but I also wanted to ask you – + you are recruiting for a wide range of positions ML engineers, data scientists, data analysts, data engineers – in your opinion, what is the main difference between hiring data scientists and data engineers? sec: 419 @@ -862,121 +963,6 @@ transcript: sec: 3712 time: '1:01:52' who: Nicolas -dateadded: '2022-04-30' -duration: PT01H01M05S -quotableClips: -- name: Episode Opening & Guest Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=0 - endOffset: 75 -- name: Guest Background and Career Path - startOffset: 75 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=75 - endOffset: 192 -- name: Onfido Role & European Hiring Footprint - startOffset: 192 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=192 - endOffset: 220 -- name: 'Roles Recruited: Data, ML & Research Spectrum' - startOffset: 220 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=220 - endOffset: 273 -- name: European Tech Market Differences - startOffset: 273 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=273 - endOffset: 375 -- name: Borderless Recruitment and Competition Dynamics - startOffset: 375 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=375 - endOffset: 419 -- name: 'Episode Focus: Why Data Engineering Matters Now' - startOffset: 419 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=419 - endOffset: 468 -- name: Tech vs Business Balance and Training Gaps - startOffset: 468 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=468 - endOffset: 666 -- name: Data Science Misconceptions and Data Quality Dependence - startOffset: 666 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=666 - endOffset: 794 -- name: 
Rising Demand for Data Engineering and Modern Tooling - startOffset: 794 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=794 - endOffset: 960 -- name: 'Recruiter Technical Literacy: Big-Picture Knowledge' - startOffset: 960 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=960 - endOffset: 1127 -- name: 'Hiring Challenges: Titles, Experience Mismatch, Demand' - startOffset: 1127 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1127 - endOffset: 1257 -- name: Evaluating Transferable Experience from Software/BI Roles - startOffset: 1257 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1257 - endOffset: 1375 -- name: 'Expectations by Level: Junior → Senior Responsibilities' - startOffset: 1375 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1375 - endOffset: 1598 -- name: Typical Interview Process and Level-Based Assessments - startOffset: 1598 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1598 - endOffset: 1839 -- name: 'Career Switchers: Internships, Projects, and Focused Skills' - startOffset: 1839 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1839 - endOffset: 1876 -- name: 'Resume Essentials: SQL, Python, Problems & Outcomes' - startOffset: 1876 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1876 - endOffset: 2107 -- name: 'Transition Strategy: Team Structure and Role Selection' - startOffset: 2107 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2107 - endOffset: 2381 -- name: 'Cloud Fundamentals: Tool-Agnostic Conceptual Knowledge' - startOffset: 2381 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2381 - endOffset: 2510 -- name: 'Infrastructure & DevOps Skills: When They Matter' - startOffset: 2510 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2510 - endOffset: 2675 -- name: 'Interview Prep: Research Company and Explain Projects Clearly' - startOffset: 2675 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2675 - endOffset: 2893 -- name: Targeted Applications vs. 
Spray-and-Pray Approach - startOffset: 2893 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2893 - endOffset: 3045 -- name: 'Hiring Without Degrees: Skills, Projects, Continuous Learning' - startOffset: 3045 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3045 - endOffset: 3265 -- name: 'Standout Project Examples: First Pipelines & Privacy Work' - startOffset: 3265 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3265 - endOffset: 3353 -- name: 'Portfolio & GitHub: Shareable Work and Storytelling' - startOffset: 3353 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3353 - endOffset: 3485 -- name: 'Industry Fit: Domain Knowledge for Regulated Data' - startOffset: 3485 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3485 - endOffset: 3659 -- name: 'Follow-up Resources: Webinars and Further Reading' - startOffset: 3659 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3659 - endOffset: 3698 -- name: Episode Close and Final Tips - startOffset: 3698 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3698 - endOffset: 3712 --- Links: diff --git a/_podcast/s09e09-hiring-data-science-talent.md b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md similarity index 97% rename from _podcast/s09e09-hiring-data-science-talent.md rename to _podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md index 79e75209..2bc21ae4 100644 --- a/_podcast/s09e09-hiring-data-science-talent.md +++ b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md @@ -1,20 +1,117 @@ --- +title: 'How to Hire Data Scientists: Interview Questions, MLOps, AutoML Limits & Inclusive Hiring' +short: Hiring Data Science Talent +season: 9 episode: 9 guests: - olgaivina +image: images/podcast/s09e09-hiring-data-science-talent.jpg ids: anchor: Hiring-Data-Science-Talent---Olga-Ivina-e1l4aku youtube: Af9t9r2b0z0 -image: images/podcast/s09e09-hiring-data-science-talent.jpg links: anchor: 
https://anchor.fm/datatalksclub/episodes/Hiring-Data-Science-Talent---Olga-Ivina-e1l4aku apple: https://podcasts.apple.com/us/podcast/hiring-data-science-talent-olga-ivina/id1541710331?i=1000570846380 spotify: https://open.spotify.com/episode/7ddvA9zNTip5Bt6EYnMNty?si=4fee84a6ad43465d youtube: https://www.youtube.com/watch?v=Af9t9r2b0z0 -season: 9 -short: Hiring Data Science Talent -title: 'How to Hire Data Scientists: Interview Questions, MLOps, AutoML Limits & Inclusive - Hiring' + +description: 'Learn to hire data scientists: interview questions, MLOps insights and inclusive hiring tactics to assess technical depth, AutoML limits and build better teams.' +intro: 'How do you hire the right data scientists today—balancing algorithmic depth, MLOps skills, and inclusive hiring practices? In this episode, Olga Ivina, Delivery Data Science Director at Microsoft and former Deloitte consultant with a Ph.D. and 16+ years in AI, walks through practical strategies for recruiting strong data science talent.

Olga draws on her journey from applied mathematics and air pollution research to leading delivery teams to explain core hiring criteria: technical excellence, growth mindset, communication, and humility. We cover concrete interview questions and diagnostic problems that reveal algorithmic understanding and assumptions, how to structure coding and analytical tasks, and ways to assess role fit between mathematical expertise and engineering skills. The conversation also addresses the rise of MLOps, realistic limits of AutoML and the human-in-the-loop, career path trade-offs, and interviewing candidates with employment gaps.

If you’re hiring data scientists or building interview processes, this episode delivers actionable frameworks, sample diagnostic questions, and inclusive hiring tips—language to avoid in job posts and strategies to attract diverse candidates—so you can evaluate both technical depth and practical delivery capability.' +topics: +- data science +- career growth +- hiring +- MLOps +dateadded: 2022-07-22 + +duration: PT00H58M12S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=0 + endOffset: 104 +- name: 'Career Beginnings: Applied mathematics, forecasting, and consulting' + startOffset: 104 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=104 + endOffset: 385 +- name: 'PhD Research: Air pollution modeling and conformal prediction' + startOffset: 385 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=385 + endOffset: 510 +- name: 'Current Role: Leading delivery data science teams and startup support' + startOffset: 510 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=510 + endOffset: 638 +- name: 'Evolution of Data Science: Skill changes and rise of MLOps' + startOffset: 638 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=638 + endOffset: 889 +- name: 'Core Hiring Criteria: Technical excellence and growth mindset' + startOffset: 889 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=889 + endOffset: 945 +- name: 'Technical Depth: Demonstrating algorithmic understanding and assumptions' + startOffset: 945 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=945 + endOffset: 1083 +- name: 'Attitude & Motivation: Assessing passion, humility, and communication' + startOffset: 1083 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1083 + endOffset: 1216 +- name: 'Podcasting as Learning: Conversations that shape career perspectives' + startOffset: 1216 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1216 + endOffset: 1381 +- name: 'Staying Current: Sources for data science and 
engineering updates' + startOffset: 1381 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1381 + endOffset: 1521 +- name: 'Technical Interviews: Coding, analytical tasks, and objective criteria' + startOffset: 1521 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1521 + endOffset: 1712 +- name: 'Diagnostic Questions: Sample problems that reveal depth of knowledge' + startOffset: 1712 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1712 + endOffset: 1875 +- name: 'Foundational Skills: Descriptive statistics and recommended reading' + startOffset: 1875 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1875 + endOffset: 1952 +- name: 'Role Fit: Hiring for mathematical expertise versus engineering skills' + startOffset: 1952 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1952 + endOffset: 2264 +- name: 'AutoML & Automation: Limits of AutoML and the human-in-the-loop' + startOffset: 2264 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2264 + endOffset: 2529 +- name: 'Career Paths: Individual contributor vs management trade-offs' + startOffset: 2529 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2529 + endOffset: 2737 +- name: 'Career Transition: From data analyst to data scientist' + startOffset: 2737 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2737 + endOffset: 2826 +- name: 'Diversity Hiring: Strategies to attract female data science talent' + startOffset: 2826 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2826 + endOffset: 3233 +- name: 'Inclusive Job Posts: Language, requirements, and avoiding discouraging wording' + startOffset: 3233 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3233 + endOffset: 3391 +- name: 'Employment Gaps: Evaluating candidates with long CV breaks' + startOffset: 3391 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3391 + endOffset: 3553 +- name: Episode Wrap-up & Resources + startOffset: 3553 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3553 + endOffset: 3492 + transcript: - 
header: Episode Introduction - header: 'Career Beginnings: Applied mathematics, forecasting, and consulting' @@ -1269,112 +1366,6 @@ transcript: sec: 3596 time: '59:56' who: Olga -description: 'Learn to hire data scientists: interview questions, MLOps insights and - inclusive hiring tactics to assess technical depth, AutoML limits and build better - teams.' -intro: 'How do you hire the right data scientists today—balancing algorithmic depth, - MLOps skills, and inclusive hiring practices? In this episode, Olga Ivina, Delivery - Data Science Director at Microsoft and former Deloitte consultant with a Ph.D. and - 16+ years in AI, walks through practical strategies for recruiting strong data science - talent.

Olga draws on her journey from applied mathematics and air pollution - research to leading delivery teams to explain core hiring criteria: technical excellence, - growth mindset, communication, and humility. We cover concrete interview questions - and diagnostic problems that reveal algorithmic understanding and assumptions, how - to structure coding and analytical tasks, and ways to assess role fit between mathematical - expertise and engineering skills. The conversation also addresses the rise of MLOps, - realistic limits of AutoML and the human-in-the-loop, career path trade-offs, and - interviewing candidates with employment gaps.

If you’re hiring data scientists - or building interview processes, this episode delivers actionable frameworks, sample - diagnostic questions, and inclusive hiring tips—language to avoid in job posts and - strategies to attract diverse candidates—so you can evaluate both technical depth - and practical delivery capability.' -dateadded: '2022-07-22' -duration: PT00H58M12S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=0 - endOffset: 104 -- name: 'Career Beginnings: Applied mathematics, forecasting, and consulting' - startOffset: 104 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=104 - endOffset: 385 -- name: 'PhD Research: Air pollution modeling and conformal prediction' - startOffset: 385 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=385 - endOffset: 510 -- name: 'Current Role: Leading delivery data science teams and startup support' - startOffset: 510 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=510 - endOffset: 638 -- name: 'Evolution of Data Science: Skill changes and rise of MLOps' - startOffset: 638 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=638 - endOffset: 889 -- name: 'Core Hiring Criteria: Technical excellence and growth mindset' - startOffset: 889 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=889 - endOffset: 945 -- name: 'Technical Depth: Demonstrating algorithmic understanding and assumptions' - startOffset: 945 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=945 - endOffset: 1083 -- name: 'Attitude & Motivation: Assessing passion, humility, and communication' - startOffset: 1083 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1083 - endOffset: 1216 -- name: 'Podcasting as Learning: Conversations that shape career perspectives' - startOffset: 1216 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1216 - endOffset: 1381 -- name: 'Staying Current: Sources for data science and engineering updates' - startOffset: 1381 - url: 
https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1381 - endOffset: 1521 -- name: 'Technical Interviews: Coding, analytical tasks, and objective criteria' - startOffset: 1521 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1521 - endOffset: 1712 -- name: 'Diagnostic Questions: Sample problems that reveal depth of knowledge' - startOffset: 1712 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1712 - endOffset: 1875 -- name: 'Foundational Skills: Descriptive statistics and recommended reading' - startOffset: 1875 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1875 - endOffset: 1952 -- name: 'Role Fit: Hiring for mathematical expertise versus engineering skills' - startOffset: 1952 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1952 - endOffset: 2264 -- name: 'AutoML & Automation: Limits of AutoML and the human-in-the-loop' - startOffset: 2264 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2264 - endOffset: 2529 -- name: 'Career Paths: Individual contributor vs management trade-offs' - startOffset: 2529 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2529 - endOffset: 2737 -- name: 'Career Transition: From data analyst to data scientist' - startOffset: 2737 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2737 - endOffset: 2826 -- name: 'Diversity Hiring: Strategies to attract female data science talent' - startOffset: 2826 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2826 - endOffset: 3233 -- name: 'Inclusive Job Posts: Language, requirements, and avoiding discouraging wording' - startOffset: 3233 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3233 - endOffset: 3391 -- name: 'Employment Gaps: Evaluating candidates with long CV breaks' - startOffset: 3391 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3391 - endOffset: 3553 -- name: Episode Wrap-up & Resources - startOffset: 3553 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3553 - endOffset: 3492 --- Links: diff --git 
a/_podcast/s09e05-data-scientists-at-work.md b/_podcast/how-to-break-into-data-science.md similarity index 98% rename from _podcast/s09e05-data-scientists-at-work.md rename to _podcast/how-to-break-into-data-science.md index f0f60d69..de46f0b2 100644 --- a/_podcast/s09e05-data-scientists-at-work.md +++ b/_podcast/how-to-break-into-data-science.md @@ -1,20 +1,124 @@ --- +title: 'Data Science Career Playbook: Job Hunt, Portfolios, DALL·E 2 & Overcoming FOMO' +short: Data Scientists at Work +season: 9 episode: 5 guests: - misraturp +image: images/podcast/s09e05-data-scientists-at-work.jpg ids: anchor: Data-Scientists-at-Work---Msra-Turp-e1k7pbn youtube: oUycqtMoYr8 -image: images/podcast/s09e05-data-scientists-at-work.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Scientists-at-Work---Msra-Turp-e1k7pbn apple: https://podcasts.apple.com/us/podcast/data-scientists-at-work-m%C4%B1sra-turp/id1541710331?i=1000567625873 spotify: https://open.spotify.com/episode/1RSUsWDOBDD4sNDruEbnEY youtube: https://www.youtube.com/watch?v=oUycqtMoYr8 -season: 9 -short: Data Scientists at Work -title: 'Data Science Career Playbook: Job Hunt, Portfolios, DALL·E 2 & Overcoming - FOMO' + +description: 'Master data science job hunt and portfolio tactics: actionable projects, recruiter tips, DALL·E 2 basics and FOMO coping strategies to land interviews faster.' +intro: How do you actually break into data science, build a portfolio that gets interviews, and stay sane while every new AI model vies for your attention? In this episode Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI (founder of “So you want to be a data scientist?”) — walks through a practical career playbook for job hunting, portfolio building, and coping with FOMO and imposter syndrome.

We cover Mısra’s career path from big data engineering to developer advocacy, what a data scientist’s day‑to‑day looks like, and the typical deliverables hiring managers expect (models, pipelines, reports, presentations). She explains role variants (consultant, in‑house, freelance), tradeoffs between generalist and specialist tracks, and when a master’s or PhD matters. You’ll get concrete job‑hunt tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why real‑world datasets (like NYC Open Data) matter. The episode also includes a clear, high‑level overview of DALL·E 2 and diffusion models, plus strategies for staying current (conferences vs social media) and knowing when a new framework is “good enough.”

Listen to learn actionable steps to refine your portfolio, present data science value to stakeholders, and manage FOMO while advancing your career +topics: +- data science +- career growth +- job search +dateadded: 2022-06-25 + +duration: PT01H04M12S + +quotableClips: +- name: Episode Introduction + startOffset: 67 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=67 + endOffset: 117 +- name: 'Misra Career Path: From Big Data Engineering to Content Creator' + startOffset: 117 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=117 + endOffset: 251 +- name: Transition to Developer Advocate and Content Work + startOffset: 251 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=251 + endOffset: 389 +- name: 'Data Scientist Day‑to‑Day: Explaining the Role to Non‑Tech Audiences' + startOffset: 389 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=389 + endOffset: 541 +- name: 'Deliverables: Trained Models, Pipelines, Reports, and Presentations' + startOffset: 541 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=541 + endOffset: 658 +- name: 'Role Variants: Consultant, In‑House, and Freelance Responsibilities' + startOffset: 658 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=658 + endOffset: 849 +- name: Unrealistic Expectations of Data Scientists in Industry + startOffset: 849 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=849 + endOffset: 943 +- name: 'Keeping Current with AI: Managing FOMA (Fear of Missing Out)' + startOffset: 943 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=943 + endOffset: 1221 +- name: 'DALL·E 2 Overview: Text‑to‑Image Capabilities' + startOffset: 1221 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1221 + endOffset: 1301 +- name: 'Diffusion Models: High‑Level Explanation' + startOffset: 1301 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1301 + endOffset: 1659 +- name: 'Staying Updated: Value of Industry Conferences over Social Media' + startOffset: 1659 + url: 
https://www.youtube.com/watch?v=oUycqtMoYr8&t=1659 + endOffset: 1811 +- name: 'Major Challenge: Communicating Data Science Value to Stakeholders' + startOffset: 1811 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1811 + endOffset: 2131 +- name: 'FOMA and Imposter Syndrome: Causes and Coping Strategies' + startOffset: 2131 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2131 + endOffset: 2412 +- name: 'Learning a New Framework: Knowing When It''s "Good Enough"' + startOffset: 2412 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2412 + endOffset: 2567 +- name: 'Preferred Setup: Advantages of In‑House Data Science Roles' + startOffset: 2567 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2567 + endOffset: 2853 +- name: 'Career Tradeoffs: Generalist Versus Specialist Paths' + startOffset: 2853 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2853 + endOffset: 3032 +- name: 'Breaking In: Job‑Hunting Strategies for Entry‑Level Data Scientists' + startOffset: 3032 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3032 + endOffset: 3271 +- name: 'Catching Recruiter Attention: Research, Questions, and Relevant Projects' + startOffset: 3271 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3271 + endOffset: 3429 +- name: 'Portfolio Projects: What Hiring Managers Really Look For' + startOffset: 3429 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3429 + endOffset: 3494 +- name: 'Real‑World Datasets: Using NYC Open Data and Dirty Data Examples' + startOffset: 3494 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3494 + endOffset: 3702 +- name: 'Degrees vs Experience: When a Master''s or PhD Matters' + startOffset: 3702 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3702 + endOffset: 3868 +- name: Where to Find Misra Online and Recommended Resources + startOffset: 3868 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3868 + endOffset: 3903 +- name: Episode Closing and Further Links + startOffset: 3903 + url: 
https://www.youtube.com/watch?v=oUycqtMoYr8&t=3903 + endOffset: 3852 + transcript: - header: Episode Introduction - line: Hi, everyone. This week, we'll talk about the work of data scientists and @@ -1301,119 +1405,6 @@ transcript: sec: 3919 time: '1:05:19' who: Misra -description: 'Master data science job hunt and portfolio tactics: actionable projects, - recruiter tips, DALL·E 2 basics and FOMO coping strategies to land interviews faster.' -intro: How do you actually break into data science, build a portfolio that gets interviews, - and stay sane while every new AI model vies for your attention? In this episode - Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI - (founder of “So you want to be a data scientist?”) — walks through a practical career - playbook for job hunting, portfolio building, and coping with FOMO and imposter - syndrome.

We cover Mısra’s career path from big data engineering to developer - advocacy, what a data scientist’s day‑to‑day looks like, and the typical deliverables - hiring managers expect (models, pipelines, reports, presentations). She explains - role variants (consultant, in‑house, freelance), tradeoffs between generalist and - specialist tracks, and when a master’s or PhD matters. You’ll get concrete job‑hunt - tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why - real‑world datasets (like NYC Open Data) matter. The episode also includes a clear, - high‑level overview of DALL·E 2 and diffusion models, plus strategies for staying - current (conferences vs social media) and knowing when a new framework is “good - enough.”

Listen to learn actionable steps to refine your portfolio, present - data science value to stakeholders, and manage FOMO while advancing your career. -dateadded: '2022-06-25' -duration: PT01H04M12S -quotableClips: -- name: Episode Introduction - startOffset: 67 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=67 - endOffset: 117 -- name: 'Misra Career Path: From Big Data Engineering to Content Creator' - startOffset: 117 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=117 - endOffset: 251 -- name: Transition to Developer Advocate and Content Work - startOffset: 251 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=251 - endOffset: 389 -- name: 'Data Scientist Day‑to‑Day: Explaining the Role to Non‑Tech Audiences' - startOffset: 389 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=389 - endOffset: 541 -- name: 'Deliverables: Trained Models, Pipelines, Reports, and Presentations' - startOffset: 541 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=541 - endOffset: 658 -- name: 'Role Variants: Consultant, In‑House, and Freelance Responsibilities' - startOffset: 658 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=658 - endOffset: 849 -- name: Unrealistic Expectations of Data Scientists in Industry - startOffset: 849 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=849 - endOffset: 943 -- name: 'Keeping Current with AI: Managing FOMA (Fear of Missing Out)' - startOffset: 943 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=943 - endOffset: 1221 -- name: 'DALL·E 2 Overview: Text‑to‑Image Capabilities' - startOffset: 1221 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1221 - endOffset: 1301 -- name: 'Diffusion Models: High‑Level Explanation' - startOffset: 1301 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1301 - endOffset: 1659 -- name: 'Staying Updated: Value of Industry Conferences over Social Media' - startOffset: 1659 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1659 - endOffset: 1811 -- name: 'Major Challenge: 
Communicating Data Science Value to Stakeholders' - startOffset: 1811 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1811 - endOffset: 2131 -- name: 'FOMA and Imposter Syndrome: Causes and Coping Strategies' - startOffset: 2131 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2131 - endOffset: 2412 -- name: 'Learning a New Framework: Knowing When It''s "Good Enough"' - startOffset: 2412 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2412 - endOffset: 2567 -- name: 'Preferred Setup: Advantages of In‑House Data Science Roles' - startOffset: 2567 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2567 - endOffset: 2853 -- name: 'Career Tradeoffs: Generalist Versus Specialist Paths' - startOffset: 2853 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2853 - endOffset: 3032 -- name: 'Breaking In: Job‑Hunting Strategies for Entry‑Level Data Scientists' - startOffset: 3032 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3032 - endOffset: 3271 -- name: 'Catching Recruiter Attention: Research, Questions, and Relevant Projects' - startOffset: 3271 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3271 - endOffset: 3429 -- name: 'Portfolio Projects: What Hiring Managers Really Look For' - startOffset: 3429 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3429 - endOffset: 3494 -- name: 'Real‑World Datasets: Using NYC Open Data and Dirty Data Examples' - startOffset: 3494 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3494 - endOffset: 3702 -- name: 'Degrees vs Experience: When a Master''s or PhD Matters' - startOffset: 3702 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3702 - endOffset: 3868 -- name: Where to Find Misra Online and Recommended Resources - startOffset: 3868 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3868 - endOffset: 3903 -- name: Episode Closing and Further Links - startOffset: 3903 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3903 - endOffset: 3852 --- Links: diff --git 
a/_podcast/s12e07-navigating-career-changes-in-machine-learning.md b/_podcast/how-to-grow-your-ml-engineering-career.md similarity index 97% rename from _podcast/s12e07-navigating-career-changes-in-machine-learning.md rename to _podcast/how-to-grow-your-ml-engineering-career.md index 40f58e58..f8e00b4a 100644 --- a/_podcast/s12e07-navigating-career-changes-in-machine-learning.md +++ b/_podcast/how-to-grow-your-ml-engineering-career.md @@ -1,20 +1,126 @@ --- +title: 'How to Grow Your ML Engineering Career: Platform Work, LLM Workflows & Debugging Skills' +short: How to Grow Your ML Engineering Career +season: 12 episode: 7 guests: - krzysztofszafanek +image: images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg ids: anchor: Navigating-Career-Changes-in-Machine-Learning---Chris-Szafranek-e1ucvn2 youtube: cUxZBXQgZaU -image: images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Navigating-Career-Changes-in-Machine-Learning---Chris-Szafranek-e1ucvn2 apple: https://podcasts.apple.com/us/podcast/navigating-career-changes-in-machine-learning-chris/id1541710331?i=1000597921745 spotify: https://open.spotify.com/episode/1mDlJi7vfLeJgIZStQ4G90?si=Spd04VwmSh2zZCgZzLIPbA youtube: https://www.youtube.com/watch?v=cUxZBXQgZaU -season: 12 -short: Navigating Career Changes in Machine Learning -title: 'From Web and Game Dev to ML Platforms and LLMs: Career Transitions, Prompt - Engineering and Debugging' + +description: Discover career transitions into ML, prompt engineering and LLMs—practical debugging tips, transferable skills, hiring insights, and real platform lessons +intro: How do you move from web and game development into building machine learning platforms and working with LLMs—and what practical skills carry over? 
In this episode Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, gaming and online retail, and currently an ML Platform engineer and internal consultant at Zalando, answers that question through concrete examples and career lessons.

We trace Krzysztof’s path from HTML5, Objective‑C, Swift and Unity to Python, ML platform work (the zflow library and pipeline architecture), and hands‑on experiments with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions between stacks and roles, platform consulting—training, onboarding and user support—prompt engineering tips, debugging strategies (rubber ducking, divide‑and‑conquer), and a real Postgres optimization troubleshooting case. He also discusses transferable skills like SQL, Git and shell, T‑shaped expertise, hiring dynamics, and how to get unstuck with ChatGPT and problem decomposition.

Listen to gain practical guidance on ML platforms, prompt engineering, debugging techniques, and career strategy for transitioning into ML and LLM work—plus actionable resources and prioritization tactics you can apply immediately +topics: +- machine learning +- career transitions +- LLMs +- hiring +- career strategy +dateadded: 2023-02-04 + +duration: PT00H59M42S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=0 + endOffset: 132 +- name: 'Career Overview: Web, Game Development, and Python' + startOffset: 132 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=132 + endOffset: 384 +- name: 'Mobile & Game Development: HTML5, Objective‑C, Swift, and Unity' + startOffset: 384 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=384 + endOffset: 425 +- name: 'Career Transitions: Adapting Between Stacks and Roles' + startOffset: 425 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=425 + endOffset: 606 +- name: Tech Radar & Language Freedom at Zalando + startOffset: 606 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=606 + endOffset: 805 +- name: 'Machine Learning Platform: zflow Library and Pipeline Architecture' + startOffset: 805 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=805 + endOffset: 959 +- name: 'Platform Consulting: Training, Onboarding, and User Support' + startOffset: 959 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=959 + endOffset: 1068 +- name: 'From Engineer to Consultant: Reduced Hands‑on Coding' + startOffset: 1068 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1068 + endOffset: 1106 +- name: 'Sabbatical Focus: Learning, Break, and Exploration' + startOffset: 1106 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1106 + endOffset: 1112 +- name: 'Sabbatical Projects: Diffusion Models, ChatGPT Experiments, and Modal Labs' + startOffset: 1112 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1112 + endOffset: 1321 +- name: 'Large Language Models: 
Coding Assistance, Architecture Sparring, and Caveats' + startOffset: 1321 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1321 + endOffset: 1606 +- name: 'Prompt Engineering: Practical Tips and People to Follow' + startOffset: 1606 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1606 + endOffset: 1740 +- name: 'Transferable Skills: SQL, Git, Shell, and the Lindy Effect' + startOffset: 1740 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1740 + endOffset: 2014 +- name: 'Troubleshooting Example: Postgres Optimization and Performance Gains' + startOffset: 2014 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2014 + endOffset: 2123 +- name: 'T‑Shaped Expertise: Depth, Breadth, and Career Strategy' + startOffset: 2123 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2123 + endOffset: 2257 +- name: 'Debugging as a Strength: Rubber Duck, Divide‑and‑Conquer, and Mentoring' + startOffset: 2257 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2257 + endOffset: 2692 +- name: 'Learning Motivation: Curiosity, Pomodoro, and Inspirational Content' + startOffset: 2692 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2692 + endOffset: 2917 +- name: 'Getting Unstuck: ChatGPT, Tutorials, and Problem Decomposition' + startOffset: 2917 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2917 + endOffset: 3022 +- name: 'Hiring Dynamics: Job Descriptions and Recruiting Generalists' + startOffset: 3022 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3022 + endOffset: 3263 +- name: 'Job Market Trends: Specialists vs Generalists in Machine Learning' + startOffset: 3263 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3263 + endOffset: 3382 +- name: 'Prioritization Techniques: To‑Do Lists, Deadlines, and Focus' + startOffset: 3382 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3382 + endOffset: 3491 +- name: 'Learning Resources: Books, Documentation, YouTube, and Practical Learning' + startOffset: 3491 + url: 
https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3491 + endOffset: 3632 +- name: Episode Wrap‑up and Key Takeaways + startOffset: 3632 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3632 + endOffset: 3582 + transcript: - header: Podcast Introduction - line: This week, we'll talk about quite a few things – building a machine learning @@ -1224,119 +1330,6 @@ transcript: sec: 3652 time: '1:00:52' who: Chris -description: Discover career transitions into ML, prompt engineering and LLMs—practical - debugging tips, transferable skills, hiring insights, and real platform lessons. -intro: How do you move from web and game development into building machine learning - platforms and working with LLMs—and what practical skills carry over? In this episode - Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, - gaming and online retail, and currently an ML Platform engineer and internal consultant - at Zalando, answers that question through concrete examples and career lessons. -

We trace Krzysztof’s path from HTML5, Objective‑C, Swift and Unity to Python, - ML platform work (the zflow library and pipeline architecture), and hands‑on experiments - with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions - between stacks and roles, platform consulting—training, onboarding and user support—prompt - engineering tips, debugging strategies (rubber ducking, divide‑and‑conquer), and - a real Postgres optimization troubleshooting case. He also discusses transferable - skills like SQL, Git and shell, T‑shaped expertise, hiring dynamics, and how to - get unstuck with ChatGPT and problem decomposition.

Listen to gain practical - guidance on ML platforms, prompt engineering, debugging techniques, and career strategy - for transitioning into ML and LLM work—plus actionable resources and prioritization - tactics you can apply immediately. -dateadded: '2023-02-04' -duration: PT00H59M42S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=0 - endOffset: 132 -- name: 'Career Overview: Web, Game Development, and Python' - startOffset: 132 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=132 - endOffset: 384 -- name: 'Mobile & Game Development: HTML5, Objective‑C, Swift, and Unity' - startOffset: 384 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=384 - endOffset: 425 -- name: 'Career Transitions: Adapting Between Stacks and Roles' - startOffset: 425 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=425 - endOffset: 606 -- name: Tech Radar & Language Freedom at Zalando - startOffset: 606 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=606 - endOffset: 805 -- name: 'Machine Learning Platform: zflow Library and Pipeline Architecture' - startOffset: 805 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=805 - endOffset: 959 -- name: 'Platform Consulting: Training, Onboarding, and User Support' - startOffset: 959 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=959 - endOffset: 1068 -- name: 'From Engineer to Consultant: Reduced Hands‑on Coding' - startOffset: 1068 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1068 - endOffset: 1106 -- name: 'Sabbatical Focus: Learning, Break, and Exploration' - startOffset: 1106 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1106 - endOffset: 1112 -- name: 'Sabbatical Projects: Diffusion Models, ChatGPT Experiments, and Modal Labs' - startOffset: 1112 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1112 - endOffset: 1321 -- name: 'Large Language Models: Coding Assistance, Architecture Sparring, and Caveats' - startOffset: 1321 - url: 
https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1321 - endOffset: 1606 -- name: 'Prompt Engineering: Practical Tips and People to Follow' - startOffset: 1606 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1606 - endOffset: 1740 -- name: 'Transferable Skills: SQL, Git, Shell, and the Lindy Effect' - startOffset: 1740 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1740 - endOffset: 2014 -- name: 'Troubleshooting Example: Postgres Optimization and Performance Gains' - startOffset: 2014 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2014 - endOffset: 2123 -- name: 'T‑Shaped Expertise: Depth, Breadth, and Career Strategy' - startOffset: 2123 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2123 - endOffset: 2257 -- name: 'Debugging as a Strength: Rubber Duck, Divide‑and‑Conquer, and Mentoring' - startOffset: 2257 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2257 - endOffset: 2692 -- name: 'Learning Motivation: Curiosity, Pomodoro, and Inspirational Content' - startOffset: 2692 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2692 - endOffset: 2917 -- name: 'Getting Unstuck: ChatGPT, Tutorials, and Problem Decomposition' - startOffset: 2917 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2917 - endOffset: 3022 -- name: 'Hiring Dynamics: Job Descriptions and Recruiting Generalists' - startOffset: 3022 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3022 - endOffset: 3263 -- name: 'Job Market Trends: Specialists vs Generalists in Machine Learning' - startOffset: 3263 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3263 - endOffset: 3382 -- name: 'Prioritization Techniques: To‑Do Lists, Deadlines, and Focus' - startOffset: 3382 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3382 - endOffset: 3491 -- name: 'Learning Resources: Books, Documentation, YouTube, and Practical Learning' - startOffset: 3491 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3491 - endOffset: 3632 -- name: Episode Wrap‑up and Key Takeaways - 
startOffset: 3632 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3632 - endOffset: 3582 --- Links: diff --git a/_podcast/s08e02-hacking-your-data-career.md b/_podcast/how-to-stand-out-in-data-science.md similarity index 98% rename from _podcast/s08e02-hacking-your-data-career.md rename to _podcast/how-to-stand-out-in-data-science.md index 157d2ece..aa851e34 100644 --- a/_podcast/s08e02-hacking-your-data-career.md +++ b/_podcast/how-to-stand-out-in-data-science.md @@ -1,35 +1,136 @@ --- +title: 'Data Science Career Playbook: Build Unique IoT Portfolios, Explainable AI, OSINT & LinkedIn Growth' +short: Hacking Your Data Career +season: 8 episode: 2 guests: - marijnmarkus -intro: In this episode, Marijn Markus—AI Lead and Managing Data Scientist at Capgemini—shares - how to stand out in data science by combining curiosity, courage, and creativity. - From his unconventional background in sociology and criminology, Marijn explains - how diverse teams outperform homogeneous ones, why proactive problem-solving matters, - and how to challenge hierarchy with data-driven insights.

You'll learn - how to build unique portfolio projects (like time series modeling from a coffee - machine), apply OSINT concepts to modern analytics, and grow your visibility through - a thoughtful LinkedIn strategy. -date: 2025-11-07 -topics: -- data science -- career growth +image: images/podcast/s08e02-hacking-your-data-career.jpg ids: anchor: Hacking-Your-Data-Career---Marijn-Markus-e1gijep youtube: RhSg8ill1So -image: images/podcast/s08e02-hacking-your-data-career.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Hacking-Your-Data-Career---Marijn-Markus-e1gijep apple: https://podcasts.apple.com/us/podcast/hacking-your-data-career-marijn-markus/id1541710331?i=1000555977653 spotify: https://open.spotify.com/episode/6oJsS0vhvAQasLNv3IklQ6 youtube: https://www.youtube.com/watch?v=RhSg8ill1So -season: 8 -short: Hacking Your Data Career -title: 'Data Science Career Playbook: Build Unique IoT Portfolios, Explainable AI, - OSINT & LinkedIn Growth' -description: 'Discover proven strategies to stand out in data science: build unique - portfolio projects, master proactive task selection, and grow visibility with expert - LinkedIn tactics.' + +description: 'Discover proven strategies to stand out in data science: build unique portfolio projects, master proactive task selection, and grow visibility with expert LinkedIn tactics.' +intro: In this episode, Marijn Markus—AI Lead and Managing Data Scientist at Capgemini—shares how to stand out in data science by combining curiosity, courage, and creativity. From his unconventional background in sociology and criminology, Marijn explains how diverse teams outperform homogeneous ones, why proactive problem-solving matters, and how to challenge hierarchy with data-driven insights.

You'll learn how to build unique portfolio projects (like time series modeling from a coffee machine), apply OSINT concepts to modern analytics, and grow your visibility through a thoughtful LinkedIn strategy +topics: +- data science +- career growth +dateadded: 2022-04-01 +date: 2025-11-07 + +duration: PT01H02M16S + +quotableClips: +- name: Episode Introduction & Guest Welcome + startOffset: 0 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=0 + endOffset: 70 +- name: 'From Sociology to Data Science: Election Models, Social Media & Crime Research' + startOffset: 70 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=70 + endOffset: 242 +- name: Diverse Backgrounds as a Competitive Advantage in Data Science + startOffset: 242 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=242 + endOffset: 409 +- name: 'Hiring Pitfalls: Keyword-Driven Recruitment and Role Mismatch' + startOffset: 409 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=409 + endOffset: 462 +- name: 'Curriculum Myth: Rejecting Perfection—Double Down on Unique Strengths' + startOffset: 462 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=462 + endOffset: 511 +- name: 'Core Pillars: Statistics, Programming, and Domain Knowledge' + startOffset: 511 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=511 + endOffset: 676 +- name: 'Qualitative Methods & Interviews: Turning Social Science into Value' + startOffset: 676 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=676 + endOffset: 725 +- name: 'Proactive Task Ownership: Choosing High-Impact Assignments' + startOffset: 725 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=725 + endOffset: 1029 +- name: 'Learning on the Job: Growing into Management and Product Roles' + startOffset: 1029 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1029 + endOffset: 1152 +- name: 'Explainable AI & Risky Insights: Communicating Sensitive Findings' + startOffset: 1152 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1152 + endOffset: 1405 
+- name: 'Constructive Pushback: Advising Seniors and Challenging Hierarchies' + startOffset: 1405 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1405 + endOffset: 1703 +- name: 'Stretch Assignments: Bite Off More to Discover Your Limits' + startOffset: 1703 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1703 + endOffset: 1847 +- name: 'Home Automation Demo: Home Assistant on Raspberry Pi' + startOffset: 1847 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1847 + endOffset: 1878 +- name: 'IoT for Plants: Sensors, Bluetooth, Zigbee and Practical Monitoring' + startOffset: 1878 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1878 + endOffset: 2071 +- name: 'Data Pipelines for Home Projects: Storage, Thresholds, and Alerts' + startOffset: 2071 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2071 + endOffset: 2181 +- name: 'Coffee Machine Time Series: Turning Laziness into a Portfolio Project' + startOffset: 2181 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2181 + endOffset: 2269 +- name: 'Portfolio Strategy: Build Unique Projects Instead of Only Doing Kaggle' + startOffset: 2269 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2269 + endOffset: 2425 +- name: 'NGO Impact Work: Predicting & Optimizing Smallholder Farmer Yields' + startOffset: 2425 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2425 + endOffset: 2588 +- name: 'Team Composition: Using Domain Experts and Cross-Disciplinary Skills' + startOffset: 2588 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2588 + endOffset: 2759 +- name: 'OSINT Explained: Social Media, GPS Intel and Modern Information Warfare' + startOffset: 2759 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2759 + endOffset: 3053 +- name: 'OSINT Applications: Mapping Reports, Evidence Gathering & Task Forces' + startOffset: 3053 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3053 + endOffset: 3151 +- name: 'Global Ripple Effects: Ukraine, Grain Shortages and Food Security' + 
startOffset: 3151 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3151 + endOffset: 3214 +- name: 'Soft Skills & Differentiation: Communication, Presence and Niche Expertise' + startOffset: 3214 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3214 + endOffset: 3450 +- name: 'LinkedIn Growth Strategy: Timing, Content Mix, Hashtags and Comments' + startOffset: 3450 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3450 + endOffset: 3744 +- name: 'Personal Branding Examples: Memes, Authenticity and Content Types' + startOffset: 3744 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3744 + endOffset: 3777 +- name: Episode Wrap-Up & Where to Find Marijn + startOffset: 3777 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3777 + endOffset: 3736 + transcript: - header: Episode Introduction & Guest Welcome - header: 'From Sociology to Data Science: Election Models, Social Media & Crime Research' @@ -1221,111 +1322,4 @@ transcript: sec: 3806 time: '1:03:26' who: Marijn -dateadded: '2022-04-01' -duration: PT01H02M16S -quotableClips: -- name: Episode Introduction & Guest Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=0 - endOffset: 70 -- name: 'From Sociology to Data Science: Election Models, Social Media & Crime Research' - startOffset: 70 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=70 - endOffset: 242 -- name: Diverse Backgrounds as a Competitive Advantage in Data Science - startOffset: 242 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=242 - endOffset: 409 -- name: 'Hiring Pitfalls: Keyword-Driven Recruitment and Role Mismatch' - startOffset: 409 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=409 - endOffset: 462 -- name: 'Curriculum Myth: Rejecting Perfection—Double Down on Unique Strengths' - startOffset: 462 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=462 - endOffset: 511 -- name: 'Core Pillars: Statistics, Programming, and Domain Knowledge' - startOffset: 511 - url: 
https://www.youtube.com/watch?v=RhSg8ill1So&t=511 - endOffset: 676 -- name: 'Qualitative Methods & Interviews: Turning Social Science into Value' - startOffset: 676 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=676 - endOffset: 725 -- name: 'Proactive Task Ownership: Choosing High-Impact Assignments' - startOffset: 725 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=725 - endOffset: 1029 -- name: 'Learning on the Job: Growing into Management and Product Roles' - startOffset: 1029 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1029 - endOffset: 1152 -- name: 'Explainable AI & Risky Insights: Communicating Sensitive Findings' - startOffset: 1152 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1152 - endOffset: 1405 -- name: 'Constructive Pushback: Advising Seniors and Challenging Hierarchies' - startOffset: 1405 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1405 - endOffset: 1703 -- name: 'Stretch Assignments: Bite Off More to Discover Your Limits' - startOffset: 1703 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1703 - endOffset: 1847 -- name: 'Home Automation Demo: Home Assistant on Raspberry Pi' - startOffset: 1847 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1847 - endOffset: 1878 -- name: 'IoT for Plants: Sensors, Bluetooth, Zigbee and Practical Monitoring' - startOffset: 1878 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1878 - endOffset: 2071 -- name: 'Data Pipelines for Home Projects: Storage, Thresholds, and Alerts' - startOffset: 2071 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2071 - endOffset: 2181 -- name: 'Coffee Machine Time Series: Turning Laziness into a Portfolio Project' - startOffset: 2181 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2181 - endOffset: 2269 -- name: 'Portfolio Strategy: Build Unique Projects Instead of Only Doing Kaggle' - startOffset: 2269 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2269 - endOffset: 2425 -- name: 'NGO Impact Work: Predicting & 
Optimizing Smallholder Farmer Yields' - startOffset: 2425 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2425 - endOffset: 2588 -- name: 'Team Composition: Using Domain Experts and Cross-Disciplinary Skills' - startOffset: 2588 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2588 - endOffset: 2759 -- name: 'OSINT Explained: Social Media, GPS Intel and Modern Information Warfare' - startOffset: 2759 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2759 - endOffset: 3053 -- name: 'OSINT Applications: Mapping Reports, Evidence Gathering & Task Forces' - startOffset: 3053 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3053 - endOffset: 3151 -- name: 'Global Ripple Effects: Ukraine, Grain Shortages and Food Security' - startOffset: 3151 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3151 - endOffset: 3214 -- name: 'Soft Skills & Differentiation: Communication, Presence and Niche Expertise' - startOffset: 3214 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3214 - endOffset: 3450 -- name: 'LinkedIn Growth Strategy: Timing, Content Mix, Hashtags and Comments' - startOffset: 3450 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3450 - endOffset: 3744 -- name: 'Personal Branding Examples: Memes, Authenticity and Content Types' - startOffset: 3744 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3744 - endOffset: 3777 -- name: Episode Wrap-Up & Where to Find Marijn - startOffset: 3777 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3777 - endOffset: 3736 --- diff --git a/_podcast/s08e07-from-roasting-coffee-to-backend-development.md b/_podcast/how-to-switch-to-ml-tech-without-experience.md similarity index 98% rename from _podcast/s08e07-from-roasting-coffee-to-backend-development.md rename to _podcast/how-to-switch-to-ml-tech-without-experience.md index 38469619..109cee76 100644 --- a/_podcast/s08e07-from-roasting-coffee-to-backend-development.md +++ b/_podcast/how-to-switch-to-ml-tech-without-experience.md @@ -1,41 +1,129 @@ --- 
+title: 'How to Switch to Tech: Community Meetups, Open Source Fellowships & Landing an Ecosia Internship' +short: From Roasting Coffee to Backend Development +season: 8 episode: 7 guests: - jessicagreene -intro: How do you switch to tech from a completely different career and actually land - an internship at a mission-driven company? In this episode, Jessica Greene — Senior - Machine Learning Engineer at Ecosia and co-organizer of PyLadies Berlin — walks - through her journey from film and coffee roasting to machine learning, sharing concrete - steps for a career change to tech. We cover the learning path Jessica used (Codecademy, - Andrew Ng, FreeCodeCamp), funding and study time via Germany’s Bildungsgutschein, - and hands-on experience through an open source fellowship (Rails Girls Summer of - Code) and pair programming. You’ll hear how community meetups, PyLadies, and networking - translated into an Ecosia internship, what interviewers notice (inquisitiveness, - creating roles), and practical tips for building system skills (terminal, dual-boot - Linux), overcoming imposter syndrome, and getting started with public speaking and - event organizing. If you’re considering a switch to tech, this episode offers realistic - guidance on open source fellowships, meetups, study resources, and interview strategies - to help you build skills, confidence, and professional connections. 
+image: images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg ids: anchor: From-Roasting-Coffee-to-Backend-Development---Jessica-Greene-e1i1ten/a-a7s65oj youtube: BKqmNdxsBko -image: images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Roasting-Coffee-to-Backend-Development---Jessica-Greene-e1i1ten/a-a7s65oj apple: https://podcasts.apple.com/us/podcast/from-roasting-coffee-to-backend-development-jessica/id1541710331?i=1000559856138 spotify: https://open.spotify.com/episode/3AnUc03nLbIYS6ichWIrRE?si=momJMlwdTpKFkI0FYQilag youtube: https://www.youtube.com/watch?v=BKqmNdxsBko -season: 8 -short: From Roasting Coffee to Backend Development -title: 'How to Switch to Tech: Community Meetups, Open Source Fellowships & Landing - an Ecosia Internship' -description: 'Discover practical career switch tips: meetups, open source fellowship - & landing an Ecosia internship—networking, study paths, funding, mentorship to get - hired.' + +description: 'Discover practical career switch tips: meetups, open source fellowship & landing an Ecosia internship—networking, study paths, funding, mentorship to get hired.' +intro: How do you switch to tech from a completely different career and actually land an internship at a mission-driven company? In this episode, Jessica Greene — Senior Machine Learning Engineer at Ecosia and co-organizer of PyLadies Berlin — walks through her journey from film and coffee roasting to machine learning, sharing concrete steps for a career change to tech. We cover the learning path Jessica used (Codecademy, Andrew Ng, FreeCodeCamp), funding and study time via Germany’s Bildungsgutschein, and hands-on experience through an open source fellowship (Rails Girls Summer of Code) and pair programming. 
You’ll hear how community meetups, PyLadies, and networking translated into an Ecosia internship, what interviewers notice (inquisitiveness, creating roles), and practical tips for building system skills (terminal, dual-boot Linux), overcoming imposter syndrome, and getting started with public speaking and event organizing. If you’re considering a switch to tech, this episode offers realistic guidance on open source fellowships, meetups, study resources, and interview strategies to help you build skills, confidence, and professional connections topics: - career switch -- data science +- machine learning +- job search - career growth +dateadded: 2022-05-07 + +duration: PT00H59M32S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=0 + endOffset: 84 +- name: 'Career Origin: From Film & Coffee Roasting to Tech' + startOffset: 84 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=84 + endOffset: 161 +- name: Community Support & Early Conference Exposure (PyLadies, meetups) + startOffset: 161 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=161 + endOffset: 391 +- name: 'Learning Path: Codecademy, Andrew Ng Course & FreeCodeCamp' + startOffset: 391 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=391 + endOffset: 506 +- name: 'Funding Support: German Bildungsgutschein & Structured Study Time' + startOffset: 506 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=506 + endOffset: 713 +- name: 'Open Source Fellowship: Rails Girls Summer of Code & Pair Programming' + startOffset: 713 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=713 + endOffset: 923 +- name: 'Meetups to Internship: Networking That Led to Ecosia' + startOffset: 923 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=923 + endOffset: 993 +- name: 'Interview Impressions: Inquisitiveness & Creating an Internship Role' + startOffset: 993 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=993 + endOffset: 1168 +- name: 
'Career Switch Timing: Age, Concerns & Perceptions' + startOffset: 1168 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1168 + endOffset: 1342 +- name: 'Staying Motivated: Community, Mastermind Groups & Meetups' + startOffset: 1342 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1342 + endOffset: 1550 +- name: 'Community Organizing: Event Management, Soft Skills & Leadership' + startOffset: 1550 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1550 + endOffset: 1661 +- name: 'Overcoming Imposter Syndrome: Jupyter Notebook Setup & Peer Support' + startOffset: 1661 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1661 + endOffset: 1932 +- name: 'System Skills: Terminal, Dual‑Boot Linux & Hands‑on Troubleshooting' + startOffset: 1932 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1932 + endOffset: 2054 +- name: 'Community Events: Workshops, Study Groups & Remote Formats' + startOffset: 2054 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2054 + endOffset: 2172 +- name: 'Open Source Hack Evenings: Mentorship with scikit‑learn & Gene.ai' + startOffset: 2172 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2172 + endOffset: 2283 +- name: 'Hybrid Events & Outreach: Remote Reach vs. 
In‑Person Help' + startOffset: 2283 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2283 + endOffset: 2451 +- name: 'Organizing Benefits: Networking, Company Access & Management Skills' + startOffset: 2451 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2451 + endOffset: 2574 +- name: 'Public Speaking: Start Small, Dry Runs & Crafting a Personal Edge' + startOffset: 2574 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2574 + endOffset: 2811 +- name: 'Handling Q&A: Graceful "I Don''t Know" & Turning Questions into Learning' + startOffset: 2811 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2811 + endOffset: 3032 +- name: 'Speaking ROI: Visibility, Networking & Career Opportunities' + startOffset: 3032 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3032 + endOffset: 3336 +- name: 'Ecosia Overview: Green Search Engine, Tree‑Planting Mission & Backend (Go)' + startOffset: 3336 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3336 + endOffset: 3490 +- name: 'Diversity Challenges: Gender, Privilege & Inclusion in Tech' + startOffset: 3490 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3490 + endOffset: 3593 +- name: 'Connect with Jessica: Twitter, GitHub & PyLadies Slack' + startOffset: 3593 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3593 + endOffset: 3628 +- name: Episode Wrap‑Up and Closing Remarks + startOffset: 3628 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3628 + endOffset: 3572 + transcript: - header: Podcast Introduction - header: 'Career Origin: From Film & Coffee Roasting to Tech' @@ -1141,105 +1229,6 @@ transcript: sec: 3656 time: '1:00:56' who: Jessica -dateadded: '2022-05-07' -duration: PT00H59M32S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=0 - endOffset: 84 -- name: 'Career Origin: From Film & Coffee Roasting to Tech' - startOffset: 84 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=84 - endOffset: 161 -- name: Community 
Support & Early Conference Exposure (PyLadies, meetups) - startOffset: 161 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=161 - endOffset: 391 -- name: 'Learning Path: Codecademy, Andrew Ng Course & FreeCodeCamp' - startOffset: 391 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=391 - endOffset: 506 -- name: 'Funding Support: German Bildungsgutschein & Structured Study Time' - startOffset: 506 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=506 - endOffset: 713 -- name: 'Open Source Fellowship: Rails Girls Summer of Code & Pair Programming' - startOffset: 713 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=713 - endOffset: 923 -- name: 'Meetups to Internship: Networking That Led to Ecosia' - startOffset: 923 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=923 - endOffset: 993 -- name: 'Interview Impressions: Inquisitiveness & Creating an Internship Role' - startOffset: 993 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=993 - endOffset: 1168 -- name: 'Career Switch Timing: Age, Concerns & Perceptions' - startOffset: 1168 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1168 - endOffset: 1342 -- name: 'Staying Motivated: Community, Mastermind Groups & Meetups' - startOffset: 1342 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1342 - endOffset: 1550 -- name: 'Community Organizing: Event Management, Soft Skills & Leadership' - startOffset: 1550 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1550 - endOffset: 1661 -- name: 'Overcoming Imposter Syndrome: Jupyter Notebook Setup & Peer Support' - startOffset: 1661 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1661 - endOffset: 1932 -- name: 'System Skills: Terminal, Dual‑Boot Linux & Hands‑on Troubleshooting' - startOffset: 1932 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1932 - endOffset: 2054 -- name: 'Community Events: Workshops, Study Groups & Remote Formats' - startOffset: 2054 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2054 - endOffset: 2172 -- 
name: 'Open Source Hack Evenings: Mentorship with scikit‑learn & Gene.ai' - startOffset: 2172 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2172 - endOffset: 2283 -- name: 'Hybrid Events & Outreach: Remote Reach vs. In‑Person Help' - startOffset: 2283 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2283 - endOffset: 2451 -- name: 'Organizing Benefits: Networking, Company Access & Management Skills' - startOffset: 2451 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2451 - endOffset: 2574 -- name: 'Public Speaking: Start Small, Dry Runs & Crafting a Personal Edge' - startOffset: 2574 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2574 - endOffset: 2811 -- name: 'Handling Q&A: Graceful "I Don''t Know" & Turning Questions into Learning' - startOffset: 2811 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2811 - endOffset: 3032 -- name: 'Speaking ROI: Visibility, Networking & Career Opportunities' - startOffset: 3032 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3032 - endOffset: 3336 -- name: 'Ecosia Overview: Green Search Engine, Tree‑Planting Mission & Backend (Go)' - startOffset: 3336 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3336 - endOffset: 3490 -- name: 'Diversity Challenges: Gender, Privilege & Inclusion in Tech' - startOffset: 3490 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3490 - endOffset: 3593 -- name: 'Connect with Jessica: Twitter, GitHub & PyLadies Slack' - startOffset: 3593 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3593 - endOffset: 3628 -- name: Episode Wrap‑Up and Closing Remarks - startOffset: 3628 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3628 - endOffset: 3572 --- Links: diff --git a/_podcast/s11e01-from-testing-phones-to-managing-nlp-projects.md b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md similarity index 98% rename from _podcast/s11e01-from-testing-phones-to-managing-nlp-projects.md rename to 
_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md index cf28ff39..3fbb05ce 100644 --- a/_podcast/s11e01-from-testing-phones-to-managing-nlp-projects.md +++ b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md @@ -1,20 +1,126 @@ --- +title: 'Transition from QA to Machine Learning & Data Engineering: Projects, Cloud & Interview Prep' +short: From Testing Phones to Managing NLP Projects +season: 11 episode: 1 guests: - alvaronavaspeire +image: images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg ids: anchor: From-Testing-Phones-to-Managing-NLP-Projects---Alvaro-Navas-Peire-e1oj7n8 youtube: -xumbiXOlA8 -image: images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Testing-Phones-to-Managing-NLP-Projects---Alvaro-Navas-Peire-e1oj7n8 apple: https://podcasts.apple.com/us/podcast/from-testing-phones-to-managing-nlp-projects-alvaro/id1541710331?i=1000581943071 spotify: https://open.spotify.com/episode/1LMg70fGthIR2jF4JdmFkb?si=BmEfOtfgSEOpKvp5ENRA2g youtube: https://www.youtube.com/watch?v=-xumbiXOlA8 -season: 11 -short: From Testing Phones to Managing NLP Projects -title: 'Transition from QA to Machine Learning & Data Engineering: Projects, Cloud - & Interview Prep' + +description: 'Master the transition to machine learning & data engineering: build cloud-deployed projects, sharpen interview prep, and revamp your CV to land offers.' +intro: 'How do you move from a QA role into machine learning and data engineering—what projects, cloud skills, and interview prep actually make a difference? In this episode Alvaro Navas Peire walks through his journey from testing Android phones and QA checklists to quitting the industry, taking a gap year, and retraining in machine learning and data engineering. 
With an informatics engineering background and hands-on experience from postgraduate courses, Neuromatch, and DataTalks’ ML & DE Zoomcamps, Alvaro explains the structured learning path he followed and the portfolio projects (EDA, vegetable image classification, NLP) that proved useful for hiring teams.

We cover practical topics: cloud deployment on Google Cloud, AWS and Azure; using cloud credits and Databricks; how to present projects without underselling them; technical note-taking and GitHub visibility; and role-play for interview soft skills. Alvaro also contrasts research-heavy ML with tooling-focused data engineering and shares CV, portfolio, and negotiation tips. Tune in if you’re planning a transition to machine learning or data engineering and need concrete guidance on projects, cloud experience, and interview preparation.' +topics: +- QA +- machine learning +- data engineering +- career transition +- job search +dateadded: 2022-10-07 + +duration: PT01H01M24S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=0 + endOffset: 75 +- name: Early Life & Informatics Engineering; phone industry beginnings + startOffset: 75 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=75 + endOffset: 221 +- name: 'Phone prototyping and field testing: QA checklists, CTS & RF testing' + startOffset: 221 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=221 + endOffset: 515 +- name: 'Career pivot: quitting QA, gap year, and discovering machine learning' + startOffset: 515 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=515 + endOffset: 812 +- name: 'Structured learning path: postgraduate course, Neuromatch Academy, ML & Data + Engineering Zoomcamps' + startOffset: 812 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=812 + endOffset: 1077 +- name: 'Job search strategy: improving soft skills, hiring a coach, and CV redesign' + startOffset: 1077 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1077 + endOffset: 1358 +- name: 'Interview soft skills: role-play, confidence building, and behavioral prep' + startOffset: 1358 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1358 + endOffset: 1497 +- name: 'Zoomcamp projects: speed-dating EDA and vegetable image-classification' + startOffset: 1497 
+ url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1497 + endOffset: 1636 +- name: 'Project deployment experience: Google Cloud, AWS exercises, and cloud credits' + startOffset: 1636 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1636 + endOffset: 1732 +- name: 'Presenting projects objectively: avoid underselling and focus on facts' + startOffset: 1732 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1732 + endOffset: 1898 +- name: 'Interview formats encountered: take-home tasks, time-series exercise, and + NLP-focused hiring' + startOffset: 1898 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1898 + endOffset: 2041 +- name: 'Cloud familiarity in interviews: Google Cloud, Azure, AWS—what mattered' + startOffset: 2041 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2041 + endOffset: 2102 +- name: 'Creating technical notes: long-form Markdown, GitHub gists, and screenshots' + startOffset: 2102 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2102 + endOffset: 2238 +- name: 'Note-taking workflow: video pause-write method, indexes, and VS Code' + startOffset: 2238 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2238 + endOffset: 2613 +- name: 'Role of a coach: negotiation practice, interview framing, and communication' + startOffset: 2613 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2613 + endOffset: 2859 +- name: 'Skill distinction: math-heavy research ML vs. 
tooling-focused data engineering' + startOffset: 2859 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2859 + endOffset: 2972 +- name: 'Technical interview prep: tailor study to role, projects, and hands-on exercises' + startOffset: 2972 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2972 + endOffset: 3113 +- name: 'Typical workday as an ML project manager: planning, Teams, and task coordination' + startOffset: 3113 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3113 + endOffset: 3252 +- name: 'Production tech stack: Azure, Databricks, AutoKeras, Azure Data Factory, + and SQL' + startOffset: 3252 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3252 + endOffset: 3371 +- name: 'Transition advice: programming background, math, and transferable skills + for ML careers' + startOffset: 3371 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3371 + endOffset: 3626 +- name: 'CV and portfolio tips: visual résumé, GitHub visibility, and sample CV link' + startOffset: 3626 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3626 + endOffset: 3731 +- name: 'Closing remarks & links: CV, GitHub, and LinkedIn resources' + startOffset: 3731 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3731 + endOffset: 3684 + transcript: - header: Podcast Introduction - header: Early Life & Informatics Engineering; phone industry beginnings @@ -1269,118 +1375,6 @@ transcript: sec: 3759 time: '1:02:39' who: Alexey -description: 'Master the transition to machine learning & data engineering: build - cloud-deployed projects, sharpen interview prep, and revamp your CV to land offers.' -intro: 'How do you move from a QA role into machine learning and data engineering—what - projects, cloud skills, and interview prep actually make a difference? In this episode - Alvaro Navas Peire walks through his journey from testing Android phones and QA - checklists to quitting the industry, taking a gap year, and retraining in machine - learning and data engineering. 
With an informatics engineering background and hands-on - experience from postgraduate courses, Neuromatch, and DataTalks’ ML & DE Zoomcamps, - Alvaro explains the structured learning path he followed and the portfolio projects - (EDA, vegetable image classification, NLP) that proved useful for hiring teams. -

We cover practical topics: cloud deployment on Google Cloud, AWS and Azure; - using cloud credits and Databricks; how to present projects without underselling - them; technical note-taking and GitHub visibility; and role-play for interview soft - skills. Alvaro also contrasts research-heavy ML with tooling-focused data engineering - and shares CV, portfolio, and negotiation tips. Tune in if you’re planning a transition - to machine learning or data engineering and need concrete guidance on projects, - cloud experience, and interview preparation.' -dateadded: '2022-10-07' -duration: PT01H01M24S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=0 - endOffset: 75 -- name: Early Life & Informatics Engineering; phone industry beginnings - startOffset: 75 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=75 - endOffset: 221 -- name: 'Phone prototyping and field testing: QA checklists, CTS & RF testing' - startOffset: 221 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=221 - endOffset: 515 -- name: 'Career pivot: quitting QA, gap year, and discovering machine learning' - startOffset: 515 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=515 - endOffset: 812 -- name: 'Structured learning path: postgraduate course, Neuromatch Academy, ML & Data - Engineering Zoomcamps' - startOffset: 812 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=812 - endOffset: 1077 -- name: 'Job search strategy: improving soft skills, hiring a coach, and CV redesign' - startOffset: 1077 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1077 - endOffset: 1358 -- name: 'Interview soft skills: role-play, confidence building, and behavioral prep' - startOffset: 1358 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1358 - endOffset: 1497 -- name: 'Zoomcamp projects: speed-dating EDA and vegetable image-classification' - startOffset: 1497 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1497 - endOffset: 1636 -- 
name: 'Project deployment experience: Google Cloud, AWS exercises, and cloud credits' - startOffset: 1636 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1636 - endOffset: 1732 -- name: 'Presenting projects objectively: avoid underselling and focus on facts' - startOffset: 1732 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1732 - endOffset: 1898 -- name: 'Interview formats encountered: take-home tasks, time-series exercise, and - NLP-focused hiring' - startOffset: 1898 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1898 - endOffset: 2041 -- name: 'Cloud familiarity in interviews: Google Cloud, Azure, AWS—what mattered' - startOffset: 2041 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2041 - endOffset: 2102 -- name: 'Creating technical notes: long-form Markdown, GitHub gists, and screenshots' - startOffset: 2102 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2102 - endOffset: 2238 -- name: 'Note-taking workflow: video pause-write method, indexes, and VS Code' - startOffset: 2238 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2238 - endOffset: 2613 -- name: 'Role of a coach: negotiation practice, interview framing, and communication' - startOffset: 2613 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2613 - endOffset: 2859 -- name: 'Skill distinction: math-heavy research ML vs. 
tooling-focused data engineering' - startOffset: 2859 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2859 - endOffset: 2972 -- name: 'Technical interview prep: tailor study to role, projects, and hands-on exercises' - startOffset: 2972 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2972 - endOffset: 3113 -- name: 'Typical workday as an ML project manager: planning, Teams, and task coordination' - startOffset: 3113 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3113 - endOffset: 3252 -- name: 'Production tech stack: Azure, Databricks, AutoKeras, Azure Data Factory, - and SQL' - startOffset: 3252 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3252 - endOffset: 3371 -- name: 'Transition advice: programming background, math, and transferable skills - for ML careers' - startOffset: 3371 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3371 - endOffset: 3626 -- name: 'CV and portfolio tips: visual résumé, GitHub visibility, and sample CV link' - startOffset: 3626 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3626 - endOffset: 3731 -- name: 'Closing remarks & links: CV, GitHub, and LinkedIn resources' - startOffset: 3731 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3731 - endOffset: 3684 --- Links: diff --git a/_podcast/s09e06-developer-advocacy-engineer-for-open-source.md b/_podcast/hugging-face-contributions-and-nlp-portfolio.md similarity index 97% rename from _podcast/s09e06-developer-advocacy-engineer-for-open-source.md rename to _podcast/hugging-face-contributions-and-nlp-portfolio.md index f5b142ae..76a938a8 100644 --- a/_podcast/s09e06-developer-advocacy-engineer-for-open-source.md +++ b/_podcast/hugging-face-contributions-and-nlp-portfolio.md @@ -1,20 +1,137 @@ --- +title: 'Contribute to Hugging Face & Build an NLP Portfolio: Open Source, Datasets, Spaces' +short: Developer Advocacy Engineer for Open-Source +season: 9 episode: 6 guests: - mervenoyan +image: 
images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg ids: anchor: Developer-Advocacy-Engineer-for-Open-Source---Merve-Noyan-e1kcm3u youtube: SnEYvF-Ztb8 -image: images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Developer-Advocacy-Engineer-for-Open-Source---Merve-Noyan-e1kcm3u apple: https://podcasts.apple.com/us/podcast/developer-advocacy-engineer-for-open-source-merve-noyan/id1541710331?i=1000568463048 spotify: https://open.spotify.com/episode/5k60LWIwnMpvaIbTaryRv4?si=liHqmXVYT-uB1PO4uB65OQ youtube: https://www.youtube.com/watch?v=SnEYvF-Ztb8 -season: 9 -short: Developer Advocacy Engineer for Open-Source -title: 'Contribute to Hugging Face & Build an NLP Portfolio: Open Source, Datasets, - Spaces' + +description: 'Build an NLP portfolio on Hugging Face: contribute to open source, publish datasets, deploy Spaces demos, gain PR skills and boost hiring odds.' +intro: 'How do you go from beginner projects to contributing to Hugging Face and building a visible NLP portfolio? In this episode, Merve Noyan — Google Developer Expert in Machine Learning, grad student in Data Science, and NLP-focused ML engineer — walks through practical steps for contributing to open source, datasets, and Hugging Face Spaces.

We cover Merve’s transition into NLP, finding open source via contribution sprints and good-first issues, and the nuts-and-bolts of datasets work: canonical datasets, scripts, and CI. Learn how the Hub, TensorFlow & Keras integrations, and model reproducibility features support a reproducible workflow and model registry concepts. Merve explains creating demo apps with Streamlit or Gradio on Spaces, using the Community tab and forums, and how workshops and sprints build confidence.

You’ll also get concrete advice on starting contributions while working full-time, non-code contributions, structured programs like Google Summer of Code and Hacktoberfest, handling PR feedback, and what hiring managers look for on GitHub. Tune in to walk away with actionable steps to contribute to Hugging Face, publish datasets and demos, and build an NLP portfolio recruiters can evaluate.' +topics: +- machine learning +- NLP +- open-source +dateadded: 2022-07-02 + +duration: PT00H58M05S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=0 + endOffset: 85 +- name: Guest Welcome & Episode Overview + startOffset: 85 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=85 + endOffset: 122 +- name: 'Early Career: Industrial Engineering to NLP' + startOffset: 122 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=122 + endOffset: 252 +- name: 'Transition to NLP: First Projects & Sentiment Analysis' + startOffset: 252 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=252 + endOffset: 390 +- name: 'Open Source Discovery: Finding Hugging Face & Contribution Sprints' + startOffset: 390 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=390 + endOffset: 493 +- name: 'Datasets Work: Canonical Datasets, Scripts, and CI Learning' + startOffset: 493 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=493 + endOffset: 631 +- name: 'Contributor Onboarding: Sprints, Good-First Issues, and Confidence Building' + startOffset: 631 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=631 + endOffset: 693 +- name: 'Contributing as a Side Project: Motivation and Timing' + startOffset: 693 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=693 + endOffset: 766 +- name: 'Hugging Face Projects: Tasks, Hub, TensorFlow & Keras Integration' + startOffset: 766 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=766 + endOffset: 942 +- name: 'Model Reproducibility: Hub Features and Model Registry Concepts' + 
startOffset: 942 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=942 + endOffset: 1057 +- name: 'Spaces & Community Tab: Demos with Streamlit/Gradio and Community Collaboration' + startOffset: 1057 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1057 + endOffset: 1111 +- name: 'Developer Experience: Forum Support, Workshops, and Keras Sprints' + startOffset: 1111 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1111 + endOffset: 1288 +- name: 'Role Balance: Engineering vs. Advocacy Time Split' + startOffset: 1288 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1288 + endOffset: 1406 +- name: 'Hiring Signals: Evaluating Open Source Experience on GitHub' + startOffset: 1406 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1406 + endOffset: 1509 +- name: 'Getting Started with Open Source: Sprints, Documentation, and Non-Code Contributions' + startOffset: 1509 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1509 + endOffset: 1643 +- name: 'Structured Programs: Google Summer of Code and Hacktoberfest' + startOffset: 1643 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1643 + endOffset: 1766 +- name: 'Learning from PRs: Contributing to scikit-learn and Code Quality' + startOffset: 1766 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1766 + endOffset: 1821 +- name: 'Hiring Expectations: Working with Large Codebases and PR Workflows' + startOffset: 1821 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1821 + endOffset: 2003 +- name: 'Handling PR Rejections: Discussions, Design Decisions, and Unit Tests' + startOffset: 2003 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2003 + endOffset: 2282 +- name: 'NLP Learning Resources: Courses, spaCy, Keras Examples, and Transfer Learning' + startOffset: 2282 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2282 + endOffset: 2581 +- name: 'Beginner NLP Projects: Sentiment Analysis and Classification Tasks' + startOffset: 2581 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2581 
+ endOffset: 3072 +- name: 'Portfolio Advice: Deploying Demos with Streamlit, Gradio, and Hugging Face + Spaces' + startOffset: 3072 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3072 + endOffset: 3349 +- name: 'Content Creation: Twitch Streaming and Podcast Plans' + startOffset: 3349 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3349 + endOffset: 3462 +- name: 'Contact & Community: Slack, Twitter, and DataTalks.club Outreach' + startOffset: 3462 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3462 + endOffset: 3494 +- name: 'Personal Anecdote: Mario Kart at Hugging Face' + startOffset: 3494 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3494 + endOffset: 3551 +- name: Episode Outro & Next Steps + startOffset: 3551 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3551 + endOffset: 3485 + transcript: - header: Podcast Introduction - header: Guest Welcome & Episode Overview @@ -1099,132 +1216,6 @@ transcript: sec: 3570 time: '59:30' who: Alexey -description: 'Build an NLP portfolio on Hugging Face: contribute to open source, publish - datasets, deploy Spaces demos, gain PR skills and boost hiring odds.' -intro: 'How do you go from beginner projects to contributing to Hugging Face and building - an visible NLP portfolio? In this episode, Merve Noyan — Google Developer Expert - in Machine Learning, grad student in Data Science, and NLP-focused ML engineer — - walks through practical steps for contributing to open source, datasets, and Hugging - Face Spaces.

We cover Merve’s transition into NLP, finding open source - via contribution sprints and good-first issues, and the nuts-and-bolts of datasets - work: canonical datasets, scripts, and CI. Learn how the Hub, TensorFlow & Keras - integrations, and model reproducibility features support a reproducible workflow - and model registry concepts. Merve explains creating demo apps with Streamlit or - Gradio on Spaces, using the Community tab and forums, and how workshops and sprints - build confidence.

You’ll also get concrete advice on starting contributions - while working full-time, non-code contributions, structured programs like Google - Summer of Code and Hacktoberfest, handling PR feedback, and what hiring managers - look for on GitHub. Tune in to walk away with actionable steps to contribute to - Hugging Face, publish datasets and demos, and build an NLP portfolio recruiters - can evaluate.' -dateadded: '2022-07-02' -duration: PT00H58M05S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=0 - endOffset: 85 -- name: Guest Welcome & Episode Overview - startOffset: 85 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=85 - endOffset: 122 -- name: 'Early Career: Industrial Engineering to NLP' - startOffset: 122 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=122 - endOffset: 252 -- name: 'Transition to NLP: First Projects & Sentiment Analysis' - startOffset: 252 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=252 - endOffset: 390 -- name: 'Open Source Discovery: Finding Hugging Face & Contribution Sprints' - startOffset: 390 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=390 - endOffset: 493 -- name: 'Datasets Work: Canonical Datasets, Scripts, and CI Learning' - startOffset: 493 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=493 - endOffset: 631 -- name: 'Contributor Onboarding: Sprints, Good-First Issues, and Confidence Building' - startOffset: 631 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=631 - endOffset: 693 -- name: 'Contributing as a Side Project: Motivation and Timing' - startOffset: 693 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=693 - endOffset: 766 -- name: 'Hugging Face Projects: Tasks, Hub, TensorFlow & Keras Integration' - startOffset: 766 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=766 - endOffset: 942 -- name: 'Model Reproducibility: Hub Features and Model Registry Concepts' - startOffset: 942 - url: 
https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=942 - endOffset: 1057 -- name: 'Spaces & Community Tab: Demos with Streamlit/Gradio and Community Collaboration' - startOffset: 1057 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1057 - endOffset: 1111 -- name: 'Developer Experience: Forum Support, Workshops, and Keras Sprints' - startOffset: 1111 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1111 - endOffset: 1288 -- name: 'Role Balance: Engineering vs. Advocacy Time Split' - startOffset: 1288 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1288 - endOffset: 1406 -- name: 'Hiring Signals: Evaluating Open Source Experience on GitHub' - startOffset: 1406 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1406 - endOffset: 1509 -- name: 'Getting Started with Open Source: Sprints, Documentation, and Non-Code Contributions' - startOffset: 1509 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1509 - endOffset: 1643 -- name: 'Structured Programs: Google Summer of Code and Hacktoberfest' - startOffset: 1643 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1643 - endOffset: 1766 -- name: 'Learning from PRs: Contributing to scikit-learn and Code Quality' - startOffset: 1766 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1766 - endOffset: 1821 -- name: 'Hiring Expectations: Working with Large Codebases and PR Workflows' - startOffset: 1821 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1821 - endOffset: 2003 -- name: 'Handling PR Rejections: Discussions, Design Decisions, and Unit Tests' - startOffset: 2003 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2003 - endOffset: 2282 -- name: 'NLP Learning Resources: Courses, spaCy, Keras Examples, and Transfer Learning' - startOffset: 2282 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2282 - endOffset: 2581 -- name: 'Beginner NLP Projects: Sentiment Analysis and Classification Tasks' - startOffset: 2581 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2581 - endOffset: 3072 -- 
name: 'Portfolio Advice: Deploying Demos with Streamlit, Gradio, and Hugging Face - Spaces' - startOffset: 3072 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3072 - endOffset: 3349 -- name: 'Content Creation: Twitch Streaming and Podcast Plans' - startOffset: 3349 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3349 - endOffset: 3462 -- name: 'Contact & Community: Slack, Twitter, and DataTalks.club Outreach' - startOffset: 3462 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3462 - endOffset: 3494 -- name: 'Personal Anecdote: Mario Kart at Hugging Face' - startOffset: 3494 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3494 - endOffset: 3551 -- name: Episode Outro & Next Steps - startOffset: 3551 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3551 - endOffset: 3485 --- Links: diff --git a/_podcast/s04e06-humans-in-the-loop.md b/_podcast/human-centered-mlops-and-model-monitoring.md similarity index 97% rename from _podcast/s04e06-humans-in-the-loop.md rename to _podcast/human-centered-mlops-and-model-monitoring.md index 96095426..74bc2fee 100644 --- a/_podcast/s04e06-humans-in-the-loop.md +++ b/_podcast/human-centered-mlops-and-model-monitoring.md @@ -1,12 +1,11 @@ --- -title: 'Master Human-Centered MLOps: Stakeholder Buy-In, Monitoring, Debugging & Incident - Response' +title: 'Master Human-Centered MLOps: Stakeholder Buy-In, Monitoring, Debugging & Incident Response' short: Humans in the Loop +season: 4 +episode: 6 guests: - linaweichbrodt image: images/podcast/s04e06-humans-in-the-loop.jpg -season: 4 -episode: 6 ids: youtube: o50j_Ndx2Hg anchor: Humans-in-the-Loop---Lina-Weichbrodt-e14npgp @@ -15,6 +14,134 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Humans-in-the-Loop---Lina-Weichbrodt-e14npgp spotify: https://open.spotify.com/episode/23VxmAEkKUs1kjaludRQAR apple: https://podcasts.apple.com/us/podcast/humans-in-the-loop-lina-weichbrodt/id1541710331?i=1000530535704 + +description: 'Master human-centered MLOps: actionable 
stakeholder buy-in tactics, model monitoring and incident response playbooks to debug and ship reliable ML.' +intro: 'How do you make MLOps human-centered so stakeholders actually trust models and teams can monitor, debug, and respond to incidents? In this episode, Lina Weichbrodt — a generalist machine learning developer who prototypes data-driven products end-to-end (design, implementation, A/B tests, operations) — walks through practical MLOps strategies that prioritize people as much as pipelines.

We cover a project intake checklist (business case, KPIs, alternatives), how to evaluate whether AI is needed, and scoping problems so outcomes are visible in the UI. Lina explains stakeholder engagement techniques (pairing, availability, converting fears into mitigations), demos vs reporting for buy-in, and building trust through domain understanding and data issue support. You’ll get concrete guidance on incident preparedness and ML incident response: service levels, impact assessment, post-mortems, Five Whys root-cause debugging, and turning findings into tickets. We also dive into model monitoring and detection (live test sets, small A/B tests, feature drift, data monitoring), observability practices, explainability vs debugging, and a credit-scoring case study to illustrate prioritization. Listen to learn repeatable, human-centered tactics for stakeholder buy-in, model monitoring, ML debugging, and incident response.' +topics: +- MLOps +- machine learning +- production +- tools +- communication +dateadded: 2021-08-01 + +duration: PT00H58M19S + +quotableClips: +- name: 'Episode Introduction: Humans in the Loop — MLOps & human-centered ML' + startOffset: 0 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=0 + endOffset: 209 +- name: 'Guest Career Path: Lina Weichbrodt — business to ML engineering' + startOffset: 209 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=209 + endOffset: 290 +- name: 'Project Intake Checklist: business case, KPIs, and alternative solutions' + startOffset: 290 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=290 + endOffset: 583 +- name: 'Evaluate AI Necessity: quantify alternatives before modeling' + startOffset: 583 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=583 + endOffset: 626 +- name: 'Problem Scoping: make business problems specific and UI-visible' + startOffset: 626 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=626 + endOffset: 742 +- name: 'Stakeholder Engagement: pairing, availability, and buy‑in' + 
startOffset: 742 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=742 + endOffset: 827 +- name: 'Communicating Across Teams: translating technical and business language' + startOffset: 827 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=827 + endOffset: 907 +- name: 'Trust Building: domain understanding and helping with data issues' + startOffset: 907 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=907 + endOffset: 1109 +- name: 'Addressing Concerns: convert stakeholder fears into mitigations and metrics' + startOffset: 1109 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1109 + endOffset: 1356 +- name: 'Demos vs Reporting: what stakeholders need to believe the solution works' + startOffset: 1356 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1356 + endOffset: 1474 +- name: 'Incident Preparedness: service levels and impact assessment with stakeholders' + startOffset: 1474 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1474 + endOffset: 1634 +- name: 'ML Incident Response: post‑mortems and ML‑specific recovery steps' + startOffset: 1634 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1634 + endOffset: 1763 +- name: Live Test Sets & Small A/B Tests for model monitoring and detection + startOffset: 1763 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1763 + endOffset: 1931 +- name: 'Root‑Cause Debugging: applying Five Whys to ML product issues' + startOffset: 1931 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1931 + endOffset: 2201 +- name: 'User Feedback Channels: internal bug reports and product QA processes' + startOffset: 2201 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2201 + endOffset: 2232 +- name: 'Case Study: credit scoring surprises and interpreting feature importance' + startOffset: 2232 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2232 + endOffset: 2300 +- name: 'Prioritizing Bugs: investigating widespread user complaints' + startOffset: 2300 + url: 
https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2300 + endOffset: 2366 +- name: 'Post‑Mortem Evidence: facts, blameless analysis, and investigation steps' + startOffset: 2366 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2366 + endOffset: 2523 +- name: 'Action Items: turning post‑mortems into tickets and process changes' + startOffset: 2523 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2523 + endOffset: 2651 +- name: 'Explainability vs Debugging: when to use Explainable AI tools' + startOffset: 2651 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2651 + endOffset: 2788 +- name: 'Data Monitoring: input distribution, unit changes, and feature drift' + startOffset: 2788 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2788 + endOffset: 2840 +- name: 'Project Evaluation Tools: AI Canvas and online checklists' + startOffset: 2840 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2840 + endOffset: 2968 +- name: 'Observability Practices: logging features, feature stores, and reproducibility' + startOffset: 2968 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2968 + endOffset: 3030 +- name: 'End‑User Research: mystery shopping and direct user testing' + startOffset: 3030 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3030 + endOffset: 3159 +- name: 'Idea Sourcing: proposing ML projects vs refining stakeholder problems' + startOffset: 3159 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3159 + endOffset: 3289 +- name: 'Data Literacy: educating teams and community building inside companies' + startOffset: 3289 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3289 + endOffset: 3388 +- name: 'People Skills & Tactical Hacks: convincing stakeholders and improving data + quality' + startOffset: 3388 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3388 + endOffset: 3566 +- name: 'Wrap‑Up & Contact: where to find Lina and episode closing' + startOffset: 3566 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3566 + endOffset: 3499 
+ transcript: - header: 'Episode Introduction: Humans in the Loop — MLOps & human-centered ML' - line: Today, we will talk about the human aspect in ML Ops. We have a special guest @@ -1087,138 +1214,4 @@ transcript: sec: 3621 time: '1:00:21' who: Lina -description: 'Master human-centered MLOps: actionable stakeholder buy-in tactics, - model monitoring and incident response playbooks to debug and ship reliable ML.' -intro: 'How do you make MLOps human-centered so stakeholders actually trust models - and teams can monitor, debug, and respond to incidents? In this episode, Lina Weichbrodt - — a generalist machine learning developer who prototypes data-driven products end-to-end - (design, implementation, A/B tests, operations) — walks through practical MLOps - strategies that prioritize people as much as pipelines.

We cover a project - intake checklist (business case, KPIs, alternatives), how to evaluate whether AI - is needed, and scoping problems so outcomes are visible in the UI. Lina explains - stakeholder engagement techniques (pairing, availability, converting fears into - mitigations), demos vs reporting for buy-in, and building trust through domain understanding - and data issue support. You’ll get concrete guidance on incident preparedness and - ML incident response: service levels, impact assessment, post-mortems, Five Whys - root-cause debugging, and turning findings into tickets. We also dive into model - monitoring and detection (live test sets, small A/B tests, feature drift, data monitoring), - observability practices, explainability vs debugging, and a credit-scoring case - study to illustrate prioritization. Listen to learn repeatable, human-centered tactics - for stakeholder buy-in, model monitoring, ML debugging, and incident response.' -dateadded: '2021-08-01' -duration: PT00H58M19S -quotableClips: -- name: 'Episode Introduction: Humans in the Loop — MLOps & human-centered ML' - startOffset: 0 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=0 - endOffset: 209 -- name: 'Guest Career Path: Lina Weichbrodt — business to ML engineering' - startOffset: 209 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=209 - endOffset: 290 -- name: 'Project Intake Checklist: business case, KPIs, and alternative solutions' - startOffset: 290 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=290 - endOffset: 583 -- name: 'Evaluate AI Necessity: quantify alternatives before modeling' - startOffset: 583 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=583 - endOffset: 626 -- name: 'Problem Scoping: make business problems specific and UI-visible' - startOffset: 626 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=626 - endOffset: 742 -- name: 'Stakeholder Engagement: pairing, availability, and buy‑in' - startOffset: 742 - url: 
https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=742 - endOffset: 827 -- name: 'Communicating Across Teams: translating technical and business language' - startOffset: 827 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=827 - endOffset: 907 -- name: 'Trust Building: domain understanding and helping with data issues' - startOffset: 907 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=907 - endOffset: 1109 -- name: 'Addressing Concerns: convert stakeholder fears into mitigations and metrics' - startOffset: 1109 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1109 - endOffset: 1356 -- name: 'Demos vs Reporting: what stakeholders need to believe the solution works' - startOffset: 1356 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1356 - endOffset: 1474 -- name: 'Incident Preparedness: service levels and impact assessment with stakeholders' - startOffset: 1474 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1474 - endOffset: 1634 -- name: 'ML Incident Response: post‑mortems and ML‑specific recovery steps' - startOffset: 1634 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1634 - endOffset: 1763 -- name: Live Test Sets & Small A/B Tests for model monitoring and detection - startOffset: 1763 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1763 - endOffset: 1931 -- name: 'Root‑Cause Debugging: applying Five Whys to ML product issues' - startOffset: 1931 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1931 - endOffset: 2201 -- name: 'User Feedback Channels: internal bug reports and product QA processes' - startOffset: 2201 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2201 - endOffset: 2232 -- name: 'Case Study: credit scoring surprises and interpreting feature importance' - startOffset: 2232 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2232 - endOffset: 2300 -- name: 'Prioritizing Bugs: investigating widespread user complaints' - startOffset: 2300 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2300 - endOffset: 
2366 -- name: 'Post‑Mortem Evidence: facts, blameless analysis, and investigation steps' - startOffset: 2366 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2366 - endOffset: 2523 -- name: 'Action Items: turning post‑mortems into tickets and process changes' - startOffset: 2523 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2523 - endOffset: 2651 -- name: 'Explainability vs Debugging: when to use Explainable AI tools' - startOffset: 2651 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2651 - endOffset: 2788 -- name: 'Data Monitoring: input distribution, unit changes, and feature drift' - startOffset: 2788 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2788 - endOffset: 2840 -- name: 'Project Evaluation Tools: AI Canvas and online checklists' - startOffset: 2840 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2840 - endOffset: 2968 -- name: 'Observability Practices: logging features, feature stores, and reproducibility' - startOffset: 2968 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2968 - endOffset: 3030 -- name: 'End‑User Research: mystery shopping and direct user testing' - startOffset: 3030 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3030 - endOffset: 3159 -- name: 'Idea Sourcing: proposing ML projects vs refining stakeholder problems' - startOffset: 3159 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3159 - endOffset: 3289 -- name: 'Data Literacy: educating teams and community building inside companies' - startOffset: 3289 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3289 - endOffset: 3388 -- name: 'People Skills & Tactical Hacks: convincing stakeholders and improving data - quality' - startOffset: 3388 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3388 - endOffset: 3566 -- name: 'Wrap‑Up & Contact: where to find Lina and episode closing' - startOffset: 3566 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3566 - endOffset: 3499 --- diff --git 
a/_podcast/s13e08-navigating-industrial-data-challenges.md b/_podcast/industrial-data-small-data-production-machine-learning.md similarity index 97% rename from _podcast/s13e08-navigating-industrial-data-challenges.md rename to _podcast/industrial-data-small-data-production-machine-learning.md index 558ad6ae..cee6ff24 100644 --- a/_podcast/s13e08-navigating-industrial-data-challenges.md +++ b/_podcast/industrial-data-small-data-production-machine-learning.md @@ -1,20 +1,152 @@ --- +title: 'Master Industrial Data: Synthetic Tabular Data, Small-Data Modeling, Sensors & MLOps' +short: Navigating Industrial Data Challenges +season: 13 episode: 8 guests: - rosonaeldred +image: images/podcast/s13e08-navigating-industrial-data-challenges.jpg ids: anchor: ow/datatalksclub/episodes/Navigating-Industrial-Data-Challenges---Rosona-Eldred-e225aam youtube: rwuud5wr3J4 -image: images/podcast/s13e08-navigating-industrial-data-challenges.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Navigating-Industrial-Data-Challenges---Rosona-Eldred-e225aam apple: https://podcasts.apple.com/us/podcast/navigating-industrial-data-challenges-rosona-eldred/id1541710331?i=1000608992445 spotify: https://open.spotify.com/episode/1o6rtfFydBVoc0ER5ZUiRQ?si=rkgzEFquSfql4Za6cyjX2g youtube: https://www.youtube.com/watch?v=rwuud5wr3J4 -season: 13 -short: Navigating Industrial Data Challenges -title: 'Master Industrial Data: Synthetic Tabular Data, Small-Data Modeling, Sensors - & MLOps' + +description: 'Master industrial data: learn synthetic tabular data and small-data modeling for sensors & MLOps—optimize QC, predictive maintenance and deploy models faster.' +intro: How do you build reliable machine learning when your datasets are generated by production lines, tiny R&D campaigns, or long-running quality tests instead of millions of web events? 
In this episode, Rosona Eldred — a mathematician-turned-machine learning engineer leading synthetic tabular data work in an AI Innovation team — walks us through mastering industrial data, from sensors and traceability to small-data modeling and MLOps trade-offs.

We explore what makes industrial data unique (R&D experiments, pilot plants, full production), concrete process examples like blue-paint scale-up and packing-peanuts manufacturing, and long-term quality tests such as the Florida weathering trial. Rosona breaks down sensor choices, batching and granularity challenges, inline versus destructive quality measurements, and how anomaly detection should feed human decisioning. She also covers regulatory and sustainability tracking, reusing historical experiments for reformulation, proxy metrics, optimization trade-offs, and practical methods for tiny-data problems — statistical techniques, transfer learning, and leveraging domain experts. Finally, she contrasts sparse R&D models with streaming, production-scale MLOps.

Listen to gain concrete strategies for synthetic tabular data, small-data modeling, sensor-driven monitoring, and when to adopt production MLOps versus lightweight R&D workflows +topics: +- industrial data +- synthetic tabular data +- MLOps +dateadded: 2023-04-16 + +duration: PT01H01M28S + +quotableClips: +- name: 'Episode Intro: Guest Overview & Synthetic Tabular Data Focus' + startOffset: 83 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=83 + endOffset: 158 +- name: 'Career Pivot: From PhD Algebraic Topology to Industry' + startOffset: 158 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=158 + endOffset: 352 +- name: 'Academic Roots: 3D Topological Models and Research Background' + startOffset: 352 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=352 + endOffset: 468 +- name: 'Mathematical Mindset: Logical Reasoning, Proof-Style Thinking for Data' + startOffset: 468 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=468 + endOffset: 571 +- name: 'Transition Challenges: Seniority vs Domain Experience in Industry' + startOffset: 571 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=571 + endOffset: 645 +- name: 'Defining Industrial Data: Production-Generated Datasets Explained' + startOffset: 645 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=645 + endOffset: 743 +- name: 'Industrial Data Spectrum: R&D Experiments, Pilot Plants, Full Production' + startOffset: 743 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=743 + endOffset: 910 +- name: 'Process Example: Blue Paint R&D, Automation, and Scale-Up' + startOffset: 910 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=910 + endOffset: 968 +- name: 'Long-Term Quality Testing: Weathering & the Florida Paint Test' + startOffset: 968 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=968 + endOffset: 1049 +- name: 'Industrial vs Internet Data: Fixed Sensors and Heterogeneous Equipment' + startOffset: 1049 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1049 + endOffset: 1122 +- 
name: 'Process Illustration: Packing Peanuts Production and Sensor Choices' + startOffset: 1122 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1122 + endOffset: 1337 +- name: 'Data Granularity & Traceability: Batching, Mixing, and Coarseness Challenges' + startOffset: 1337 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1337 + endOffset: 1493 +- name: 'Business Use Cases: Quality Control, Predictive Maintenance, Monitoring' + startOffset: 1493 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1493 + endOffset: 1657 +- name: 'Quality Measurement Methods: Inline Monitoring vs Destructive Tests' + startOffset: 1657 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1657 + endOffset: 1734 +- name: 'From Alerts to Action: Anomaly Detection and Human Decisioning' + startOffset: 1734 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1734 + endOffset: 1870 +- name: 'Regulatory & Sustainability Tracking: New Requirements and Data Gaps' + startOffset: 1870 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1870 + endOffset: 2135 +- name: 'Tiny Data R&D: Reformulation and Experimental Design After Regulation' + startOffset: 2135 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2135 + endOffset: 2300 +- name: 'Reusing Historical Experiments: Informing Product Redevelopment' + startOffset: 2300 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2300 + endOffset: 2340 +- name: 'Industrial Data Types: Ingredients, Spectra, Material Properties, Tests' + startOffset: 2340 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2340 + endOffset: 2508 +- name: 'Proxy Metrics & Application Tests: Measuring End-Product Behavior' + startOffset: 2508 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2508 + endOffset: 2686 +- name: 'Optimization Problems: Logistics, Mathematical Solvers, Trade-offs' + startOffset: 2686 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2686 + endOffset: 2961 +- name: 'Modeling Small Data: Statistical Methods, Transfer 
Learning, Domain Experts' + startOffset: 2961 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2961 + endOffset: 3044 +- name: 'MLOps Fit: Sparse R&D Models vs High-Volume Production Deployments' + startOffset: 3044 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3044 + endOffset: 3123 +- name: 'Production-Scale Data: Streaming, Big Data Processing, Real-Time Alerts' + startOffset: 3123 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3123 + endOffset: 3250 +- name: 'Domain Knowledge Value: Tacit Expertise Beyond the CSV' + startOffset: 3250 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3250 + endOffset: 3344 +- name: 'Collaborative Workflow: EDA, Definitions, and Aligning Measurements' + startOffset: 3344 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3344 + endOffset: 3426 +- name: 'Learning Resources: Sensor Datasets and Semiconductor Anomaly Repos' + startOffset: 3426 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3426 + endOffset: 3545 +- name: 'Career Motivation: Choosing Industry Over Academia' + startOffset: 3545 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3545 + endOffset: 3640 +- name: 'Industry Work Culture: Shop Floor Interactions and Research Flavor' + startOffset: 3640 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3640 + endOffset: 3750 +- name: 'Conclusion: Key Takeaways and Next Steps' + startOffset: 3750 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3750 + endOffset: 3688 + transcript: - header: 'Episode Intro: Guest Overview & Synthetic Tabular Data Focus' - line: This week we'll talk about industrial data challenges. We have a special guest @@ -1509,150 +1641,6 @@ transcript: sec: 3771 time: '1:02:51' who: Rosona -description: 'Master industrial data: learn synthetic tabular data and small-data - modeling for sensors & MLOps—optimize QC, predictive maintenance and deploy models - faster.' 
-intro: How do you build reliable machine learning when your datasets are generated - by production lines, tiny R&D campaigns, or long-running quality tests instead of - millions of web events? In this episode, Rosona Eldred — a mathematician-turned-machine - learning engineer leading synthetic tabular data work in an AI Innovation team — - walks us through mastering industrial data, from sensors and traceability to small-data - modeling and MLOps trade-offs.

We explore what makes industrial data unique - (R&D experiments, pilot plants, full production), concrete process examples like - blue-paint scale-up and packing-peanuts manufacturing, and long-term quality tests - such as the Florida weathering trial. Rosona breaks down sensor choices, batching - and granularity challenges, inline versus destructive quality measurements, and - how anomaly detection should feed human decisioning. She also covers regulatory - and sustainability tracking, reusing historical experiments for reformulation, proxy - metrics, optimization trade-offs, and practical methods for tiny-data problems — - statistical techniques, transfer learning, and leveraging domain experts. Finally, - she contrasts sparse R&D models with streaming, production-scale MLOps.

- Listen to gain concrete strategies for synthetic tabular data, small-data modeling, - sensor-driven monitoring, and when to adopt production MLOps versus lightweight - R&D workflows. -dateadded: '2023-04-16' -duration: PT01H01M28S -quotableClips: -- name: 'Episode Intro: Guest Overview & Synthetic Tabular Data Focus' - startOffset: 83 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=83 - endOffset: 158 -- name: 'Career Pivot: From PhD Algebraic Topology to Industry' - startOffset: 158 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=158 - endOffset: 352 -- name: 'Academic Roots: 3D Topological Models and Research Background' - startOffset: 352 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=352 - endOffset: 468 -- name: 'Mathematical Mindset: Logical Reasoning, Proof-Style Thinking for Data' - startOffset: 468 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=468 - endOffset: 571 -- name: 'Transition Challenges: Seniority vs Domain Experience in Industry' - startOffset: 571 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=571 - endOffset: 645 -- name: 'Defining Industrial Data: Production-Generated Datasets Explained' - startOffset: 645 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=645 - endOffset: 743 -- name: 'Industrial Data Spectrum: R&D Experiments, Pilot Plants, Full Production' - startOffset: 743 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=743 - endOffset: 910 -- name: 'Process Example: Blue Paint R&D, Automation, and Scale-Up' - startOffset: 910 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=910 - endOffset: 968 -- name: 'Long-Term Quality Testing: Weathering & the Florida Paint Test' - startOffset: 968 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=968 - endOffset: 1049 -- name: 'Industrial vs Internet Data: Fixed Sensors and Heterogeneous Equipment' - startOffset: 1049 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1049 - endOffset: 1122 -- name: 'Process Illustration: Packing Peanuts Production and 
Sensor Choices' - startOffset: 1122 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1122 - endOffset: 1337 -- name: 'Data Granularity & Traceability: Batching, Mixing, and Coarseness Challenges' - startOffset: 1337 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1337 - endOffset: 1493 -- name: 'Business Use Cases: Quality Control, Predictive Maintenance, Monitoring' - startOffset: 1493 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1493 - endOffset: 1657 -- name: 'Quality Measurement Methods: Inline Monitoring vs Destructive Tests' - startOffset: 1657 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1657 - endOffset: 1734 -- name: 'From Alerts to Action: Anomaly Detection and Human Decisioning' - startOffset: 1734 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1734 - endOffset: 1870 -- name: 'Regulatory & Sustainability Tracking: New Requirements and Data Gaps' - startOffset: 1870 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1870 - endOffset: 2135 -- name: 'Tiny Data R&D: Reformulation and Experimental Design After Regulation' - startOffset: 2135 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2135 - endOffset: 2300 -- name: 'Reusing Historical Experiments: Informing Product Redevelopment' - startOffset: 2300 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2300 - endOffset: 2340 -- name: 'Industrial Data Types: Ingredients, Spectra, Material Properties, Tests' - startOffset: 2340 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2340 - endOffset: 2508 -- name: 'Proxy Metrics & Application Tests: Measuring End-Product Behavior' - startOffset: 2508 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2508 - endOffset: 2686 -- name: 'Optimization Problems: Logistics, Mathematical Solvers, Trade-offs' - startOffset: 2686 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2686 - endOffset: 2961 -- name: 'Modeling Small Data: Statistical Methods, Transfer Learning, Domain Experts' - startOffset: 2961 - url: 
https://www.youtube.com/watch?v=rwuud5wr3J4&t=2961 - endOffset: 3044 -- name: 'MLOps Fit: Sparse R&D Models vs High-Volume Production Deployments' - startOffset: 3044 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3044 - endOffset: 3123 -- name: 'Production-Scale Data: Streaming, Big Data Processing, Real-Time Alerts' - startOffset: 3123 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3123 - endOffset: 3250 -- name: 'Domain Knowledge Value: Tacit Expertise Beyond the CSV' - startOffset: 3250 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3250 - endOffset: 3344 -- name: 'Collaborative Workflow: EDA, Definitions, and Aligning Measurements' - startOffset: 3344 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3344 - endOffset: 3426 -- name: 'Learning Resources: Sensor Datasets and Semiconductor Anomaly Repos' - startOffset: 3426 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3426 - endOffset: 3545 -- name: 'Career Motivation: Choosing Industry Over Academia' - startOffset: 3545 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3545 - endOffset: 3640 -- name: 'Industry Work Culture: Shop Floor Interactions and Research Flavor' - startOffset: 3640 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3640 - endOffset: 3750 -- name: 'Conclusion: Key Takeaways and Next Steps' - startOffset: 3750 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3750 - endOffset: 3688 --- Links: diff --git a/_podcast/s15e02-investing-in-open-source-data-tools.md b/_podcast/investing-in-open-source-developer-tools.md similarity index 97% rename from _podcast/s15e02-investing-in-open-source-data-tools.md rename to _podcast/investing-in-open-source-developer-tools.md index eb46496d..acf2e418 100644 --- a/_podcast/s15e02-investing-in-open-source-data-tools.md +++ b/_podcast/investing-in-open-source-developer-tools.md @@ -1,21 +1,141 @@ --- +title: 'Early-Stage Investing in Open Source Developer Tools: Deal Sourcing, Due Diligence & Commercialization Models' 
+short: Investing in Open-Source Data Tools +season: 15 episode: 2 guests: - belawiertz -date: 2025-11-07 +image: images/podcast/s15e02-investing-in-open-source-data-tools.jpg ids: anchor: atatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 youtube: 7Bg1JQLnCao -image: images/podcast/s15e02-investing-in-open-source-data-tools.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 apple: https://podcasts.apple.com/us/podcast/investing-in-open-source-data-tools-bela-wiertz/id1541710331?i=1000621912675 spotify: https://open.spotify.com/episode/6mHnZ3IswczK46UP3MBp4d?si=KkrbjXmJSaiWbq3d9BzTUQ youtube: https://www.youtube.com/watch?v=7Bg1JQLnCao -season: 15 -short: Investing in Open-Source Data Tools -title: 'Investing in Open Source Developer, Data & AI Tooling: Go-to-Market, Funding - & Monetization' + +description: "Discover early-stage investing in open-source developer tools: deal sourcing, due diligence, and commercialization models for data, AI & developer tooling startups." +intro: "How do early-stage investors evaluate open-source developer tools — and what signals actually predict commercial success? In this episode, Bela Wiertz — who invests in early-stage open-source startups at a German family office focused on Data, AI & Developer Tooling — breaks down the investor playbook for sourcing, evaluating, and funding OSS companies. Drawing from hands-on deal flow and due diligence experience, Bela reveals how investors screen GitHub repositories, conduct developer interviews, and assess community engagement beyond vanity metrics like stars.

We explore the mechanics of open-source commercialization: open-core versus hosted services, enterprise licensing models, support revenue limitations, and why community-driven distribution creates unique investment opportunities. Bela explains practical due diligence techniques (co-investor reference checks, user adoption analysis, founder-market fit), funding stage dynamics from angel to seed rounds, and geographic investment patterns in European OSS startups. Real-world case studies include Hugging Face's AI ecosystem play, Supabase's Firebase alternative, Kong's API gateway monetization, and Qdrant's vector database positioning.

Listen to understand how investors think about open-source deal sourcing, what community metrics matter for fundraising, and which monetization models actually scale — essential insights for founders building OSS developer tools and investors evaluating this unique category." +dateadded: 2023-07-23 +topics: +- open source +- tools +- investing +- fundraising +- early-stage startups +duration: PT01H01M26S + +quotableClips: +- name: Episode Start & Welcome + startOffset: 0 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=0 + endOffset: 75 +- name: 'Guest Overview: Bela’s Role at a Family Office' + startOffset: 75 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=75 + endOffset: 160 +- name: 'Career Path: From Business Studies to Open Source Investing' + startOffset: 160 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=160 + endOffset: 337 +- name: 'Commercializing Open Source Communities: Company Builder Insights' + startOffset: 337 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=337 + endOffset: 584 +- name: Why Venture Funding Matters for Early-Stage Startups + startOffset: 584 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=584 + endOffset: 822 +- name: 'Open Source as Go-to-Market: Community Trust and Distribution' + startOffset: 822 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=822 + endOffset: 1000 +- name: 'Bottom-Up Distribution: Developer Adoption Feeding Enterprise Sales' + startOffset: 1000 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1000 + endOffset: 1113 +- name: 'Investment Focus: Early-Stage B2B Developer, Data & AI Tooling' + startOffset: 1113 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1113 + endOffset: 1187 +- name: 'Funding Stage Primer: Angels, Pre-Seed, and Seed Explained' + startOffset: 1187 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1187 + endOffset: 1340 +- name: 'Fundraising Strategy: 12–18 Month Runway & Use of Proceeds' + startOffset: 1340 + url: 
https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1340 + endOffset: 1422 +- name: 'Geographic & Sector Focus: Europe and Developer Stack Niches' + startOffset: 1422 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1422 + endOffset: 1519 +- name: 'Investor Types Compared: Angels, VCs, and Family Offices' + startOffset: 1519 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1519 + endOffset: 1828 +- name: 'Check Sizes & Stage Variability: No One-Size-Fits-All' + startOffset: 1828 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1828 + endOffset: 1951 +- name: 'Investment Criteria: Team, Market Need, and Commercialization Plan' + startOffset: 1951 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1951 + endOffset: 2187 +- name: 'Early-Stage Signals: Assessing Problem Validity over PMF' + startOffset: 2187 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2187 + endOffset: 2239 +- name: 'Due Diligence: Founder Calls, User Interviews, and Co-Investor Checks' + startOffset: 2239 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2239 + endOffset: 2341 +- name: 'Community Metrics: Interpreting GitHub Stars vs. 
Active Engagement' + startOffset: 2341 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2341 + endOffset: 2538 +- name: 'Sourcing Deal Flow: GitHub Screening, Data Tools, and Networking' + startOffset: 2538 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2538 + endOffset: 2649 +- name: 'Daily Sourcing Routine: Allocating Time for Outbound Discovery' + startOffset: 2649 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2649 + endOffset: 2783 +- name: 'Inbound Outreach: How Founders Should Pitch Investors' + startOffset: 2783 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2783 + endOffset: 2968 +- name: 'Open-Core & Licensing Strategies: Mixing Open and Proprietary Code' + startOffset: 2968 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2968 + endOffset: 3069 +- name: 'Monetization Models: Hosted Services, Enterprise Licenses, Support' + startOffset: 3069 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3069 + endOffset: 3287 +- name: 'Scalability Considerations: Limits of Support-Based Revenue' + startOffset: 3287 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3287 + endOffset: 3333 +- name: 'Open Source Outlook: Paths to Market Leadership and Challenges' + startOffset: 3333 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3333 + endOffset: 3446 +- name: 'Recent Open Source Successes: Hugging Face, Supabase, Kong, Qdrant' + startOffset: 3446 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3446 + endOffset: 3618 +- name: Recommended Reading & Resources on Investing and Community Building + startOffset: 3618 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3618 + endOffset: 3739 +- name: Episode Wrap-Up & Closing Remarks + startOffset: 3739 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3739 + endOffset: 3686 + transcript: - header: Episode Start & Welcome - header: 'Guest Overview: Bela’s Role at a Family Office' @@ -1085,135 +1205,6 @@ transcript: sec: 3761 time: '1:02:41' who: Alexey -intro: How do you build a 
sustainable business around open source developer, data - and AI tooling — and what does it take to fund, commercialize and scale it? In this - episode Bela Wiertz, who works at a German family office investing in VC funds and - early-stage startups with a focus on open-source Data, AI & Developer Tooling, walks - through the practical playbook for founders and investors. Drawing on hands-on sourcing - and evaluation of early-stage open-source companies, Bela covers go-to-market strategies - that leverage community trust and bottom-up developer adoption, the role of venture - funding for angels, pre-seed and seed rounds, and fundraising hygiene like a 12–18 - month runway. We dig into open-core and licensing trade-offs, monetization models - (hosted services, enterprise licenses, support), limits to support-led revenue, - and how to read community metrics — GitHub stars versus active engagement. Bela - also explains sourcing and due diligence techniques (GitHub screening, user interviews, - co-investor checks), geographic and sector focus in Europe, and real-world examples - like Hugging Face, Supabase, Kong and Qdrant. Listen to gain actionable frameworks - for GTM, funding strategy, monetization and early-stage investment signals for open - source developer, data and AI tooling. -description: 'Discover open source go-to-market for developer tooling: funding, monetization - models, community metrics and fundraising tactics to scale early-stage startups.' 
-dateadded: '2023-07-23' -duration: PT01H01M26S -quotableClips: -- name: Episode Start & Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=0 - endOffset: 75 -- name: 'Guest Overview: Bela’s Role at a Family Office' - startOffset: 75 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=75 - endOffset: 160 -- name: 'Career Path: From Business Studies to Open Source Investing' - startOffset: 160 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=160 - endOffset: 337 -- name: 'Commercializing Open Source Communities: Company Builder Insights' - startOffset: 337 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=337 - endOffset: 584 -- name: Why Venture Funding Matters for Early-Stage Startups - startOffset: 584 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=584 - endOffset: 822 -- name: 'Open Source as Go-to-Market: Community Trust and Distribution' - startOffset: 822 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=822 - endOffset: 1000 -- name: 'Bottom-Up Distribution: Developer Adoption Feeding Enterprise Sales' - startOffset: 1000 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1000 - endOffset: 1113 -- name: 'Investment Focus: Early-Stage B2B Developer, Data & AI Tooling' - startOffset: 1113 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1113 - endOffset: 1187 -- name: 'Funding Stage Primer: Angels, Pre-Seed, and Seed Explained' - startOffset: 1187 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1187 - endOffset: 1340 -- name: 'Fundraising Strategy: 12–18 Month Runway & Use of Proceeds' - startOffset: 1340 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1340 - endOffset: 1422 -- name: 'Geographic & Sector Focus: Europe and Developer Stack Niches' - startOffset: 1422 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1422 - endOffset: 1519 -- name: 'Investor Types Compared: Angels, VCs, and Family Offices' - startOffset: 1519 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1519 - endOffset: 1828 
-- name: 'Check Sizes & Stage Variability: No One-Size-Fits-All' - startOffset: 1828 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1828 - endOffset: 1951 -- name: 'Investment Criteria: Team, Market Need, and Commercialization Plan' - startOffset: 1951 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1951 - endOffset: 2187 -- name: 'Early-Stage Signals: Assessing Problem Validity over PMF' - startOffset: 2187 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2187 - endOffset: 2239 -- name: 'Due Diligence: Founder Calls, User Interviews, and Co-Investor Checks' - startOffset: 2239 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2239 - endOffset: 2341 -- name: 'Community Metrics: Interpreting GitHub Stars vs. Active Engagement' - startOffset: 2341 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2341 - endOffset: 2538 -- name: 'Sourcing Deal Flow: GitHub Screening, Data Tools, and Networking' - startOffset: 2538 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2538 - endOffset: 2649 -- name: 'Daily Sourcing Routine: Allocating Time for Outbound Discovery' - startOffset: 2649 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2649 - endOffset: 2783 -- name: 'Inbound Outreach: How Founders Should Pitch Investors' - startOffset: 2783 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2783 - endOffset: 2968 -- name: 'Open-Core & Licensing Strategies: Mixing Open and Proprietary Code' - startOffset: 2968 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2968 - endOffset: 3069 -- name: 'Monetization Models: Hosted Services, Enterprise Licenses, Support' - startOffset: 3069 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3069 - endOffset: 3287 -- name: 'Scalability Considerations: Limits of Support-Based Revenue' - startOffset: 3287 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3287 - endOffset: 3333 -- name: 'Open Source Outlook: Paths to Market Leadership and Challenges' - startOffset: 3333 - url: 
https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3333 - endOffset: 3446 -- name: 'Recent Open Source Successes: Hugging Face, Supabase, Kong, Qdrant' - startOffset: 3446 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3446 - endOffset: 3618 -- name: Recommended Reading & Resources on Investing and Community Building - startOffset: 3618 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3618 - endOffset: 3739 -- name: Episode Wrap-Up & Closing Remarks - startOffset: 3739 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3739 - endOffset: 3686 --- Links: diff --git a/_podcast/s05e08-the-last-mile-in-data.md b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md similarity index 97% rename from _podcast/s05e08-the-last-mile-in-data.md rename to _podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md index 433ab3d6..ba4bad81 100644 --- a/_podcast/s05e08-the-last-mile-in-data.md +++ b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md @@ -1,12 +1,11 @@ --- -title: 'Last-Mile Data Delivery for the Modern Data Stack: Build Data Products to - Boost Adoption' +title: 'Last-Mile Data Delivery for the Modern Data Stack: Build Data Products to Boost Adoption' short: Conquering the Last Mile in Data +season: 5 +episode: 8 guests: - caitlinmoorman image: images/podcast/s05e08-the-last-mile-in-data.jpg -season: 5 -episode: 8 ids: youtube: HfMpG2zpa2I anchor: Conquering-the-Last-Mile-in-Data---Caitlin-Moorman-e1958c1 @@ -15,6 +14,121 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Conquering-the-Last-Mile-in-Data---Caitlin-Moorman-e1958c1 spotify: https://open.spotify.com/episode/6SGjBev8koFDRpDvLV76ZQ apple: https://podcasts.apple.com/us/podcast/conquering-the-last-mile-in-data-caitlin-moorman/id1541710331?i=1000539421886 + +description: Learn last-mile data delivery, build data products for the modern data stack, boost adoption, embed analytics in decisions, and prove measurable ROI 
+intro: 'How do you turn a powerful modern data stack into analytics people actually use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove Recommerce and former data lead in crowdfunding and self-publishing, walks through the last-mile data delivery challenges that block adoption and offers practical approaches to build data products that drive decisions.

We define the “last mile” in data delivery and contrast modern data stack capabilities with last‑mile execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), treating data as a product, user research to diagnose poor adoption, and simplifying A/B testing reporting for decision‑makers. Caitlin outlines a product‑design mindset—outcome‑first projects, persona-driven abstractions, low‑fidelity prototyping, and embedding metrics in meetings—to prove impact and build advocacy. She also covers cultural barriers, measuring hard‑to‑track work with proxies, scoping narrow slices, recruiting advocates, and using growth marketing as an early use case.

Listen to learn actionable frameworks and experiments you can use to improve data adoption, design usable data products, and measure tangible wins that create momentum in your organization.' +topics: +- data analytics +- tools +- product management +- leadership +dateadded: 2021-10-23 + +duration: PT01H01M58S + +quotableClips: +- name: Episode introduction & Locally Optimistic community + startOffset: 0 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=0 + endOffset: 280 +- name: 'Career journey: private equity to modern data stacks' + startOffset: 280 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=280 + endOffset: 528 +- name: Defining the "last mile" in data delivery + startOffset: 528 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=528 + endOffset: 804 +- name: Modern data stack vs last-mile execution challenges + startOffset: 804 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=804 + endOffset: 1005 +- name: 'Pareto thinking for analytics: 80/20 and high-leverage work' + startOffset: 1005 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1005 + endOffset: 1202 +- name: 'Cultural barriers to adoption: incentives and behavior' + startOffset: 1202 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1202 + endOffset: 1453 +- name: 'Trust and usability: discoverability, interpretability, and data quality' + startOffset: 1453 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1453 + endOffset: 1581 +- name: 'Diagnosing poor adoption: treat data as a product and do user research' + startOffset: 1581 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1581 + endOffset: 1722 +- name: 'A/B testing reporting: simplify statistics for decision-makers' + startOffset: 1722 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1722 + endOffset: 1945 +- name: 'Product-design mindset for analytics: abstractions and personas' + startOffset: 1945 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1945 + endOffset: 2040 +- name: 'Outcome-first design: 
start projects from the decision you want to enable' + startOffset: 2040 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2040 + endOffset: 2295 +- name: 'Embedding data in meetings: mapping metrics to real decisions' + startOffset: 2295 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2295 + endOffset: 2372 +- name: 'Low-fidelity prototyping: sketches, whiteboards, and rapid feedback' + startOffset: 2372 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2372 + endOffset: 2478 +- name: 'Proving impact: creating measurable wins to build advocacy' + startOffset: 2478 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2478 + endOffset: 2538 +- name: 'Measuring hard-to-track work: proxies, time studies, and practical metrics' + startOffset: 2538 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2538 + endOffset: 2735 +- name: Driving change by scoping narrow slices and building momentum + startOffset: 2735 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2735 + endOffset: 2850 +- name: 'Identifying high-leverage questions: start with financials and cost centers' + startOffset: 2850 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2850 + endOffset: 2965 +- name: 'Handling resistance: recruiting advocates and selling upside' + startOffset: 2965 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2965 + endOffset: 3165 +- name: Growth marketing as a starter use case for data-driven change + startOffset: 3165 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3165 + endOffset: 3226 +- name: 'Interviewing domain experts: curiosity, rapport, and job documentation' + startOffset: 3226 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3226 + endOffset: 3335 +- name: 'Building influence: soft skills and recommended reading' + startOffset: 3335 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3335 + endOffset: 3491 +- name: 'Managing uncertainty: linear projects vs circular (exploratory) projects' + startOffset: 3491 + url: 
https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3491 + endOffset: 3690 +- name: 'Advice for aspiring analysts: curiosity, business impact, and on‑the‑job + learning' + startOffset: 3690 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3690 + endOffset: 3833 +- name: Where to find Caitlin and the Locally Optimistic community + startOffset: 3833 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3833 + endOffset: 3877 +- name: Episode wrap-up and key takeaways + startOffset: 3877 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3877 + endOffset: 3718 + transcript: - header: Episode introduction & Locally Optimistic community - line: This week, we'll talk about the “last mile of data” and we have a special @@ -1022,127 +1136,6 @@ transcript: sec: 3905 time: '1:05:05' who: Caitlin -description: Learn last-mile data delivery, build data products for the modern data - stack, boost adoption, embed analytics in decisions, and prove measurable ROI. -intro: 'How do you turn a powerful modern data stack into analytics people actually - use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove - Recommerce and former data lead in crowdfunding and self-publishing, walks through - the last-mile data delivery challenges that block adoption and offers practical - approaches to build data products that drive decisions.

We define the “last - mile” in data delivery and contrast modern data stack capabilities with last‑mile - execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), - treating data as a product, user research to diagnose poor adoption, and simplifying - A/B testing reporting for decision‑makers. Caitlin outlines a product‑design mindset—outcome‑first - projects, persona-driven abstractions, low‑fidelity prototyping, and embedding metrics - in meetings—to prove impact and build advocacy. She also covers cultural barriers, - measuring hard‑to‑track work with proxies, scoping narrow slices, recruiting advocates, - and using growth marketing as an early use case.

Listen to learn actionable - frameworks and experiments you can use to improve data adoption, design usable data - products, and measure tangible wins that create momentum in your organization.' -dateadded: '2021-10-23' -duration: PT01H01M58S -quotableClips: -- name: Episode introduction & Locally Optimistic community - startOffset: 0 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=0 - endOffset: 280 -- name: 'Career journey: private equity to modern data stacks' - startOffset: 280 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=280 - endOffset: 528 -- name: Defining the "last mile" in data delivery - startOffset: 528 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=528 - endOffset: 804 -- name: Modern data stack vs last-mile execution challenges - startOffset: 804 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=804 - endOffset: 1005 -- name: 'Pareto thinking for analytics: 80/20 and high-leverage work' - startOffset: 1005 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1005 - endOffset: 1202 -- name: 'Cultural barriers to adoption: incentives and behavior' - startOffset: 1202 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1202 - endOffset: 1453 -- name: 'Trust and usability: discoverability, interpretability, and data quality' - startOffset: 1453 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1453 - endOffset: 1581 -- name: 'Diagnosing poor adoption: treat data as a product and do user research' - startOffset: 1581 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1581 - endOffset: 1722 -- name: 'A/B testing reporting: simplify statistics for decision-makers' - startOffset: 1722 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1722 - endOffset: 1945 -- name: 'Product-design mindset for analytics: abstractions and personas' - startOffset: 1945 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1945 - endOffset: 2040 -- name: 'Outcome-first design: start projects from the decision you want to enable' - startOffset: 2040 - 
url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2040 - endOffset: 2295 -- name: 'Embedding data in meetings: mapping metrics to real decisions' - startOffset: 2295 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2295 - endOffset: 2372 -- name: 'Low-fidelity prototyping: sketches, whiteboards, and rapid feedback' - startOffset: 2372 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2372 - endOffset: 2478 -- name: 'Proving impact: creating measurable wins to build advocacy' - startOffset: 2478 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2478 - endOffset: 2538 -- name: 'Measuring hard-to-track work: proxies, time studies, and practical metrics' - startOffset: 2538 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2538 - endOffset: 2735 -- name: Driving change by scoping narrow slices and building momentum - startOffset: 2735 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2735 - endOffset: 2850 -- name: 'Identifying high-leverage questions: start with financials and cost centers' - startOffset: 2850 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2850 - endOffset: 2965 -- name: 'Handling resistance: recruiting advocates and selling upside' - startOffset: 2965 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2965 - endOffset: 3165 -- name: Growth marketing as a starter use case for data-driven change - startOffset: 3165 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3165 - endOffset: 3226 -- name: 'Interviewing domain experts: curiosity, rapport, and job documentation' - startOffset: 3226 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3226 - endOffset: 3335 -- name: 'Building influence: soft skills and recommended reading' - startOffset: 3335 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3335 - endOffset: 3491 -- name: 'Managing uncertainty: linear projects vs circular (exploratory) projects' - startOffset: 3491 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3491 - endOffset: 3690 -- name: 'Advice for 
aspiring analysts: curiosity, business impact, and on‑the‑job - learning' - startOffset: 3690 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3690 - endOffset: 3833 -- name: Where to find Caitlin and the Locally Optimistic community - startOffset: 3833 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3833 - endOffset: 3877 -- name: Episode wrap-up and key takeaways - startOffset: 3877 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3877 - endOffset: 3718 --- Links: diff --git a/_podcast/s04e07-launching-a-startup.md b/_podcast/launch-and-build-retail-startup.md similarity index 97% rename from _podcast/s04e07-launching-a-startup.md rename to _podcast/launch-and-build-retail-startup.md index a7fb2081..de5c74e7 100644 --- a/_podcast/s04e07-launching-a-startup.md +++ b/_podcast/launch-and-build-retail-startup.md @@ -1,11 +1,11 @@ --- title: Build a Grocery Retail OS to Cut Supermarket Food Waste & Scale Your Startup short: 'Launching a Startup: From Idea to First Hire' +season: 4 +episode: 7 guests: - carminepaolino image: images/podcast/s04e07-launching-a-startup.jpg -season: 4 -episode: 7 ids: youtube: s-w8_GDgIlU anchor: Launching-a-Startup-From-Idea-to-First-Hire---Carmine-Paolino-e15sk4i @@ -14,6 +14,139 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Launching-a-Startup-From-Idea-to-First-Hire---Carmine-Paolino-e15sk4i spotify: https://open.spotify.com/episode/2zlqwEOamFD8YVGkf4VsFW apple: https://podcasts.apple.com/us/podcast/launching-a-startup-from-idea-to-first-hire-carmine-paolino/id1541710331?i=1000531945076 + +description: Build a Grocery Retail OS to cut supermarket food waste, master JIT supply-chain forecasting, land pilots & investors, and scale your startup faster +intro: How do you build a grocery retail OS that actually cuts supermarket food waste while scaling a startup? 
In this episode, Carmine Paolino — CTO and co-founder of FreshFlow and former programmer/researcher in academia and data science — walks through translating technical expertise into a product that solves fresh-product challenges for retailers.

We cover FreshFlow’s mission and early problem discovery (including Edeka and Volg pilots), customer discovery techniques like shadowing store teams and The Mom Test, and how their idea evolved from computer vision to an ordering and inventory forecasting platform. Carmine explains Entrepreneur First’s role in co-founder matching and fundraising, pilot timelines and sales cycle realities, and risks around investor selection and board dynamics. He also shares technical lessons (moving off Kubeflow to managed GCP services), hiring priorities, building a product roadmap toward a grocery retail OS, and leveraging just-in-time supply chain and forecasting to reduce food waste.

Listen to learn practical guidance on pilot programs, prototype-before-pitch validation (banana ripeness demo), co-founder formation, and the operational and technical trade-offs when scaling a startup focused on supermarket food waste reduction +topics: +- startup +- founder +- leadership +- entrepreneurship +- product management +- tools +dateadded: 2021-08-15 + +duration: PT01H07M24S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 106 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=106 + endOffset: 136 +- name: 'Early Career: Programming, Academia, and Data Science' + startOffset: 136 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=136 + endOffset: 306 +- name: 'FreshFlow Overview: CTO Role and Ordering System Mission' + startOffset: 306 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=306 + endOffset: 346 +- name: 'Problem Discovery: Supermarket Fresh-Product Challenges & Edeka Pilot' + startOffset: 346 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=346 + endOffset: 433 +- name: 'Customer Discovery: Shadowing Store Teams and Research Methods (The Mom Test)' + startOffset: 433 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=433 + endOffset: 796 +- name: 'Entrepreneur First Experience: Program Structure and Benefits' + startOffset: 796 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=796 + endOffset: 955 +- name: 'Co-founder Matching: "Edges" Framework for Team Formation' + startOffset: 955 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=955 + endOffset: 1130 +- name: 'Mentorship & Investment Committee: Scoring, Feedback, and EF Phases' + startOffset: 1130 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1130 + endOffset: 1487 +- name: 'Idea Evolution: From Computer Vision App to Ordering Solution' + startOffset: 1487 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1487 + endOffset: 1714 +- name: 'Market Opportunity: Food Waste Impact and Competitive Landscape' + startOffset: 1714 + url: 
https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1714 + endOffset: 1859 +- name: 'Accelerator Value: Networking, Validation, and Founder Support' + startOffset: 1859 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1859 + endOffset: 2004 +- name: 'First Pilots & Clients: Volg and Edeka Engagements' + startOffset: 2004 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2004 + endOffset: 2090 +- name: 'Sales Cycle Realities: Time to First Client and Pilot Timelines' + startOffset: 2090 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2090 + endOffset: 2200 +- name: 'Fundraising Path: EF Investment, Angels, and Demo Day Strategy' + startOffset: 2200 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2200 + endOffset: 2413 +- name: 'Investor Selection Risks: Term Sheets, Board Dynamics, and Fit' + startOffset: 2413 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2413 + endOffset: 2544 +- name: 'Founder Roles: Splitting CTO and CEO Responsibilities' + startOffset: 2544 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2544 + endOffset: 2627 +- name: 'Hiring Strategy: First Hires, Freelancers, Delegation, and Remote Talent' + startOffset: 2627 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2627 + endOffset: 2932 +- name: 'Product Roadmap: Scaling to a Grocery Retail OS and Supply-Chain Expansion' + startOffset: 2932 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2932 + endOffset: 3014 +- name: 'Forecasting & Just-in-Time Supply Chain: Reducing Waste and Inventory' + startOffset: 3014 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3014 + endOffset: 3189 +- name: 'Tech Infrastructure Lesson: Kubeflow Challenges and Choosing Managed Cloud + (GCP)' + startOffset: 3189 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3189 + endOffset: 3351 +- name: 'Startup Advice: Resilience, Focus, and Emotional Intelligence' + startOffset: 3351 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3351 + endOffset: 3429 +- name: 'Hiring Criteria: 
Prioritizing Motivation and Behavior Over Skills' + startOffset: 3429 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3429 + endOffset: 3554 +- name: 'Sustaining Motivation: Mission-Driven Work on Food Waste and Climate' + startOffset: 3554 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3554 + endOffset: 3665 +- name: 'CTO Readiness: Skill Gaps, Learning, and Using Managed Services' + startOffset: 3665 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3665 + endOffset: 3825 +- name: 'MBA Relevance: Business School Not Required for Early-Stage Startups' + startOffset: 3825 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3825 + endOffset: 3907 +- name: 'Co-founder Imperative: Form Phase Advice and Team Formation Timing' + startOffset: 3907 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3907 + endOffset: 3957 +- name: 'Validating Pre-Existing Ideas: Why EF Helps Even with a Clear Idea' + startOffset: 3957 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3957 + endOffset: 4026 +- name: 'Prototype Before Pitch: Banana Ripeness Demo and Early Technical Traction' + startOffset: 4026 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4026 + endOffset: 4105 +- name: Contact Details & Episode Closing Remarks + startOffset: 4105 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4105 + endOffset: 4044 + transcript: - header: Episode Introduction & Guest Overview - line: This week, we'll talk about building a startup as a technical person. And @@ -1053,144 +1186,6 @@ transcript: sec: 4150 time: '1:09:10' who: Alexey -description: Build a Grocery Retail OS to cut supermarket food waste, master JIT supply-chain - forecasting, land pilots & investors, and scale your startup faster. -intro: How do you build a grocery retail OS that actually cuts supermarket food waste - while scaling a startup? 
In this episode, Carmine Paolino — CTO and co-founder of - FreshFlow and former programmer/researcher in academia and data science — walks - through translating technical expertise into a product that solves fresh-product - challenges for retailers.

We cover FreshFlow’s mission and early problem - discovery (including Edeka and Volg pilots), customer discovery techniques like - shadowing store teams and The Mom Test, and how their idea evolved from computer - vision to an ordering and inventory forecasting platform. Carmine explains Entrepreneur - First’s role in co-founder matching and fundraising, pilot timelines and sales cycle - realities, and risks around investor selection and board dynamics. He also shares - technical lessons (moving off Kubeflow to managed GCP services), hiring priorities, - building a product roadmap toward a grocery retail OS, and leveraging just-in-time - supply chain and forecasting to reduce food waste.

Listen to learn practical - guidance on pilot programs, prototype-before-pitch validation (banana ripeness demo), - co-founder formation, and the operational and technical trade-offs when scaling - a startup focused on supermarket food waste reduction. -dateadded: '2021-08-15' -duration: PT01H07M24S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 106 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=106 - endOffset: 136 -- name: 'Early Career: Programming, Academia, and Data Science' - startOffset: 136 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=136 - endOffset: 306 -- name: 'FreshFlow Overview: CTO Role and Ordering System Mission' - startOffset: 306 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=306 - endOffset: 346 -- name: 'Problem Discovery: Supermarket Fresh-Product Challenges & Edeka Pilot' - startOffset: 346 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=346 - endOffset: 433 -- name: 'Customer Discovery: Shadowing Store Teams and Research Methods (The Mom Test)' - startOffset: 433 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=433 - endOffset: 796 -- name: 'Entrepreneur First Experience: Program Structure and Benefits' - startOffset: 796 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=796 - endOffset: 955 -- name: 'Co-founder Matching: "Edges" Framework for Team Formation' - startOffset: 955 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=955 - endOffset: 1130 -- name: 'Mentorship & Investment Committee: Scoring, Feedback, and EF Phases' - startOffset: 1130 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1130 - endOffset: 1487 -- name: 'Idea Evolution: From Computer Vision App to Ordering Solution' - startOffset: 1487 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1487 - endOffset: 1714 -- name: 'Market Opportunity: Food Waste Impact and Competitive Landscape' - startOffset: 1714 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1714 - endOffset: 1859 -- name: 'Accelerator 
Value: Networking, Validation, and Founder Support' - startOffset: 1859 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1859 - endOffset: 2004 -- name: 'First Pilots & Clients: Volg and Edeka Engagements' - startOffset: 2004 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2004 - endOffset: 2090 -- name: 'Sales Cycle Realities: Time to First Client and Pilot Timelines' - startOffset: 2090 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2090 - endOffset: 2200 -- name: 'Fundraising Path: EF Investment, Angels, and Demo Day Strategy' - startOffset: 2200 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2200 - endOffset: 2413 -- name: 'Investor Selection Risks: Term Sheets, Board Dynamics, and Fit' - startOffset: 2413 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2413 - endOffset: 2544 -- name: 'Founder Roles: Splitting CTO and CEO Responsibilities' - startOffset: 2544 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2544 - endOffset: 2627 -- name: 'Hiring Strategy: First Hires, Freelancers, Delegation, and Remote Talent' - startOffset: 2627 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2627 - endOffset: 2932 -- name: 'Product Roadmap: Scaling to a Grocery Retail OS and Supply-Chain Expansion' - startOffset: 2932 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2932 - endOffset: 3014 -- name: 'Forecasting & Just-in-Time Supply Chain: Reducing Waste and Inventory' - startOffset: 3014 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3014 - endOffset: 3189 -- name: 'Tech Infrastructure Lesson: Kubeflow Challenges and Choosing Managed Cloud - (GCP)' - startOffset: 3189 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3189 - endOffset: 3351 -- name: 'Startup Advice: Resilience, Focus, and Emotional Intelligence' - startOffset: 3351 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3351 - endOffset: 3429 -- name: 'Hiring Criteria: Prioritizing Motivation and Behavior Over Skills' - startOffset: 3429 - url: 
https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3429 - endOffset: 3554 -- name: 'Sustaining Motivation: Mission-Driven Work on Food Waste and Climate' - startOffset: 3554 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3554 - endOffset: 3665 -- name: 'CTO Readiness: Skill Gaps, Learning, and Using Managed Services' - startOffset: 3665 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3665 - endOffset: 3825 -- name: 'MBA Relevance: Business School Not Required for Early-Stage Startups' - startOffset: 3825 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3825 - endOffset: 3907 -- name: 'Co-founder Imperative: Form Phase Advice and Team Formation Timing' - startOffset: 3907 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3907 - endOffset: 3957 -- name: 'Validating Pre-Existing Ideas: Why EF Helps Even with a Clear Idea' - startOffset: 3957 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3957 - endOffset: 4026 -- name: 'Prototype Before Pitch: Banana Ripeness Demo and Early Technical Traction' - startOffset: 4026 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4026 - endOffset: 4105 -- name: Contact Details & Episode Closing Remarks - startOffset: 4105 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4105 - endOffset: 4044 --- diff --git a/_podcast/s13e07-mastering-self-learning-in-machine-learning.md b/_podcast/learn-machine-learning-self-taught-bioinformatics.md similarity index 97% rename from _podcast/s13e07-mastering-self-learning-in-machine-learning.md rename to _podcast/learn-machine-learning-self-taught-bioinformatics.md index ea05baeb..04ed2d7b 100644 --- a/_podcast/s13e07-mastering-self-learning-in-machine-learning.md +++ b/_podcast/learn-machine-learning-self-taught-bioinformatics.md @@ -1,22 +1,143 @@ --- +title: 'How to Teach Yourself Bioinformatics & ML: Project-First Learning, Resources, and MLOps' +short: Mastering Self-Learning in Machine Learning +season: 13 episode: 7 guests: - aaishamuhammad +image: 
images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/Mastering-Self-Learning-in-Machine-Learning---Aaisha-Muhammad-e21ud62 youtube: Kc3Puh3UCRQ -image: images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Mastering-Self-Learning-in-Machine-Learning---Aaisha-Muhammad-e21ud62 apple: https://podcasts.apple.com/us/podcast/mastering-self-learning-in-machine-learning-aaisha/id1541710331?i=1000607892159 spotify: https://open.spotify.com/episode/2XdKHrmVuytXd5kzLVSbFn?si=ETbkUdT2Q1yJlKCI-d9Rcg youtube: https://www.youtube.com/watch?v=Kc3Puh3UCRQ -season: 13 -short: Mastering Self-Learning in Machine Learning -title: 'How to Teach Yourself Bioinformatics & ML: Project-First Learning, Resources, - and MLOps' + +description: Learn bioinformatics & machine learning via project-first workflows, dataset-first ideation, study hacks and MLOps deployment tips to gain practical skills +intro: How do you teach yourself bioinformatics and machine learning in a way that leads to real projects and deployable models? In this episode, Aaisha Muhammad — a self-taught bioinformatician, machine learning engineer and scientific illustrator from Johannesburg and a Datatalks.Club ML Zoomcamp graduate — walks through a project-first path for learning bioinformatics and ML. We cover prioritization and avoiding FOMO, open curricula like OSSU, skill mapping with ML Zoomcamp, and practical resource evaluation (free vs paid, syllabus skimming, instructor credibility). Aaisha explains dataset-first project ideation, finding datasets and papers via Google Scholar and PubMed, and building capstone projects such as frog toxicity and landscape classifiers. 
You’ll hear pragmatic study tactics — self-imposed deadlines, note-taking, time tracking, community study groups, and using ChatGPT as a study companion — plus strategies to approach PhD-level papers while avoiding burnout. For engineers interested in production, the conversation addresses deployment and MLOps basics including Docker and Kubernetes. Tune in to gain concrete guidance on projects, vetted resources, and the study habits that make self-directed bioinformatics and ML learning sustainable +topics: +- bioinformatics +- machine learning +- self-learning +dateadded: 2023-04-08 + +duration: PT00H58M57S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=0 + endOffset: 74 +- name: 'Guest Overview: Aaisha — self-taught bioinformatician, ML engineer, scientific + illustrator' + startOffset: 74 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=74 + endOffset: 137 +- name: 'Early Learning & Homeschooling: Python, web development, and flexible study' + startOffset: 137 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=137 + endOffset: 513 +- name: 'Choosing What to Learn: prioritization, filtering, and avoiding FOMO' + startOffset: 513 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=513 + endOffset: 561 +- name: 'Open Curricula: OSSU pathway for bioinformatics' + startOffset: 561 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=561 + endOffset: 768 +- name: 'Skill Mapping with ML Zoomcamp: building machine learning fundamentals' + startOffset: 768 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=768 + endOffset: 829 +- name: 'Evaluating Resources: syllabus skimming and instructor credibility' + startOffset: 829 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=829 + endOffset: 962 +- name: Free vs Paid Resources and Vetting Paid Courses + startOffset: 962 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=962 + endOffset: 1071 +- name: 'Practical Relevance: identifying 
industry-useful ML topics (SVM anecdote)' + startOffset: 1071 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1071 + endOffset: 1362 +- name: 'Learning Strategy: balancing theory and project-based practice' + startOffset: 1362 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1362 + endOffset: 1470 +- name: Project Selection & Dataset-First Ideation + startOffset: 1470 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1470 + endOffset: 1555 +- name: 'Research Papers & Dataset Discovery: Google Scholar, PubMed, citation graphs' + startOffset: 1555 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1555 + endOffset: 1718 +- name: 'ML Zoomcamp Experience: why the course appealed and structure' + startOffset: 1718 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1718 + endOffset: 1865 +- name: 'Zoomcamp Projects: frog toxicity capstone and landscape classifier' + startOffset: 1865 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1865 + endOffset: 2156 +- name: 'Bioinformatics Motivation: research interest meeting practical tech' + startOffset: 2156 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2156 + endOffset: 2215 +- name: 'Deadlines & Productivity Tactics: self-imposed deadlines and sticky-note + hacks' + startOffset: 2215 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2215 + endOffset: 2522 +- name: 'Study Habits: note-taking, time tracking, and personal workflow' + startOffset: 2522 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2522 + endOffset: 2630 +- name: 'Drawbacks of Independent Study: discipline risks and curriculum gaps' + startOffset: 2630 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2630 + endOffset: 2740 +- name: 'Community Learning: study groups, Slack, and teaching-to-learn' + startOffset: 2740 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2740 + endOffset: 2885 +- name: 'Deployment & MLOps: Docker, Kubernetes, and deployment discomfort' + startOffset: 2885 + url: 
https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2885 + endOffset: 3062 +- name: ChatGPT as a Study Companion and Pseudo Study Group + startOffset: 3062 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3062 + endOffset: 3114 +- name: 'Advanced Learning: approaching PhD-level topics via papers' + startOffset: 3114 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3114 + endOffset: 3218 +- name: 'Research Access & Publishing Challenges: paywalls and library access' + startOffset: 3218 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3218 + endOffset: 3386 +- name: 'Avoiding Burnout: switching topics and juggling parallel projects' + startOffset: 3386 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3386 + endOffset: 3509 +- name: 'Recommended Resources: Python for Everybody, ML Zoomcamp, further reading' + startOffset: 3509 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3509 + endOffset: 3593 +- name: Closing Remarks and Final Thoughts + startOffset: 3593 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3593 + endOffset: 3611 +- name: Episode Sign-Off + startOffset: 3611 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3611 + endOffset: 3537 + transcript: -- header: Podcast Introduction - header: 'Guest Overview: Aaisha — self-taught bioinformatician, ML engineer, scientific illustrator' - line: This week we'll talk about self-studying and continuous learning in machine @@ -1271,137 +1392,6 @@ transcript: sec: 3611 time: '1:00:11' who: Alexey -description: Learn bioinformatics & machine learning via project-first workflows, - dataset-first ideation, study hacks and MLOps deployment tips to gain practical - skills. -intro: How do you teach yourself bioinformatics and machine learning in a way that - leads to real projects and deployable models? 
In this episode, Aaisha Muhammad — - a self-taught bioinformatician, machine learning engineer and scientific illustrator - from Johannesburg and a Datatalks.Club ML Zoomcamp graduate — walks through a project-first - path for learning bioinformatics and ML. We cover prioritization and avoiding FOMO, - open curricula like OSSU, skill mapping with ML Zoomcamp, and practical resource - evaluation (free vs paid, syllabus skimming, instructor credibility). Aaisha explains - dataset-first project ideation, finding datasets and papers via Google Scholar and - PubMed, and building capstone projects such as frog toxicity and landscape classifiers. - You’ll hear pragmatic study tactics — self-imposed deadlines, note-taking, time - tracking, community study groups, and using ChatGPT as a study companion — plus - strategies to approach PhD-level papers while avoiding burnout. For engineers interested - in production, the conversation addresses deployment and MLOps basics including - Docker and Kubernetes. Tune in to gain concrete guidance on projects, vetted resources, - and the study habits that make self-directed bioinformatics and ML learning sustainable. 
-dateadded: '2023-04-08' -duration: PT00H58M57S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=0 - endOffset: 74 -- name: 'Guest Overview: Aaisha — self-taught bioinformatician, ML engineer, scientific - illustrator' - startOffset: 74 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=74 - endOffset: 137 -- name: 'Early Learning & Homeschooling: Python, web development, and flexible study' - startOffset: 137 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=137 - endOffset: 513 -- name: 'Choosing What to Learn: prioritization, filtering, and avoiding FOMO' - startOffset: 513 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=513 - endOffset: 561 -- name: 'Open Curricula: OSSU pathway for bioinformatics' - startOffset: 561 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=561 - endOffset: 768 -- name: 'Skill Mapping with ML Zoomcamp: building machine learning fundamentals' - startOffset: 768 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=768 - endOffset: 829 -- name: 'Evaluating Resources: syllabus skimming and instructor credibility' - startOffset: 829 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=829 - endOffset: 962 -- name: Free vs Paid Resources and Vetting Paid Courses - startOffset: 962 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=962 - endOffset: 1071 -- name: 'Practical Relevance: identifying industry-useful ML topics (SVM anecdote)' - startOffset: 1071 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1071 - endOffset: 1362 -- name: 'Learning Strategy: balancing theory and project-based practice' - startOffset: 1362 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1362 - endOffset: 1470 -- name: Project Selection & Dataset-First Ideation - startOffset: 1470 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1470 - endOffset: 1555 -- name: 'Research Papers & Dataset Discovery: Google Scholar, PubMed, citation graphs' - startOffset: 1555 - url: 
https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1555 - endOffset: 1718 -- name: 'ML Zoomcamp Experience: why the course appealed and structure' - startOffset: 1718 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1718 - endOffset: 1865 -- name: 'Zoomcamp Projects: frog toxicity capstone and landscape classifier' - startOffset: 1865 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1865 - endOffset: 2156 -- name: 'Bioinformatics Motivation: research interest meeting practical tech' - startOffset: 2156 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2156 - endOffset: 2215 -- name: 'Deadlines & Productivity Tactics: self-imposed deadlines and sticky-note - hacks' - startOffset: 2215 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2215 - endOffset: 2522 -- name: 'Study Habits: note-taking, time tracking, and personal workflow' - startOffset: 2522 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2522 - endOffset: 2630 -- name: 'Drawbacks of Independent Study: discipline risks and curriculum gaps' - startOffset: 2630 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2630 - endOffset: 2740 -- name: 'Community Learning: study groups, Slack, and teaching-to-learn' - startOffset: 2740 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2740 - endOffset: 2885 -- name: 'Deployment & MLOps: Docker, Kubernetes, and deployment discomfort' - startOffset: 2885 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2885 - endOffset: 3062 -- name: ChatGPT as a Study Companion and Pseudo Study Group - startOffset: 3062 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3062 - endOffset: 3114 -- name: 'Advanced Learning: approaching PhD-level topics via papers' - startOffset: 3114 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3114 - endOffset: 3218 -- name: 'Research Access & Publishing Challenges: paywalls and library access' - startOffset: 3218 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3218 - endOffset: 3386 -- name: 'Avoiding Burnout: 
switching topics and juggling parallel projects' - startOffset: 3386 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3386 - endOffset: 3509 -- name: 'Recommended Resources: Python for Everybody, ML Zoomcamp, further reading' - startOffset: 3509 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3509 - endOffset: 3593 -- name: Closing Remarks and Final Thoughts - startOffset: 3593 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3593 - endOffset: 3611 -- name: Episode Sign-Off - startOffset: 3611 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3611 - endOffset: 3537 --- Links: diff --git a/_podcast/s12e06-preparing-for-data-science-interview.md b/_podcast/machine-learning-data-science-interview-prep.md similarity index 97% rename from _podcast/s12e06-preparing-for-data-science-interview.md rename to _podcast/machine-learning-data-science-interview-prep.md index b928b713..10685971 100644 --- a/_podcast/s12e06-preparing-for-data-science-interview.md +++ b/_podcast/machine-learning-data-science-interview-prep.md @@ -1,46 +1,122 @@ --- +title: 'Master Machine Learning & Data Science Interviews: Recruiter-Proven Stages, Prep & Resources' +short: Master Machine Learning & Data Science Interviews +season: 12 episode: 6 guests: - lukewhipps -date: 2025-11-07 -topics: -- Job search -- Career Growth -- Hiring -- Technical Interviews -- Data Science -- Portfolio Building -- Interview -intro: How do you reliably prepare for ML and data science technical interviews — - from the initial recruiter screen to coding and scenario-based rounds? In this episode - Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast, draws - on 8+ years recruiting data scientists and AI professionals to lay out recruiter-proven - interview stages and practical prep tactics.

Luke walks through the full - interview lifecycle — Stage Zero recruiter screening and role-fit filtering, the - intro interview for relationship building, and the technical rounds that include - binary, scenario, example, and coding components. He explains how to research interviewers, - craft elevator pitches and STAR stories, and align expectations with recruiters - so you prepare to the right depth. You’ll learn how to prioritize fundamentals before - secondary skills, use question-flow strategies to probe deeper, and balance theory - versus practical math in machine learning interviews.

The episode also - covers recovering from failed interviews, targeted internal applications and outreach, - and concrete practice resources like LeetCode, HackerRank, Codeforces, and Educative. - If you’re preparing for ML technical interviews or data science interviews, this - episode gives recruiter-led structure, concrete prep priorities, and resource recommendations - to maximize your chances in each interview stage. +image: images/podcast/s12e06-preparing-for-data-science-interview.jpg ids: anchor: Preparing-for-a-Data-Science-Interview---Luke-Whipps-e1tsh5d youtube: NnZjlMowkWA -image: images/podcast/s12e06-preparing-for-data-science-interview.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Preparing-for-a-Data-Science-Interview---Luke-Whipps-e1tsh5d apple: https://podcasts.apple.com/us/podcast/preparing-for-a-data-science-interview-luke-whipps/id1541710331?i=1000596975225 spotify: https://open.spotify.com/episode/3JAmnWie8pS58Kok9Sjr2V?si=FDpX4O74Qi2kqzMGumqMpw youtube: https://www.youtube.com/watch?v=NnZjlMowkWA -season: 12 -short: Preparing for a Data Science Interview -title: 'Master ML & Data Science Technical Interviews: Recruiter-Proven Stages, Prep - & Resources' + +intro: How do you reliably prepare for ML and data science technical interviews — from the initial recruiter screen to coding and scenario-based rounds? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast, draws on 8+ years recruiting data scientists and AI professionals to lay out recruiter-proven interview stages and practical prep tactics.

Luke walks through the full interview lifecycle — Stage Zero recruiter screening and role-fit filtering, the intro interview for relationship building, and the technical rounds that include binary, scenario, example, and coding components. He explains how to research interviewers, craft elevator pitches and STAR stories, and align expectations with recruiters so you prepare to the right depth. You’ll learn how to prioritize fundamentals before secondary skills, use question-flow strategies to probe deeper, and balance theory versus practical math in machine learning interviews.

The episode also covers recovering from failed interviews, targeted internal applications and outreach, and concrete practice resources like LeetCode, HackerRank, Codeforces, and Educative. If you’re preparing for ML technical interviews or data science interviews, this episode gives recruiter-led structure, concrete prep priorities, and resource recommendations to maximize your chances in each interview stage +topics: +- job search +- career growth +- hiring +- data science + +dateadded: 2023-01-28 +date: 2025-11-07 + +duration: PT01H29S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=0 + endOffset: 101 +- name: 'Guest Introduction: Luke Whipps & Neural AI' + startOffset: 101 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=101 + endOffset: 183 +- name: 'Recruitment Career Overview: ML focus, startups, Germany' + startOffset: 183 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=183 + endOffset: 280 +- name: 'Remote Work & Client Geography: UK base serving German market' + startOffset: 280 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=280 + endOffset: 515 +- name: 'Podcast Purpose: AI Game Changers format and goals' + startOffset: 515 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=515 + endOffset: 714 +- name: 'Recruiter Strategy: Embedded talent specialist and candidate coaching' + startOffset: 714 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=714 + endOffset: 932 +- name: 'Market Snapshot: hiring trends, layoffs, and candidate concerns' + startOffset: 932 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=932 + endOffset: 1322 +- name: 'Interview Process Overview: stages, scope, and assumptions' + startOffset: 1322 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1322 + endOffset: 1550 +- name: 'Stage Zero: recruiter screening and role-fit filtering' + startOffset: 1550 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1550 + endOffset: 1686 +- name: 'Intro 
Interview Prep: objectives, structure, and relationship building' + startOffset: 1686 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1686 + endOffset: 1826 +- name: 'Interviewer Research: personality signals and communication matching' + startOffset: 1826 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1826 + endOffset: 2315 +- name: 'Message Preparation: elevator pitches and STAR storytelling' + startOffset: 2315 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2315 + endOffset: 2495 +- name: 'Technical Interview Components: binary, scenario, example, and coding' + startOffset: 2495 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2495 + endOffset: 2696 +- name: 'Aligning Expectations: clarifying technical depth with recruiters' + startOffset: 2696 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2696 + endOffset: 2890 +- name: 'Prep Prioritization: fundamentals first, then secondary and ideal skills' + startOffset: 2890 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2890 + endOffset: 3060 +- name: 'Question Flow Strategy: follow-ups to probe deeper understanding' + startOffset: 3060 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3060 + endOffset: 3178 +- name: 'Theory vs. 
Practice: relevance of mathematical and theoretical questions' + startOffset: 3178 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3178 + endOffset: 3317 +- name: 'Recovering from Failure: bombing interviews, feedback, and retakes' + startOffset: 3317 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3317 + endOffset: 3527 +- name: 'Applying Internally: focused applications and direct outreach tactics' + startOffset: 3527 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3527 + endOffset: 3605 +- name: 'Practice Resources: LeetCode, HackerRank, Codeforces, Educative' + startOffset: 3605 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3605 + endOffset: 3703 +- name: 'Supplemental Material: Luke’s interview prep document (show notes)' + startOffset: 3703 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3703 + endOffset: 3717 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3717 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3717 + endOffset: 3629 + transcript: - header: 'Guest Introduction: Luke Whipps & Neural AI' - header: 'Guest Introduction: Luke Whipps & Neural AI' @@ -1272,95 +1348,4 @@ transcript: sec: 3730 time: '1:02:10' who: Alexey -dateadded: '2023-01-28' -duration: PT01H29S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=0 - endOffset: 101 -- name: 'Guest Introduction: Luke Whipps & Neural AI' - startOffset: 101 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=101 - endOffset: 183 -- name: 'Recruitment Career Overview: ML focus, startups, Germany' - startOffset: 183 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=183 - endOffset: 280 -- name: 'Remote Work & Client Geography: UK base serving German market' - startOffset: 280 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=280 - endOffset: 515 -- name: 'Podcast Purpose: AI Game Changers format and goals' - startOffset: 515 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=515 - 
endOffset: 714 -- name: 'Recruiter Strategy: Embedded talent specialist and candidate coaching' - startOffset: 714 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=714 - endOffset: 932 -- name: 'Market Snapshot: hiring trends, layoffs, and candidate concerns' - startOffset: 932 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=932 - endOffset: 1322 -- name: 'Interview Process Overview: stages, scope, and assumptions' - startOffset: 1322 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1322 - endOffset: 1550 -- name: 'Stage Zero: recruiter screening and role-fit filtering' - startOffset: 1550 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1550 - endOffset: 1686 -- name: 'Intro Interview Prep: objectives, structure, and relationship building' - startOffset: 1686 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1686 - endOffset: 1826 -- name: 'Interviewer Research: personality signals and communication matching' - startOffset: 1826 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1826 - endOffset: 2315 -- name: 'Message Preparation: elevator pitches and STAR storytelling' - startOffset: 2315 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2315 - endOffset: 2495 -- name: 'Technical Interview Components: binary, scenario, example, and coding' - startOffset: 2495 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2495 - endOffset: 2696 -- name: 'Aligning Expectations: clarifying technical depth with recruiters' - startOffset: 2696 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2696 - endOffset: 2890 -- name: 'Prep Prioritization: fundamentals first, then secondary and ideal skills' - startOffset: 2890 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2890 - endOffset: 3060 -- name: 'Question Flow Strategy: follow-ups to probe deeper understanding' - startOffset: 3060 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3060 - endOffset: 3178 -- name: 'Theory vs. 
Practice: relevance of mathematical and theoretical questions' - startOffset: 3178 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3178 - endOffset: 3317 -- name: 'Recovering from Failure: bombing interviews, feedback, and retakes' - startOffset: 3317 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3317 - endOffset: 3527 -- name: 'Applying Internally: focused applications and direct outreach tactics' - startOffset: 3527 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3527 - endOffset: 3605 -- name: 'Practice Resources: LeetCode, HackerRank, Codeforces, Educative' - startOffset: 3605 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3605 - endOffset: 3703 -- name: 'Supplemental Material: Luke’s interview prep document (show notes)' - startOffset: 3703 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3703 - endOffset: 3717 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3717 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3717 - endOffset: 3629 --- diff --git a/_podcast/s02e06-decision-optimization.md b/_podcast/machine-learning-decision-optimization.md similarity index 75% rename from _podcast/s02e06-decision-optimization.md rename to _podcast/machine-learning-decision-optimization.md index f0900a4c..da05a916 100644 --- a/_podcast/s02e06-decision-optimization.md +++ b/_podcast/machine-learning-decision-optimization.md @@ -1,12 +1,11 @@ --- -title: 'Optimize Decisions with ML: Prescriptive & Robust Optimization for Supply - Chain and Pricing' +title: 'Optimize Decisions with ML: Prescriptive & Robust Optimization for Supply Chain and Pricing' short: Decision Optimization +season: 2 +episode: 6 guests: - danbecker image: images/podcast/s02e06-decision-optimization.jpg -season: 2 -episode: 6 ids: youtube: SJuzQ4bcU2c anchor: Translating-ML-Predictions-Into-Better-Real-World-Results-with-Decision-Optimization---Dan-Becker-eqk0b1/a-a4maq87 @@ -15,27 +14,15 @@ links: anchor: 
https://anchor.fm/datatalksclub/episodes/Translating-ML-Predictions-Into-Better-Real-World-Results-with-Decision-Optimization---Dan-Becker-eqk0b1/a-a4maq87 spotify: https://open.spotify.com/episode/42eAhI6F31DZ96Mnq2I4bJ apple: https://podcasts.apple.com/us/podcast/translating-ml-predictions-into-better-real-world-results/id1541710331?i=1000509855317 -description: 'Learn prescriptive analytics & robust optimization for supply chain - pricing: align ML predictions to decisions, scale models, pick solvers, and boost - revenue.' -intro: 'How do you turn machine learning predictions into better real-world decisions—especially - under uncertainty in supply chains and pricing? In this episode, Dan Becker, Founder - & CEO of Decision AI and former Google data scientist and Product Director at DataRobot, - walks through prescriptive analytics and decision optimization for practical business - impact. With a background that includes top Kaggle performance and contributions - to TensorFlow and Keras, Dan explains how to formulate optimization problems, choose - objectives and constraints, and integrate ML forecasts into prescriptive and robust - optimization models.

We cover robust vs. stochastic optimization, aligning - loss functions with business objectives, and the solvers and tools that make this - work—OR-Tools, Gurobi, Pyomo and open-source options. Dan also digs into scalability, - approximation techniques, and deployment: pipelines, monitoring, and feedback loops. - Use cases include supply chain optimization, resource allocation, and pricing/bidding - strategies, plus operational, legal, and ethical constraints. Listeners will get - practical guidance on evaluation metrics, common pitfalls like mis-specified objectives - and overfitting decisions, and the cross-functional skills needed—data science, - operations research, and software engineering—to get started with prescriptive optimization - projects.' -dateadded: '2021-02-23' + +description: 'Learn prescriptive analytics & robust optimization for supply chain pricing: align ML predictions to decisions, scale models, pick solvers, and boost revenue.' +intro: 'How do you turn machine learning predictions into better real-world decisions—especially under uncertainty in supply chains and pricing? In this episode, Dan Becker, Founder & CEO of Decision AI and former Google data scientist and Product Director at DataRobot, walks through prescriptive analytics and decision optimization for practical business impact. With a background that includes top Kaggle performance and contributions to TensorFlow and Keras, Dan explains how to formulate optimization problems, choose objectives and constraints, and integrate ML forecasts into prescriptive and robust optimization models.

We cover robust vs. stochastic optimization, aligning loss functions with business objectives, and the solvers and tools that make this work—OR-Tools, Gurobi, Pyomo and open-source options. Dan also digs into scalability, approximation techniques, and deployment: pipelines, monitoring, and feedback loops. Use cases include supply chain optimization, resource allocation, and pricing/bidding strategies, plus operational, legal, and ethical constraints. Listeners will get practical guidance on evaluation metrics, common pitfalls like mis-specified objectives and overfitting decisions, and the cross-functional skills needed—data science, operations research, and software engineering—to get started with prescriptive optimization projects.' +topics: +- machine learning +- decision optimization +dateadded: 2021-02-23 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -129,4 +116,5 @@ quotableClips: startOffset: 3720 url: https://www.youtube.com/watch?v=SJuzQ4bcU2c&t=3720 endOffset: 3720 + --- diff --git a/_podcast/s04e05-running-from-complexity.md b/_podcast/machine-learning-engineering-production-best-practices.md similarity index 98% rename from _podcast/s04e05-running-from-complexity.md rename to _podcast/machine-learning-engineering-production-best-practices.md index 0a1abbda..8b443ede 100644 --- a/_podcast/s04e05-running-from-complexity.md +++ b/_podcast/machine-learning-engineering-production-best-practices.md @@ -1,12 +1,11 @@ --- -title: 'Practical Machine Learning Engineering for Production: Ship Maintainable Models, - Avoid Complexity' +title: 'Practical Machine Learning Engineering for Production: Ship Maintainable Models, Avoid Complexity' short: Running from Complexity +season: 4 +episode: 5 guests: - benwilson image: images/podcast/s04e05-running-from-complexity.jpg -season: 4 -episode: 5 ids: youtube: sMy8NYZnsy8 anchor: Running-from-Complexity---Ben-Wilson-e14np51 @@ -15,6 +14,123 @@ links: anchor: 
https://anchor.fm/datatalksclub/episodes/Running-from-Complexity---Ben-Wilson-e14np51 spotify: https://open.spotify.com/episode/2TxcU3eF7hjkAEzAJcYMAg apple: https://podcasts.apple.com/us/podcast/running-from-complexity-ben-wilson/id1541710331?i=1000529834651 + +description: 'Learn practical ML engineering to ship maintainable machine learning models to production: avoid complexity, use prototypes, explainability, testing.' +intro: 'Are your ML projects collapsing under their own complexity—or never making it to production at all? In this episode, Ben Wilson, Practice Lead Resident Solutions Architect at Databricks and author of an upcoming Manning book, walks through practical machine learning engineering strategies for shipping maintainable models and avoiding needless complexity. Drawing on 12 years across industries, Ben emphasizes prioritizing maintainability over novelty: refactoring monolithic code into modular, testable components, running timeboxed experiments and bake-offs, and choosing SQL or statistical solutions before jumping to deep learning.

We cover why production failures often stem from lack of business buy-in and “search-driven” complexity, how to involve subject-matter experts and executives to simplify designs, and techniques for explainability that translate model behavior into business terms. Ben also discusses team composition (statistics plus ML engineering skills), agile sprints for feature engineering and testing, the IKEA effect of emotional attachment to complex systems, and pitfalls in reproducing academic papers in production. Listen to learn concrete practices—from experimentation limits to mentoring and deployment tradeoffs—that help you move ideas into production and keep models reliable and maintainable.' +topics: +- machine learning +- career growth +- production +dateadded: 2021-07-23 + +duration: PT01H11M41S + +quotableClips: +- name: 'Podcast Introduction: Running from Complexity' + startOffset: 0 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=0 + endOffset: 134 +- name: 'Guest Introduction: Ben Wilson, Databricks and ML engineering focus' + startOffset: 134 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=134 + endOffset: 186 +- name: 'Career Path: Navy nuclear tech to process engineering and data science' + startOffset: 186 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=186 + endOffset: 410 +- name: 'Consulting Approach: Prioritizing maintainability over novelty' + startOffset: 410 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=410 + endOffset: 529 +- name: 'Code Quality: Refactoring "walls of text" into modular, testable code' + startOffset: 529 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=529 + endOffset: 635 +- name: 'Production Failures: Lack of business buy-in and overcomplicated solutions' + startOffset: 635 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=635 + endOffset: 799 +- name: 'Motivation Drivers: Tech hype, "flexing," and engineering pragmatism' + startOffset: 799 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=799 + 
endOffset: 1097 +- name: 'Avoiding Search-Driven Complexity: Use experts, communities, and Bayesian + methods' + startOffset: 1097 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1097 + endOffset: 1299 +- name: 'Cross-Functional Collaboration: Involving SMEs to simplify solutions' + startOffset: 1299 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1299 + endOffset: 1564 +- name: 'Explainability: Translating models into business terms to build trust' + startOffset: 1564 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1564 + endOffset: 1746 +- name: 'From Idea to Production: Rapid prototypes, selling to SMEs, and executive + sponsorship' + startOffset: 1746 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1746 + endOffset: 1923 +- name: 'Experimentation Process: Timeboxed bake-offs and cost–benefit tradeoffs' + startOffset: 1923 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1923 + endOffset: 2173 +- name: 'The IKEA Effect: Emotional attachment to complex, hard-to-maintain systems' + startOffset: 2173 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2173 + endOffset: 2357 +- name: 'Novel Algorithm Risks: Transfer learning vs building white‑paper solutions' + startOffset: 2357 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2357 + endOffset: 2663 +- name: 'Prefer Simplicity First: Solve with SQL or stats before deep learning' + startOffset: 2663 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2663 + endOffset: 2782 +- name: 'Paper Pitfalls: Reproducibility, environment assumptions, and cloud cost' + startOffset: 2782 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2782 + endOffset: 2994 +- name: 'Team Composition: Importance of statistics expertise and coding/ML engineering + skills' + startOffset: 2994 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2994 + endOffset: 3134 +- name: 'Agile for ML: Iterative sprints, MVPs, feature engineering, and testing' + startOffset: 3134 + url: 
https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3134 + endOffset: 3341 +- name: 'Timeboxing Research: Limit experiments to avoid sunk-costs' + startOffset: 3341 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3341 + endOffset: 3458 +- name: 'Mentoring & Training: Databricks programs and production ML capstones' + startOffset: 3458 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3458 + endOffset: 3553 +- name: 'Book Summary: Machine Learning Engineering in Action — process, automation, + testing' + startOffset: 3553 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3553 + endOffset: 3747 +- name: 'AI‑First Tradeoffs: Talent needs, retention, and budget realities' + startOffset: 3747 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3747 + endOffset: 3874 +- name: 'Manager Enablement: Tech leads translating ML for nontechnical managers' + startOffset: 3874 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3874 + endOffset: 4078 +- name: 'Career Path Advice: Core fundamentals, specialization timeline, and leadership' + startOffset: 4078 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4078 + endOffset: 4371 +- name: 'Contact & Resources: LinkedIn, podcast appearances, and early‑access book' + startOffset: 4371 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4371 + endOffset: 4301 + transcript: - header: 'Podcast Introduction: Running from Complexity' - header: 'Guest Introduction: Ben Wilson, Databricks and ML engineering focus' @@ -1184,131 +1300,6 @@ transcript: sec: 4435 time: '1:13:55' who: Ben -description: 'Learn practical ML engineering to ship maintainable machine learning - models to production: avoid complexity, use prototypes, explainability, testing.' -intro: 'Are your ML projects collapsing under their own complexity—or never making - it to production at all? 
In this episode, Ben Wilson, Practice Lead Resident Solutions - Architect at Databricks and author of an upcoming Manning book, walks through practical - machine learning engineering strategies for shipping maintainable models and avoiding - needless complexity. Drawing on 12 years across industries, Ben emphasizes prioritizing - maintainability over novelty: refactoring monolithic code into modular, testable - components, running timeboxed experiments and bake-offs, and choosing SQL or statistical - solutions before jumping to deep learning.

We cover why production failures - often stem from lack of business buy-in and “search-driven” complexity, how to involve - subject-matter experts and executives to simplify designs, and techniques for explainability - that translate model behavior into business terms. Ben also discusses team composition - (statistics plus ML engineering skills), agile sprints for feature engineering and - testing, the IKEA effect of emotional attachment to complex systems, and pitfalls - in reproducing academic papers in production. Listen to learn concrete practices—from - experimentation limits to mentoring and deployment tradeoffs—that help you move - ideas into production and keep models reliable and maintainable.' -dateadded: '2021-07-23' -duration: PT01H11M41S -quotableClips: -- name: 'Podcast Introduction: Running from Complexity' - startOffset: 0 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=0 - endOffset: 134 -- name: 'Guest Introduction: Ben Wilson, Databricks and ML engineering focus' - startOffset: 134 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=134 - endOffset: 186 -- name: 'Career Path: Navy nuclear tech to process engineering and data science' - startOffset: 186 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=186 - endOffset: 410 -- name: 'Consulting Approach: Prioritizing maintainability over novelty' - startOffset: 410 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=410 - endOffset: 529 -- name: 'Code Quality: Refactoring "walls of text" into modular, testable code' - startOffset: 529 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=529 - endOffset: 635 -- name: 'Production Failures: Lack of business buy-in and overcomplicated solutions' - startOffset: 635 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=635 - endOffset: 799 -- name: 'Motivation Drivers: Tech hype, "flexing," and engineering pragmatism' - startOffset: 799 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=799 - endOffset: 1097 -- name: 'Avoiding Search-Driven 
Complexity: Use experts, communities, and Bayesian - methods' - startOffset: 1097 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1097 - endOffset: 1299 -- name: 'Cross-Functional Collaboration: Involving SMEs to simplify solutions' - startOffset: 1299 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1299 - endOffset: 1564 -- name: 'Explainability: Translating models into business terms to build trust' - startOffset: 1564 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1564 - endOffset: 1746 -- name: 'From Idea to Production: Rapid prototypes, selling to SMEs, and executive - sponsorship' - startOffset: 1746 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1746 - endOffset: 1923 -- name: 'Experimentation Process: Timeboxed bake-offs and cost–benefit tradeoffs' - startOffset: 1923 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1923 - endOffset: 2173 -- name: 'The IKEA Effect: Emotional attachment to complex, hard-to-maintain systems' - startOffset: 2173 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2173 - endOffset: 2357 -- name: 'Novel Algorithm Risks: Transfer learning vs building white‑paper solutions' - startOffset: 2357 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2357 - endOffset: 2663 -- name: 'Prefer Simplicity First: Solve with SQL or stats before deep learning' - startOffset: 2663 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2663 - endOffset: 2782 -- name: 'Paper Pitfalls: Reproducibility, environment assumptions, and cloud cost' - startOffset: 2782 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2782 - endOffset: 2994 -- name: 'Team Composition: Importance of statistics expertise and coding/ML engineering - skills' - startOffset: 2994 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2994 - endOffset: 3134 -- name: 'Agile for ML: Iterative sprints, MVPs, feature engineering, and testing' - startOffset: 3134 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3134 - endOffset: 3341 -- name: 
'Timeboxing Research: Limit experiments to avoid sunk-costs' - startOffset: 3341 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3341 - endOffset: 3458 -- name: 'Mentoring & Training: Databricks programs and production ML capstones' - startOffset: 3458 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3458 - endOffset: 3553 -- name: 'Book Summary: Machine Learning Engineering in Action — process, automation, - testing' - startOffset: 3553 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3553 - endOffset: 3747 -- name: 'AI‑First Tradeoffs: Talent needs, retention, and budget realities' - startOffset: 3747 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3747 - endOffset: 3874 -- name: 'Manager Enablement: Tech leads translating ML for nontechnical managers' - startOffset: 3874 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3874 - endOffset: 4078 -- name: 'Career Path Advice: Core fundamentals, specialization timeline, and leadership' - startOffset: 4078 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4078 - endOffset: 4371 -- name: 'Contact & Resources: LinkedIn, podcast appearances, and early‑access book' - startOffset: 4371 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4371 - endOffset: 4301 --- Links: diff --git a/_podcast/s09e02-using-data-for-asteroid-mining.md b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md similarity index 98% rename from _podcast/s09e02-using-data-for-asteroid-mining.md rename to _podcast/machine-learning-for-asteroid-mining-and-water-detection.md index e106b24f..3929d1e8 100644 --- a/_podcast/s09e02-using-data-for-asteroid-mining.md +++ b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md @@ -1,20 +1,123 @@ --- +title: 'Asteroid Mining: Using ML & Hyperspectral Spectroscopy to Detect Water for ISRU' +short: Using Data for Asteroid Mining +season: 9 episode: 2 guests: - daynancrull +image: images/podcast/s09e02-using-data-for-asteroid-mining.jpg ids: anchor: 
Using-Data-for-Asteroid-Mining---Daynan-Crull-e1jbhr0 youtube: YxijEUoDCfw -image: images/podcast/s09e02-using-data-for-asteroid-mining.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Using-Data-for-Asteroid-Mining---Daynan-Crull-e1jbhr0 apple: https://podcasts.apple.com/us/podcast/machine-learning-in-marketing-juan-orduz/id1541710331?i=1000564219176 spotify: https://open.spotify.com/episode/7wjKCbCsD4ytuNrE8JrH2B?si=1WPAtw6PSZGVib0qSsoLvA youtube: https://www.youtube.com/watch?v=YxijEUoDCfw -season: 9 -short: Using Data for Asteroid Mining -title: 'Asteroid Mining: Using ML & Hyperspectral Spectroscopy to Detect Water for - ISRU' + +description: 'Discover asteroid mining: machine learning & hyperspectral spectroscopy to detect water for ISRU—learn detection methods, datasets, mission design & tools.' +intro: How can we reliably detect water on near‑Earth asteroids using machine learning and hyperspectral spectroscopy to enable in‑situ resource utilization (ISRU)? In this episode Daynan Crull—co‑founder of Karman+ and lead of its science and technology effort—walks through the science and engineering needed to find and characterize asteroid water for space missions. Drawing on his background in remote sensing and ML, Daynan explains hyperspectral infrared signatures for water detection, spectral classification approaches, and the limits of ground truth from returned samples and meteorites. Along the way we cover relevant astronomical data types (images, hyperspectral bands, time series), asteroid features like photometry and rotation, observability challenges, and ML tasks from signal processing to orbit linking and synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), sampling and extraction concepts, economic use cases for water‑as‑fuel, and the cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. 
Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, machine learning for water detection, and the datasets and infrastructure to get involved in ISRU research and missions +topics: +- machine learning +- astronomy +dateadded: 2022-06-04 + +duration: PT01H16S + +quotableClips: +- name: Podcast Introduction + startOffset: 83 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=83 + endOffset: 111 +- name: 'Career & Data Science Pivot: From Astronomy to Asteroid Mining' + startOffset: 111 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=111 + endOffset: 292 +- name: 'Cosmology vs. Astronomy: Timescales, Theory & Observation' + startOffset: 292 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=292 + endOffset: 363 +- name: 'Machine Learning in Astronomy: Tasks, Signal Processing & Scaling' + startOffset: 363 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=363 + endOffset: 440 +- name: 'Gravitational Wave Detection: Signal, Noise & Instrument Glitches' + startOffset: 440 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=440 + endOffset: 765 +- name: 'Astronomical Data Types: Images, Hyperspectral Bands & Time Series' + startOffset: 765 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=765 + endOffset: 864 +- name: 'Hyperspectral Spectroscopy: Infrared Signatures & Water Detection' + startOffset: 864 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=864 + endOffset: 1004 +- name: 'Asteroid Features: Photometry, Light Curves, Rotation & Polarimetry' + startOffset: 1004 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1004 + endOffset: 1175 +- name: Spectral Classification & ML Approaches for Water Identification + startOffset: 1175 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1175 + endOffset: 1320 +- name: 'Ground Truth Limitations: Returned Samples, Meteorites & Validation' + startOffset: 1320 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1320 + endOffset: 1542 +- name: 'ISRU & Water-as-Fuel: 
Economics and Use Cases for Space Resources' + startOffset: 1542 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1542 + endOffset: 1818 +- name: 'Other Resources on Asteroids: Metals, Organics & Scientific Value' + startOffset: 1818 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1818 + endOffset: 1932 +- name: 'Asteroid Origins: Main Belt, Resonances & Near-Earth Populations' + startOffset: 1932 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1932 + endOffset: 2148 +- name: 'Observability Challenges: Angles, Dawn/Dusk Windows & Detection Biases' + startOffset: 2148 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2148 + endOffset: 2293 +- name: 'Data Organization: Team Roles, Data Engineering & Bayesian Engines' + startOffset: 2293 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2293 + endOffset: 2543 +- name: 'Cloud & Infrastructure: Storage, COGs/STAC and Querying Large Imagery' + startOffset: 2543 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2543 + endOffset: 2726 +- name: 'Open Datasets & APIs: Minor Planet Center, JPL Horizons, NEOWISE' + startOffset: 2726 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2726 + endOffset: 2956 +- name: 'Orbit Linking & Synthetic Tracking: ML for Large-Scale Detection' + startOffset: 2956 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2956 + endOffset: 3054 +- name: 'Mission Architecture: CubeSats, COTS Components & Partnership Strategy' + startOffset: 3054 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3054 + endOffset: 3202 +- name: 'Sampling & Extraction Methods: Scooping, Surface Interaction & R&D' + startOffset: 3202 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3202 + endOffset: 3436 +- name: 'Mathematical Models: Bayesian Frameworks, Thermal Models & Yarkovsky' + startOffset: 3436 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3436 + endOffset: 3611 +- name: 'Tools & Workflows: Notebooks, Reproducibility & Research Practices' + startOffset: 3611 + url: 
https://www.youtube.com/watch?v=YxijEUoDCfw&t=3611 + endOffset: 3668 +- name: 'Get Involved: Job Openings, Links, Contact & Further Resources' + startOffset: 3668 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3668 + endOffset: 3616 + transcript: - header: Podcast Introduction - line: This week, we'll talk about extracting space resources from asteroids. We @@ -1119,119 +1222,6 @@ transcript: sec: 3699 time: '1:01:39' who: Alexey -description: 'Discover asteroid mining: machine learning & hyperspectral spectroscopy - to detect water for ISRU—learn detection methods, datasets, mission design & tools.' -intro: How can we reliably detect water on near‑Earth asteroids using machine learning - and hyperspectral spectroscopy to enable in‑situ resource utilization (ISRU)? In - this episode Daynan Crull—co‑founder of Karman+ and lead of its science and technology - effort—walks through the science and engineering needed to find and characterize - asteroid water for space missions. Drawing on his background in remote sensing and - ML, Daynan explains hyperspectral infrared signatures for water detection, spectral - classification approaches, and the limits of ground truth from returned samples - and meteorites. Along the way we cover relevant astronomical data types (images, - hyperspectral bands, time series), asteroid features like photometry and rotation, - observability challenges, and ML tasks from signal processing to orbit linking and - synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), - sampling and extraction concepts, economic use cases for water‑as‑fuel, and the - cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. - Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, - machine learning for water detection, and the datasets and infrastructure to get - involved in ISRU research and missions. 
-dateadded: '2022-06-04' -duration: PT01H16S -quotableClips: -- name: Podcast Introduction - startOffset: 83 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=83 - endOffset: 111 -- name: 'Career & Data Science Pivot: From Astronomy to Asteroid Mining' - startOffset: 111 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=111 - endOffset: 292 -- name: 'Cosmology vs. Astronomy: Timescales, Theory & Observation' - startOffset: 292 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=292 - endOffset: 363 -- name: 'Machine Learning in Astronomy: Tasks, Signal Processing & Scaling' - startOffset: 363 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=363 - endOffset: 440 -- name: 'Gravitational Wave Detection: Signal, Noise & Instrument Glitches' - startOffset: 440 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=440 - endOffset: 765 -- name: 'Astronomical Data Types: Images, Hyperspectral Bands & Time Series' - startOffset: 765 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=765 - endOffset: 864 -- name: 'Hyperspectral Spectroscopy: Infrared Signatures & Water Detection' - startOffset: 864 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=864 - endOffset: 1004 -- name: 'Asteroid Features: Photometry, Light Curves, Rotation & Polarimetry' - startOffset: 1004 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1004 - endOffset: 1175 -- name: Spectral Classification & ML Approaches for Water Identification - startOffset: 1175 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1175 - endOffset: 1320 -- name: 'Ground Truth Limitations: Returned Samples, Meteorites & Validation' - startOffset: 1320 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1320 - endOffset: 1542 -- name: 'ISRU & Water-as-Fuel: Economics and Use Cases for Space Resources' - startOffset: 1542 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1542 - endOffset: 1818 -- name: 'Other Resources on Asteroids: Metals, Organics & Scientific Value' - startOffset: 1818 - url: 
https://www.youtube.com/watch?v=YxijEUoDCfw&t=1818 - endOffset: 1932 -- name: 'Asteroid Origins: Main Belt, Resonances & Near-Earth Populations' - startOffset: 1932 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1932 - endOffset: 2148 -- name: 'Observability Challenges: Angles, Dawn/Dusk Windows & Detection Biases' - startOffset: 2148 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2148 - endOffset: 2293 -- name: 'Data Organization: Team Roles, Data Engineering & Bayesian Engines' - startOffset: 2293 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2293 - endOffset: 2543 -- name: 'Cloud & Infrastructure: Storage, COGs/STAC and Querying Large Imagery' - startOffset: 2543 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2543 - endOffset: 2726 -- name: 'Open Datasets & APIs: Minor Planet Center, JPL Horizons, NEOWISE' - startOffset: 2726 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2726 - endOffset: 2956 -- name: 'Orbit Linking & Synthetic Tracking: ML for Large-Scale Detection' - startOffset: 2956 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2956 - endOffset: 3054 -- name: 'Mission Architecture: CubeSats, COTS Components & Partnership Strategy' - startOffset: 3054 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3054 - endOffset: 3202 -- name: 'Sampling & Extraction Methods: Scooping, Surface Interaction & R&D' - startOffset: 3202 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3202 - endOffset: 3436 -- name: 'Mathematical Models: Bayesian Frameworks, Thermal Models & Yarkovsky' - startOffset: 3436 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3436 - endOffset: 3611 -- name: 'Tools & Workflows: Notebooks, Reproducibility & Research Practices' - startOffset: 3611 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3611 - endOffset: 3668 -- name: 'Get Involved: Job Openings, Links, Contact & Further Resources' - startOffset: 3668 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3668 - endOffset: 3616 --- Links: 
diff --git a/_podcast/s09e01-machine-learning-in-marketing.md b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md similarity index 97% rename from _podcast/s09e01-machine-learning-in-marketing.md rename to _podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md index c281c72f..5a1561e3 100644 --- a/_podcast/s09e01-machine-learning-in-marketing.md +++ b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md @@ -1,20 +1,140 @@ --- +title: 'Marketing Data Science: Attribution, Media Mix Modeling, Uplift & Cookieless Tracking' +short: Machine Learning in Marketing +season: 9 episode: 1 guests: - juanorduz +image: images/podcast/s09e01-machine-learning-in-marketing.jpg ids: anchor: Machine-Learning-in-Marketing---Juan-Orduz-e1j1muj youtube: jsAxUd_bZpw -image: images/podcast/s09e01-machine-learning-in-marketing.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Machine-Learning-in-Marketing---Juan-Orduz-e1j1muj apple: https://podcasts.apple.com/us/podcast/machine-learning-in-marketing-juan-orduz/id1541710331?i=1000564219176 spotify: https://open.spotify.com/episode/0rc8zZjdxr5ncxqH9RDqBV?si=49feb89374554f65 youtube: https://www.youtube.com/watch?v=jsAxUd_bZpw -season: 9 -short: Machine Learning in Marketing -title: 'Marketing Data Science: Attribution, Media Mix Modeling, Uplift & Cookieless - Tracking' + +description: Learn attribution, media mix modeling & cookieless tracking to measure uplift, TV/offline impact and automate MMM for faster acquisition & retention +intro: How can marketing teams reliably measure ad impact, allocate budget across channels, and adapt to a cookieless world? In this episode, Juan Orduz — a Berlin‑based mathematician and data scientist specializing in statistical learning, time series, Bayesian and geometric methods — walks through practical marketing data science approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless tracking.

We cover attribution basics and multi‑channel ambiguity, MMM techniques including regression, ad‑stock and saturation, and campaign uplift estimation using time‑series counterfactuals. Juan explains measuring TV and offline channels, the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention and purchase‑frequency modeling. You’ll also hear about uplift A/B testing design, modeling benchmarks (start simple), MMM retraining cadence, learning decay rates with Bayesian regression, and building a marketing data function with the right data integrations and cross‑functional collaboration.

If you want actionable guidance on attribution models, media mix optimization, privacy‑aware tracking, and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, common pitfalls, and learning resources to help practitioners improve measurement and decision‑making +topics: +- marketing +- machine learning +dateadded: 2022-05-28 + +duration: PT00H59M31S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=0 + endOffset: 102 +- name: 'Introduction: Juan Orduz — mathematician and data scientist' + startOffset: 102 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=102 + endOffset: 167 +- name: 'Career Path: From geometric analysis to industry data science' + startOffset: 167 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=167 + endOffset: 309 +- name: Geometric Analysis Overview & connections to Bayesian sampling + startOffset: 309 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=309 + endOffset: 451 +- name: 'Machine Learning in Marketing: Key use cases (acquisition, retention, NLP)' + startOffset: 451 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=451 + endOffset: 618 +- name: 'Attribution Basics: Multi-channel user journeys and ambiguity' + startOffset: 618 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=618 + endOffset: 816 +- name: 'Media Mix Modeling: Regression, saturation and ad-stock transformations' + startOffset: 816 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=816 + endOffset: 898 +- name: 'Campaign Uplift Estimation: Time series counterfactuals and ad impact' + startOffset: 898 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=898 + endOffset: 1188 +- name: 'Measuring TV & Offline Channels: Aggregated impressions and time granularity' + startOffset: 1188 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1188 + endOffset: 1249 +- name: 'Privacy Changes and Cookieless Tracking: Impact of iOS 14.5' + startOffset: 1249 + url: 
https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1249 + endOffset: 1384 +- name: 'Retention Modeling: Contractual vs non-contractual churn strategies' + startOffset: 1384 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1384 + endOffset: 1537 +- name: 'Purchase Frequency Modeling: Detecting unusual inactivity patterns' + startOffset: 1537 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1537 + endOffset: 1753 +- name: 'Uplift Modeling: Targeted interventions versus churn prediction' + startOffset: 1753 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1753 + endOffset: 1854 +- name: 'A/B Testing for Uplift: Control/treatment design and data pitfalls' + startOffset: 1854 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1854 + endOffset: 2124 +- name: 'Modeling Benchmarks: Start simple with baselines before complex ML' + startOffset: 2124 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2124 + endOffset: 2225 +- name: 'MMM Retraining Cadence: Monthly updates and automation considerations' + startOffset: 2225 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2225 + endOffset: 2302 +- name: 'Attribution Baselines: Uniform allocation and look-alike approaches' + startOffset: 2302 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2302 + endOffset: 2381 +- name: 'Learning Decay Rates: Estimating channel decay with Bayesian regression' + startOffset: 2381 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2381 + endOffset: 2446 +- name: 'Learning Resources: Books, courses, talks and Juan’s blog' + startOffset: 2446 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2446 + endOffset: 2526 +- name: 'Bayesian vs Frequentist: When to use priors and hierarchical models' + startOffset: 2526 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2526 + endOffset: 2886 +- name: 'Building a Marketing Data Function: Data integrations and infrastructure + first' + startOffset: 2886 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2886 + endOffset: 3050 +- 
name: 'Cross-functional Collaboration: Analysts, engineers and marketing stakeholders' + startOffset: 3050 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3050 + endOffset: 3217 +- name: 'KPI Definition: Short-term vs long-term conversion objectives' + startOffset: 3217 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3217 + endOffset: 3312 +- name: 'Hard Problems in Marketing: Offline channels, data quality, creative solutions' + startOffset: 3312 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3312 + endOffset: 3422 +- name: 'Marketing Domain Knowledge: Stakeholder alignment and explainability' + startOffset: 3422 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3422 + endOffset: 3528 +- name: 'Find Juan Online: Blog, GitHub and contact links' + startOffset: 3528 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3528 + endOffset: 3622 +- name: Closing Remarks & resource links + startOffset: 3622 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3622 + endOffset: 3571 + transcript: - header: Podcast Introduction - header: 'Introduction: Juan Orduz — mathematician and data scientist' @@ -1100,137 +1220,6 @@ transcript: sec: 3673 time: '1:01:13' who: Alexey -description: Learn attribution, media mix modeling & cookieless tracking to measure - uplift, TV/offline impact and automate MMM for faster acquisition & retention -intro: How can marketing teams reliably measure ad impact, allocate budget across - channels, and adapt to a cookieless world? In this episode, Juan Orduz — a Berlin‑based - mathematician and data scientist specializing in statistical learning, time series, - Bayesian and geometric methods — walks through practical marketing data science - approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless - tracking.

We cover attribution basics and multi‑channel ambiguity, MMM - techniques including regression, ad‑stock and saturation, and campaign uplift estimation - using time‑series counterfactuals. Juan explains measuring TV and offline channels, - the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention - and purchase‑frequency modeling. You’ll also hear about uplift A/B testing design, - modeling benchmarks (start simple), MMM retraining cadence, learning decay rates - with Bayesian regression, and building a marketing data function with the right - data integrations and cross‑functional collaboration.

If you want actionable - guidance on attribution models, media mix optimization, privacy‑aware tracking, - and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, - common pitfalls, and learning resources to help practitioners improve measurement - and decision‑making. -dateadded: '2022-05-28' -duration: PT00H59M31S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=0 - endOffset: 102 -- name: 'Introduction: Juan Orduz — mathematician and data scientist' - startOffset: 102 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=102 - endOffset: 167 -- name: 'Career Path: From geometric analysis to industry data science' - startOffset: 167 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=167 - endOffset: 309 -- name: Geometric Analysis Overview & connections to Bayesian sampling - startOffset: 309 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=309 - endOffset: 451 -- name: 'Machine Learning in Marketing: Key use cases (acquisition, retention, NLP)' - startOffset: 451 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=451 - endOffset: 618 -- name: 'Attribution Basics: Multi-channel user journeys and ambiguity' - startOffset: 618 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=618 - endOffset: 816 -- name: 'Media Mix Modeling: Regression, saturation and ad-stock transformations' - startOffset: 816 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=816 - endOffset: 898 -- name: 'Campaign Uplift Estimation: Time series counterfactuals and ad impact' - startOffset: 898 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=898 - endOffset: 1188 -- name: 'Measuring TV & Offline Channels: Aggregated impressions and time granularity' - startOffset: 1188 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1188 - endOffset: 1249 -- name: 'Privacy Changes and Cookieless Tracking: Impact of iOS 14.5' - startOffset: 1249 - url: 
https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1249 - endOffset: 1384 -- name: 'Retention Modeling: Contractual vs non-contractual churn strategies' - startOffset: 1384 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1384 - endOffset: 1537 -- name: 'Purchase Frequency Modeling: Detecting unusual inactivity patterns' - startOffset: 1537 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1537 - endOffset: 1753 -- name: 'Uplift Modeling: Targeted interventions versus churn prediction' - startOffset: 1753 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1753 - endOffset: 1854 -- name: 'A/B Testing for Uplift: Control/treatment design and data pitfalls' - startOffset: 1854 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1854 - endOffset: 2124 -- name: 'Modeling Benchmarks: Start simple with baselines before complex ML' - startOffset: 2124 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2124 - endOffset: 2225 -- name: 'MMM Retraining Cadence: Monthly updates and automation considerations' - startOffset: 2225 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2225 - endOffset: 2302 -- name: 'Attribution Baselines: Uniform allocation and look-alike approaches' - startOffset: 2302 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2302 - endOffset: 2381 -- name: 'Learning Decay Rates: Estimating channel decay with Bayesian regression' - startOffset: 2381 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2381 - endOffset: 2446 -- name: 'Learning Resources: Books, courses, talks and Juan’s blog' - startOffset: 2446 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2446 - endOffset: 2526 -- name: 'Bayesian vs Frequentist: When to use priors and hierarchical models' - startOffset: 2526 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2526 - endOffset: 2886 -- name: 'Building a Marketing Data Function: Data integrations and infrastructure - first' - startOffset: 2886 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2886 - endOffset: 3050 -- 
name: 'Cross-functional Collaboration: Analysts, engineers and marketing stakeholders' - startOffset: 3050 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3050 - endOffset: 3217 -- name: 'KPI Definition: Short-term vs long-term conversion objectives' - startOffset: 3217 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3217 - endOffset: 3312 -- name: 'Hard Problems in Marketing: Offline channels, data quality, creative solutions' - startOffset: 3312 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3312 - endOffset: 3422 -- name: 'Marketing Domain Knowledge: Stakeholder alignment and explainability' - startOffset: 3422 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3422 - endOffset: 3528 -- name: 'Find Juan Online: Blog, GitHub and contact links' - startOffset: 3528 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3528 - endOffset: 3622 -- name: Closing Remarks & resource links - startOffset: 3622 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3622 - endOffset: 3571 --- Links: diff --git a/_podcast/s07e05-machine-learning-system-design-interview.md b/_podcast/machine-learning-system-design-interview.md similarity index 97% rename from _podcast/s07e05-machine-learning-system-design-interview.md rename to _podcast/machine-learning-system-design-interview.md index e50b7547..65f4f018 100644 --- a/_podcast/s07e05-machine-learning-system-design-interview.md +++ b/_podcast/machine-learning-system-design-interview.md @@ -1,42 +1,141 @@ --- +title: 'ML System Design Interviews: Production ML, Fraud Detection, Features, A/B Testing & MLOps' +short: Machine Learning System Design Interview +season: 7 episode: 5 guests: - valeriybabushkin -intro: 'How do you approach ML system design interviews that probe production constraints, - fraud detection trade-offs, and MLOps realities? 
In this episode, Valerii Babushkin - — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, - and author of Machine Learning System Design — walks through what interviewers look - for and how candidates should structure answers for real-world ML problems.

- We cover concrete topics you can use in interviews and on the job: distinguishing - software vs. ML system design; a fraud detection case study (probabilities, loss - functions, real-time requirements); label noise, class imbalance, and feature engineering - trade-offs; end-to-end pipeline items like metrics, baselines, A/B testing, and - validating in production; monitoring, distribution shift, fallbacks, and production - robustness; serving models, embeddings, and MLOps roles; plus when to avoid ML and - practical checklist items for core projects. Valerii also shares interview tactics - — signposting depth, stating assumptions, iterative baselines — and guidance for - new grads and career progression toward system design roles.

Listen to - learn actionable frameworks, example trade-offs, and preparation strategies to improve - your ML system design interviews and production ML decisions.' -description: 'Master ML system design: fraud detection, feature engineering & A/B - testing to ace interviews, build robust production models, monitoring and MLOps.' -date: 2025-11-07 -topics: -- machine learning -- career growth +image: images/podcast/s07e05-machine-learning-system-design-interview.jpg ids: anchor: Machine-Learning-System-Design-Interview---Valerii-Babushkin-e1ej65e youtube: 0RsmRjar66E -image: images/podcast/s07e05-machine-learning-system-design-interview.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Machine-Learning-System-Design-Interview---Valerii-Babushkin-e1ej65e apple: https://podcasts.apple.com/us/podcast/machine-learning-system-design-interview-valerii-babushkin/id1541710331?i=1000551566652 spotify: https://open.spotify.com/episode/5tSLFOh8PGe1NFFz1of9Xe youtube: https://www.youtube.com/watch?v=0RsmRjar66E -season: 7 -short: Machine Learning System Design Interview -title: 'ML System Design Interviews: Production ML, Fraud Detection, Features, A/B - Testing & MLOps' + +description: 'Master ML system design: fraud detection, feature engineering & A/B testing to ace interviews, build robust production models, monitoring and MLOps.' +intro: 'How do you approach ML system design interviews that probe production constraints, fraud detection trade-offs, and MLOps realities? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through what interviewers look for and how candidates should structure answers for real-world ML problems.

We cover concrete topics you can use in interviews and on the job: distinguishing software vs. ML system design; a fraud detection case study (probabilities, loss functions, real-time requirements); label noise, class imbalance, and feature engineering trade-offs; end-to-end pipeline items like metrics, baselines, A/B testing, and validating in production; monitoring, distribution shift, fallbacks, and production robustness; serving models, embeddings, and MLOps roles; plus when to avoid ML and practical checklist items for core projects. Valerii also shares interview tactics — signposting depth, stating assumptions, iterative baselines — and guidance for new grads and career progression toward system design roles.

Listen to learn actionable frameworks, example trade-offs, and preparation strategies to improve your ML system design interviews and production ML decisions.' +topics: +- machine learning +- career growth +dateadded: 2022-02-19 +date: 2025-11-07 + +duration: PT00H59M + +quotableClips: +- name: Podcast Introduction & Episode Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=0 + endOffset: 111 +- name: 'Valerii Background: Career Snapshot and Kaggle Achievements' + startOffset: 111 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=111 + endOffset: 201 +- name: 'Blockchain.com Role: Scope, Responsibilities, and Data Ownership' + startOffset: 201 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=201 + endOffset: 346 +- name: 'Transition to Meta: User Privacy Work and Large-Scale ML Experience' + startOffset: 346 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=346 + endOffset: 451 +- name: 'Hiring Experience: Conducting High-Volume Interviews and Team Leadership' + startOffset: 451 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=451 + endOffset: 552 +- name: 'Candidate Targeting: Who Faces ML System Design Interviews' + startOffset: 552 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=552 + endOffset: 683 +- name: 'Interview Structure: 45-Minute Narrative and Evaluation Goals' + startOffset: 683 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=683 + endOffset: 838 +- name: 'Contrast: Software System Design Versus ML System Design' + startOffset: 838 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 + endOffset: 1003 +- name: 'Fraud Detection Case Study: Probabilities, Loss Functions, and Real-Time + Needs' + startOffset: 838 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 + endOffset: 1003 +- name: Labeling, Class Imbalance, and Feature Engineering Tradeoffs + startOffset: 1003 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1003 + endOffset: 1233 +- name: 'Interview Tactics: Stating Assumptions 
and Getting Alignment' + startOffset: 1233 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1233 + endOffset: 1325 +- name: 'Example: Points-of-Interest System vs Personalized Recommender' + startOffset: 1325 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1325 + endOffset: 1468 +- name: 'End-to-End ML Pipeline: Metrics, Baselines, and A/B Testing' + startOffset: 1468 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1468 + endOffset: 1749 +- name: 'Securing the Interview: Iterative Baselines and Signposting Depth' + startOffset: 1749 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1749 + endOffset: 1918 +- name: 'Appropriate Depth: Practical ML Decisions vs Research-Level Detail' + startOffset: 1918 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1918 + endOffset: 2011 +- name: 'Preparation Strategies: Mock Interviews, Resources, and Experience' + startOffset: 2011 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2011 + endOffset: 2279 +- name: 'Industry Checklist: Core ML Project Review Items and Patterns' + startOffset: 2279 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2279 + endOffset: 2411 +- name: 'Defining Goals and Proxy Metrics: Business Alignment and Long-Term Health' + startOffset: 2411 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2411 + endOffset: 2651 +- name: Features, Labels, Model Selection, and Validation Workflow + startOffset: 2651 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2651 + endOffset: 2762 +- name: 'Production Robustness: Monitoring, Distribution Shift, and Fallbacks' + startOffset: 2762 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2762 + endOffset: 2872 +- name: 'System Components: Why Features Matter More Than Model Architecture' + startOffset: 2872 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2872 + endOffset: 3057 +- name: 'Engineering Integration: Serving Models, Embeddings, and MLOps Roles' + startOffset: 3057 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3057 
+ endOffset: 3145 +- name: When to Avoid ML and Useful Design Pattern References + startOffset: 3145 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3145 + endOffset: 3247 +- name: 'New Grad Expectations: Coding Focus and Limited System Design' + startOffset: 3247 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3247 + endOffset: 3443 +- name: 'Validating in Production: A/B Tests, Causality, and Human Labels' + startOffset: 3443 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3443 + endOffset: 3541 +- name: 'Career Path: Moving from Data Science Practice to System Design' + startOffset: 3541 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3541 + endOffset: 3603 +- name: Closing Remarks and Contact Information + startOffset: 3603 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3603 + endOffset: 3540 + transcript: - header: Podcast Introduction & Episode Overview - header: 'Valerii Background: Career Snapshot and Kaggle Achievements' @@ -1317,118 +1416,6 @@ transcript: sec: 3651 time: '1:00:51' who: Valerii -dateadded: '2022-02-19' -duration: PT00H59M -quotableClips: -- name: Podcast Introduction & Episode Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=0 - endOffset: 111 -- name: 'Valerii Background: Career Snapshot and Kaggle Achievements' - startOffset: 111 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=111 - endOffset: 201 -- name: 'Blockchain.com Role: Scope, Responsibilities, and Data Ownership' - startOffset: 201 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=201 - endOffset: 346 -- name: 'Transition to Meta: User Privacy Work and Large-Scale ML Experience' - startOffset: 346 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=346 - endOffset: 451 -- name: 'Hiring Experience: Conducting High-Volume Interviews and Team Leadership' - startOffset: 451 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=451 - endOffset: 552 -- name: 'Candidate Targeting: Who Faces ML System Design Interviews' - 
startOffset: 552 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=552 - endOffset: 683 -- name: 'Interview Structure: 45-Minute Narrative and Evaluation Goals' - startOffset: 683 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=683 - endOffset: 838 -- name: 'Contrast: Software System Design Versus ML System Design' - startOffset: 838 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 - endOffset: 1003 -- name: 'Fraud Detection Case Study: Probabilities, Loss Functions, and Real-Time - Needs' - startOffset: 838 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 - endOffset: 1003 -- name: Labeling, Class Imbalance, and Feature Engineering Tradeoffs - startOffset: 1003 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1003 - endOffset: 1233 -- name: 'Interview Tactics: Stating Assumptions and Getting Alignment' - startOffset: 1233 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1233 - endOffset: 1325 -- name: 'Example: Points-of-Interest System vs Personalized Recommender' - startOffset: 1325 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1325 - endOffset: 1468 -- name: 'End-to-End ML Pipeline: Metrics, Baselines, and A/B Testing' - startOffset: 1468 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1468 - endOffset: 1749 -- name: 'Securing the Interview: Iterative Baselines and Signposting Depth' - startOffset: 1749 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1749 - endOffset: 1918 -- name: 'Appropriate Depth: Practical ML Decisions vs Research-Level Detail' - startOffset: 1918 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1918 - endOffset: 2011 -- name: 'Preparation Strategies: Mock Interviews, Resources, and Experience' - startOffset: 2011 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2011 - endOffset: 2279 -- name: 'Industry Checklist: Core ML Project Review Items and Patterns' - startOffset: 2279 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2279 - endOffset: 2411 -- name: 'Defining Goals and 
Proxy Metrics: Business Alignment and Long-Term Health' - startOffset: 2411 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2411 - endOffset: 2651 -- name: Features, Labels, Model Selection, and Validation Workflow - startOffset: 2651 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2651 - endOffset: 2762 -- name: 'Production Robustness: Monitoring, Distribution Shift, and Fallbacks' - startOffset: 2762 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2762 - endOffset: 2872 -- name: 'System Components: Why Features Matter More Than Model Architecture' - startOffset: 2872 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2872 - endOffset: 3057 -- name: 'Engineering Integration: Serving Models, Embeddings, and MLOps Roles' - startOffset: 3057 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3057 - endOffset: 3145 -- name: When to Avoid ML and Useful Design Pattern References - startOffset: 3145 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3145 - endOffset: 3247 -- name: 'New Grad Expectations: Coding Focus and Limited System Design' - startOffset: 3247 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3247 - endOffset: 3443 -- name: 'Validating in Production: A/B Tests, Causality, and Human Labels' - startOffset: 3443 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3443 - endOffset: 3541 -- name: 'Career Path: Moving from Data Science Practice to System Design' - startOffset: 3541 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3541 - endOffset: 3603 -- name: Closing Remarks and Contact Information - startOffset: 3603 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3603 - endOffset: 3540 --- Links: diff --git a/_podcast/s02e09-roles-skills-monetizing-ml.md b/_podcast/make-money-with-machine-learning-roles-skills.md similarity index 98% rename from _podcast/s02e09-roles-skills-monetizing-ml.md rename to _podcast/make-money-with-machine-learning-roles-skills.md index 77516282..a922a1a9 100644 --- 
a/_podcast/s02e09-roles-skills-monetizing-ml.md +++ b/_podcast/make-money-with-machine-learning-roles-skills.md @@ -1,12 +1,11 @@ --- -title: 'Monetize Machine Learning: Convert Models to ARR/MRR with ML Product & MLOps - Strategy' +title: 'Monetize Machine Learning: Convert Models to ARR/MRR with ML Product & MLOps Strategy' short: New Roles and Key Skills to Monetize Machine Learning +season: 2 +episode: 9 guests: - vinvashishta image: images/podcast/s02e09-roles-skills-monetizing-ml.jpg -season: 2 -episode: 9 ids: youtube: xCjzA_8S4kI anchor: New-Roles-and-Key-Skills-to-Monetize-Machine-Learning---Vin-Vashishta-escer6 @@ -15,6 +14,109 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/New-Roles-and-Key-Skills-to-Monetize-Machine-Learning---Vin-Vashishta-escer6 spotify: https://open.spotify.com/episode/5u2WuUB8GBNE9qDsNR6mby apple: https://podcasts.apple.com/us/podcast/new-roles-key-skills-to-monetize-machine-learning-vin/id1541710331?i=1000512720281 + +description: 'Master monetize machine learning: convert ML models into ARR/MRR using MLOps and team roles to drive revenue, adoption and measurable business impact.' +intro: How do you turn machine learning models into recurring revenue—ARR and MRR—rather than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner and engineer strategist who has brought products to market with ARR in the $100’s of millions, breaks down practical steps to monetize machine learning.

We explore why a revenue-first mindset changes ML strategy, how to translate models into C‑suite metrics like ARR/MRR, and when to prioritize revenue versus cost‑savings. Vin outlines the three core team roles for monetization, the research artifacts and experimental process that make models production-ready, and real category-creation examples from companies such as Amazon, Google, and Stitch Fix. For startups he explains the “angry users + data scientists” product recipe.

You’ll also get frameworks for ML product management—turning strategy into researchable use cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, and product metrics for adoption (usage, task time, decision quality, pricing impact). This episode delivers actionable guidance for leaders, product managers, and engineers seeking to convert ML into sustainable ARR and MRR +topics: +- machine learning +- monetization +- product management +- strategy +dateadded: 2021-03-12 + +duration: PT01H19M01S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=0 + endOffset: 210 +- name: 'Career & technical background: Vin Vashishta''s journey in ML and strategy' + startOffset: 210 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=210 + endOffset: 477 +- name: 'Monetize machine learning: why revenue focus drives ML strategy' + startOffset: 477 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=477 + endOffset: 727 +- name: 'ARR & MRR: translating models into C‑suite revenue metrics' + startOffset: 727 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=727 + endOffset: 959 +- name: 'Revenue vs. 
cost‑savings: business model metrics for ML products' + startOffset: 959 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=959 + endOffset: 1215 +- name: 'Team capabilities for monetizing ML: three core roles overview' + startOffset: 1215 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1215 + endOffset: 1618 +- name: 'Machine learning research: artifacts, datasets, and experimental process' + startOffset: 1618 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1618 + endOffset: 1758 +- name: 'Category creation with ML: examples and market entry (Amazon, Google, Stitch + Fix)' + startOffset: 1758 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1758 + endOffset: 2037 +- name: 'Startups: the "angry users + data scientists" product recipe' + startOffset: 2037 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2037 + endOffset: 2170 +- name: 'Research skillset: hypothesis design, experimentation, explainability & advanced + math' + startOffset: 2170 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2170 + endOffset: 2608 +- name: 'Product management for ML: translating strategy into researchable use cases' + startOffset: 2608 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2608 + endOffset: 2934 +- name: 'Product manager ecosystem: gated decisions, feasibility studies and stakeholders' + startOffset: 2934 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2934 + endOffset: 3053 +- name: 'Career paths into ML product management: backgrounds and upskilling routes' + startOffset: 3053 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3053 + endOffset: 3290 +- name: 'Machine learning architecture: platform vision, cost estimation and production + path' + startOffset: 3290 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3290 + endOffset: 3484 +- name: 'Architecture skills & tools: cloud, MLOps, buy vs build tradeoffs' + startOffset: 3484 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3484 + endOffset: 3642 +- name: 'Transitioning into 
research & architecture roles: realistic career steps' + startOffset: 3642 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3642 + endOffset: 3792 +- name: 'Education gap & corporate upskilling: "farm club" pipelines and university + roles' + startOffset: 3792 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3792 + endOffset: 4416 +- name: 'MBA relevance: degrees vs. practical business fluency for ML product leaders' + startOffset: 4416 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4416 + endOffset: 4454 +- name: 'Role specialization trend: splitting data science into focused functions' + startOffset: 4454 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4454 + endOffset: 4514 +- name: 'Product metrics for adoption: usage, task time, decision quality and pricing + impact' + startOffset: 4514 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4514 + endOffset: 4692 +- name: Episode recap & next steps + startOffset: 4692 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4692 + endOffset: 4741 + transcript: - header: Podcast Introduction - line: Good morning. Now it's 7 AM for you or? @@ -1297,115 +1399,6 @@ transcript: sec: 4743 time: '1:19:03' who: Alexey -description: 'Master monetize machine learning: convert ML models into ARR/MRR using - MLOps and team roles to drive revenue, adoption and measurable business impact.' -intro: How do you turn machine learning models into recurring revenue—ARR and MRR—rather - than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner - and engineer strategist who has brought products to market with ARR in the $100’s - of millions, breaks down practical steps to monetize machine learning.

- We explore why a revenue-first mindset changes ML strategy, how to translate models - into C‑suite metrics like ARR/MRR, and when to prioritize revenue versus cost‑savings. - Vin outlines the three core team roles for monetization, the research artifacts - and experimental process that make models production-ready, and real category-creation - examples from companies such as Amazon, Google, and Stitch Fix. For startups he - explains the “angry users + data scientists” product recipe.

You’ll also - get frameworks for ML product management—turning strategy into researchable use - cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, - and product metrics for adoption (usage, task time, decision quality, pricing impact). - This episode delivers actionable guidance for leaders, product managers, and engineers - seeking to convert ML into sustainable ARR and MRR. -dateadded: '2021-03-12' -duration: PT01H19M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=0 - endOffset: 210 -- name: 'Career & technical background: Vin Vashishta''s journey in ML and strategy' - startOffset: 210 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=210 - endOffset: 477 -- name: 'Monetize machine learning: why revenue focus drives ML strategy' - startOffset: 477 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=477 - endOffset: 727 -- name: 'ARR & MRR: translating models into C‑suite revenue metrics' - startOffset: 727 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=727 - endOffset: 959 -- name: 'Revenue vs. 
cost‑savings: business model metrics for ML products' - startOffset: 959 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=959 - endOffset: 1215 -- name: 'Team capabilities for monetizing ML: three core roles overview' - startOffset: 1215 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1215 - endOffset: 1618 -- name: 'Machine learning research: artifacts, datasets, and experimental process' - startOffset: 1618 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1618 - endOffset: 1758 -- name: 'Category creation with ML: examples and market entry (Amazon, Google, Stitch - Fix)' - startOffset: 1758 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1758 - endOffset: 2037 -- name: 'Startups: the "angry users + data scientists" product recipe' - startOffset: 2037 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2037 - endOffset: 2170 -- name: 'Research skillset: hypothesis design, experimentation, explainability & advanced - math' - startOffset: 2170 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2170 - endOffset: 2608 -- name: 'Product management for ML: translating strategy into researchable use cases' - startOffset: 2608 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2608 - endOffset: 2934 -- name: 'Product manager ecosystem: gated decisions, feasibility studies and stakeholders' - startOffset: 2934 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2934 - endOffset: 3053 -- name: 'Career paths into ML product management: backgrounds and upskilling routes' - startOffset: 3053 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3053 - endOffset: 3290 -- name: 'Machine learning architecture: platform vision, cost estimation and production - path' - startOffset: 3290 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3290 - endOffset: 3484 -- name: 'Architecture skills & tools: cloud, MLOps, buy vs build tradeoffs' - startOffset: 3484 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3484 - endOffset: 3642 -- name: 'Transitioning into 
research & architecture roles: realistic career steps' - startOffset: 3642 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3642 - endOffset: 3792 -- name: 'Education gap & corporate upskilling: "farm club" pipelines and university - roles' - startOffset: 3792 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3792 - endOffset: 4416 -- name: 'MBA relevance: degrees vs. practical business fluency for ML product leaders' - startOffset: 4416 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4416 - endOffset: 4454 -- name: 'Role specialization trend: splitting data science into focused functions' - startOffset: 4454 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4454 - endOffset: 4514 -- name: 'Product metrics for adoption: usage, task time, decision quality and pricing - impact' - startOffset: 4514 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4514 - endOffset: 4692 -- name: Episode recap & next steps - startOffset: 4692 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4692 - endOffset: 4741 --- We discussed monetization roles and the capabilities people need to move into those roles. The key roles are ML Researcher, ML Architect, and ML Product Manager. 
diff --git a/_podcast/s05e03-metrics-and-kpis.md b/_podcast/ml-engineering-kpis-and-metrics-strategy.md similarity index 98% rename from _podcast/s05e03-metrics-and-kpis.md rename to _podcast/ml-engineering-kpis-and-metrics-strategy.md index ba675718..bbe20590 100644 --- a/_podcast/s05e03-metrics-and-kpis.md +++ b/_podcast/ml-engineering-kpis-and-metrics-strategy.md @@ -1,12 +1,11 @@ --- -title: 'KPI Design & Metrics Strategy: Prioritize Impact, Avoid Vanity Metrics, and - Prove ROI' +title: 'KPI Design & Metrics Strategy: Prioritize Impact, Avoid Vanity Metrics, and Prove ROI' short: 'Defining Success: Metrics and KPIs' +season: 5 +episode: 3 guests: - adamsroka image: images/podcast/s05e03-metrics-and-kpis.jpg -season: 5 -episode: 3 ids: youtube: H4P2RfKvXGs anchor: Defining-Success-Metrics-and-KPIs---Adam-Sroka-e17gfp0 @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Defining-Success-Metrics-and-KPIs---Adam-Sroka-e17gfp0 spotify: https://open.spotify.com/episode/5kTD7LjoXos1fm2LPD7nJc apple: https://podcasts.apple.com/us/podcast/defining-success-metrics-and-kpis-adam-sroka/id1541710331?i=1000535667935 + +description: Discover KPI design, metrics strategy & ROI proof - avoid vanity metrics, build dashboards, prioritize impact, and measure experiments to prove value +intro: How do you design KPIs that prioritize real impact, avoid vanity metrics, and actually prove ROI? In this episode, Dr. Adam Sroka — Head of Machine Learning Engineering at Origami Energy, with a background from a Physics PhD to data science, reinforcement learning, and consultancy — walks through a practical metrics strategy for data and product teams.

We cover why metrics matter (Drucker, merit functions), how to make metrics comparable (units), and concrete examples like weighted revenue for sales pipelines and burn-down/maintainability metrics for professional services. Adam explains top-down KPI alignment, avoiding vanity metrics and KPI gaming, and using derived/composite KPIs to capture margin trade-offs. You’ll hear a workshop case for grocery retail, guidance on KPI prioritization and review cadence, and tips for operationalizing metrics through dashboards, executive communication, and a North Star metric. We also dig into threshold, health & hygiene metrics, translating model performance into £/time saved, and robust experiment and model validation (A/B, randomization, backtesting, uplift).

If you’re responsible for KPI design, metrics strategy, or proving ROI from data work, this episode gives actionable frameworks to measure impact and reduce measurement risk +topics: +- machine learning +- leadership +- data science +- product management +- strategy +- metrics +- communication +dateadded: 2021-09-19 + +duration: PT01H02M30S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=0 + endOffset: 90 +- name: Guest Introduction & Career Path + startOffset: 90 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=90 + endOffset: 142 +- name: From Physics PhD to Data Science and Reinforcement Learning + startOffset: 142 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=142 + endOffset: 392 +- name: 'Moving into Consultancy: BI, Dashboards, and Client Workshops' + startOffset: 392 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=392 + endOffset: 540 +- name: Laser Research, Ray-Tracing Tools, and Early RL Experiments + startOffset: 540 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=540 + endOffset: 726 +- name: 'Why Metrics Matter: Drucker, Measurement, and Merit Functions' + startOffset: 726 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=726 + endOffset: 911 +- name: Merit Functions & Project Prioritization (Impact vs Cost) + startOffset: 911 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=911 + endOffset: 1011 +- name: Units & Comparability in Metric Design + startOffset: 1011 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1011 + endOffset: 1042 +- name: 'Sales Pipeline Metrics: Weighted Revenue and Lead Qualification' + startOffset: 1042 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1042 + endOffset: 1246 +- name: 'Professional Services Metrics: Burn-Down Rate & Maintainability of Earnings' + startOffset: 1246 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1246 + endOffset: 1361 +- name: 'KPIs Defined: Top-Down Alignment and Executive Decision 
Metrics' + startOffset: 1361 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1361 + endOffset: 1567 +- name: 'Avoiding Vanity Metrics: Make the Important Measurable' + startOffset: 1567 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1567 + endOffset: 1684 +- name: KPI Gaming Risks & Designing Competing KPIs + startOffset: 1684 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1684 + endOffset: 1830 +- name: 'Derived KPIs: Composite Metrics to Capture Margin and Trade-offs' + startOffset: 1830 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1830 + endOffset: 1964 +- name: 'Workshop Process: Designing Metrics for Grocery Retail' + startOffset: 1964 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1964 + endOffset: 2239 +- name: KPI Prioritization, Review Cadence, and Iteration Best Practices + startOffset: 2239 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2239 + endOffset: 2467 +- name: 'Operationalizing KPIs: Dashboards, Visibility, and Executive Communication' + startOffset: 2467 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2467 + endOffset: 2699 +- name: 'North Star Metric: Single Guiding Indicator for Strategy' + startOffset: 2699 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2699 + endOffset: 2794 +- name: 'Threshold Metrics: Alerts, Limits, and Safety Conditions' + startOffset: 2794 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2794 + endOffset: 2928 +- name: 'Health & Hygiene Metrics: Downtime and Service Reliability' + startOffset: 2928 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2928 + endOffset: 3072 +- name: 'Data Team Metrics: Translate Model Performance into £ / Time Saved' + startOffset: 3072 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3072 + endOffset: 3342 +- name: 'Experimentation & Measurement: A/B Testing and Champion–Challenger' + startOffset: 3342 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3342 + endOffset: 3395 +- name: 'Model Validation Techniques: Randomization, 
Backtesting, and Uplift' + startOffset: 3395 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3395 + endOffset: 3602 +- name: 'Timeboxing Data Work: Two-Week Spikes and Accelerate Metrics' + startOffset: 3602 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3602 + endOffset: 3795 +- name: Episode Wrap-Up, Contact Info, and Further Reading + startOffset: 3795 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3795 + endOffset: 3750 + transcript: - header: Podcast Introduction - header: Guest Introduction & Career Path @@ -1039,127 +1155,6 @@ transcript: sec: 3840 time: '1:04:00' who: Alexey -description: Discover KPI design, metrics strategy & ROI proof - avoid vanity metrics, - build dashboards, prioritize impact, and measure experiments to prove value. -intro: How do you design KPIs that prioritize real impact, avoid vanity metrics, and - actually prove ROI? In this episode, Dr. Adam Sroka — Head of Machine Learning Engineering - at Origami Energy, with a background from a Physics PhD to data science, reinforcement - learning, and consultancy — walks through a practical metrics strategy for data - and product teams.

We cover why metrics matter (Drucker, merit functions), - how to make metrics comparable (units), and concrete examples like weighted revenue - for sales pipelines and burn-down/maintainability metrics for professional services. - Adam explains top-down KPI alignment, avoiding vanity metrics and KPI gaming, and - using derived/composite KPIs to capture margin trade-offs. You’ll hear a workshop - case for grocery retail, guidance on KPI prioritization and review cadence, and - tips for operationalizing metrics through dashboards, executive communication, and - a North Star metric. We also dig into threshold, health & hygiene metrics, translating - model performance into £/time saved, and robust experiment and model validation - (A/B, randomization, backtesting, uplift).

If you’re responsible for KPI - design, metrics strategy, or proving ROI from data work, this episode gives actionable - frameworks to measure impact and reduce measurement risk. -dateadded: '2021-09-19' -duration: PT01H02M30S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=0 - endOffset: 90 -- name: Guest Introduction & Career Path - startOffset: 90 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=90 - endOffset: 142 -- name: From Physics PhD to Data Science and Reinforcement Learning - startOffset: 142 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=142 - endOffset: 392 -- name: 'Moving into Consultancy: BI, Dashboards, and Client Workshops' - startOffset: 392 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=392 - endOffset: 540 -- name: Laser Research, Ray-Tracing Tools, and Early RL Experiments - startOffset: 540 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=540 - endOffset: 726 -- name: 'Why Metrics Matter: Drucker, Measurement, and Merit Functions' - startOffset: 726 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=726 - endOffset: 911 -- name: Merit Functions & Project Prioritization (Impact vs Cost) - startOffset: 911 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=911 - endOffset: 1011 -- name: Units & Comparability in Metric Design - startOffset: 1011 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1011 - endOffset: 1042 -- name: 'Sales Pipeline Metrics: Weighted Revenue and Lead Qualification' - startOffset: 1042 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1042 - endOffset: 1246 -- name: 'Professional Services Metrics: Burn-Down Rate & Maintainability of Earnings' - startOffset: 1246 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1246 - endOffset: 1361 -- name: 'KPIs Defined: Top-Down Alignment and Executive Decision Metrics' - startOffset: 1361 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1361 - endOffset: 1567 -- name: 'Avoiding 
Vanity Metrics: Make the Important Measurable' - startOffset: 1567 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1567 - endOffset: 1684 -- name: KPI Gaming Risks & Designing Competing KPIs - startOffset: 1684 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1684 - endOffset: 1830 -- name: 'Derived KPIs: Composite Metrics to Capture Margin and Trade-offs' - startOffset: 1830 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1830 - endOffset: 1964 -- name: 'Workshop Process: Designing Metrics for Grocery Retail' - startOffset: 1964 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1964 - endOffset: 2239 -- name: KPI Prioritization, Review Cadence, and Iteration Best Practices - startOffset: 2239 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2239 - endOffset: 2467 -- name: 'Operationalizing KPIs: Dashboards, Visibility, and Executive Communication' - startOffset: 2467 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2467 - endOffset: 2699 -- name: 'North Star Metric: Single Guiding Indicator for Strategy' - startOffset: 2699 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2699 - endOffset: 2794 -- name: 'Threshold Metrics: Alerts, Limits, and Safety Conditions' - startOffset: 2794 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2794 - endOffset: 2928 -- name: 'Health & Hygiene Metrics: Downtime and Service Reliability' - startOffset: 2928 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2928 - endOffset: 3072 -- name: 'Data Team Metrics: Translate Model Performance into £ / Time Saved' - startOffset: 3072 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3072 - endOffset: 3342 -- name: 'Experimentation & Measurement: A/B Testing and Champion–Challenger' - startOffset: 3342 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3342 - endOffset: 3395 -- name: 'Model Validation Techniques: Randomization, Backtesting, and Uplift' - startOffset: 3395 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3395 - endOffset: 3602 -- 
name: 'Timeboxing Data Work: Two-Week Spikes and Accelerate Metrics' - startOffset: 3602 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3602 - endOffset: 3795 -- name: Episode Wrap-Up, Contact Info, and Further Reading - startOffset: 3795 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3795 - endOffset: 3750 --- diff --git a/_podcast/s06e07-product-management-for-machine-learning.md b/_podcast/ml-product-manager-and-mlops-platform-strategy.md similarity index 97% rename from _podcast/s06e07-product-management-for-machine-learning.md rename to _podcast/ml-product-manager-and-mlops-platform-strategy.md index 10ce5f48..593d9b03 100644 --- a/_podcast/s06e07-product-management-for-machine-learning.md +++ b/_podcast/ml-product-manager-and-mlops-platform-strategy.md @@ -1,11 +1,11 @@ --- title: 'Become an ML Product Manager: MLOps Platforms, Observability & Adoption' short: Product Management for Machine Learning +season: 6 +episode: 7 guests: - geojolly image: images/podcast/s06e07-product-management-for-machine-learning.jpg -season: 6 -episode: 7 ids: youtube: PjqjPvHliqg anchor: Product-Management-for-Machine-Learning---Geo-Jolly-e1brpvm @@ -14,6 +14,141 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Product-Management-for-Machine-Learning---Geo-Jolly-e1brpvm spotify: https://open.spotify.com/episode/7zfH4hagZKwoIWmee0AXBd apple: https://podcasts.apple.com/us/podcast/product-management-for-machine-learning-geo-jolly/id1541710331?i=1000545301034 + +description: 'Learn ML Product Manager tactics: MLOps platform strategy, observability KPIs & adoption playbooks to own roadmaps, governance, and stakeholder ROI.' +intro: How do you become an ML product manager and build MLOps platforms that teams actually use? In this episode, Geo Jolly, a Technical PM at Glovo with a background from web/dev to data science and product management, walks through the practical skills and decisions that define the role.

We cover MLOps platform strategy and vendor evaluation, treating internal platform users as customers, and the real productivity costs of poor tooling UX. Geo outlines product manager responsibilities—roadmaps, specs, backlog prioritization—and explains outcome-driven problem definition, avoiding solution bias, and running workshops and interviews to break down complex problems. You’ll hear about ML observability and KPIs to measure platform impact, release governance and rollout timing, adoption strategy for internal stakeholders, and engineering roles needed for platform delivery (CI/CD, K8s, syseng). Practical topics also include model validation and ML quality assurance, embedded data scientists as power users, Agile approaches for data science, and concrete transition paths from data scientist or Scrum Master into technical ML product roles.

Listen to gain actionable guidance on MLOps platforms, observability, adoption strategy, and the technical literacy required to succeed as an ML product manager +topics: +- product management +- machine learning +- MLOps +- leadership +- career growth +dateadded: 2021-12-17 + +duration: PT01H02M46S + +quotableClips: +- name: 'Episode Introduction: Product Management for Machine Learning' + startOffset: 0 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=0 + endOffset: 73 +- name: 'Guest Overview: Geo and episode focus on AI Product Manager role' + startOffset: 73 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=73 + endOffset: 116 +- name: 'Career Journey: From web/dev to data science to product management' + startOffset: 116 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=116 + endOffset: 388 +- name: 'Glovo Role: Leading ML platform strategy and team responsibilities' + startOffset: 388 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=388 + endOffset: 521 +- name: In-house MLOps Platform Strategy & Vendor Evaluation + startOffset: 521 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=521 + endOffset: 590 +- name: 'Product Manager Responsibilities: Roadmap, specs, and stakeholder balance' + startOffset: 590 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=590 + endOffset: 684 +- name: 'Internal Platform Users as Customers: ROI and adoption considerations' + startOffset: 684 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=684 + endOffset: 824 +- name: 'Platform Usability Costs: Productivity losses from poor tooling UX' + startOffset: 824 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=824 + endOffset: 919 +- name: 'Backlog Prioritization: Grooming with engineering and Agile practices' + startOffset: 919 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=919 + endOffset: 1004 +- name: 'Outcome-Driven Problem Definition: Metrics over immediate solutions' + startOffset: 1004 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1004 + 
endOffset: 1105 +- name: 'ML Observability: KPIs and measuring platform impact' + startOffset: 1105 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1105 + endOffset: 1169 +- name: 'Avoiding Solution Bias: Techniques to resist jumping into solutions' + startOffset: 1169 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1169 + endOffset: 1266 +- name: 'Collaborative Problem Breakdown: Workshops, interviews, and user input' + startOffset: 1266 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1266 + endOffset: 1335 +- name: 'Core PM Skills: Communication, prioritization, and continuous learning' + startOffset: 1335 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1335 + endOffset: 1408 +- name: 'Technical Literacy: Model architectures, data infra, and cloud concepts' + startOffset: 1408 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1408 + endOffset: 1531 +- name: 'Technical Background Value: Why platform PMs need tooling familiarity' + startOffset: 1531 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1531 + endOffset: 1717 +- name: 'Role Differences: Data Science Lead / Staff vs Technical ML Product Manager' + startOffset: 1717 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1717 + endOffset: 1888 +- name: 'Release Governance & Rollout Strategy: Approvals, compliance, and timing' + startOffset: 1888 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1888 + endOffset: 2118 +- name: 'Adoption Strategy: "Time to stakeholders" and internal rollouts' + startOffset: 2118 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2118 + endOffset: 2268 +- name: 'Engineering Roles in ML Platforms: Backend, syseng, CI/CD, and K8s' + startOffset: 2268 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2268 + endOffset: 2414 +- name: 'Embedded Data Scientists: Power users, developer advocates, and demos' + startOffset: 2414 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2414 + endOffset: 2534 +- name: 'Agile for Data Science: Kanban, Scrum, 
and adapting to research work' + startOffset: 2534 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2534 + endOffset: 2696 +- name: 'Transition Path: Moving from Data Scientist to Technical Product Manager' + startOffset: 2696 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2696 + endOffset: 2983 +- name: 'Recommended Resources: Books and communities for PM transition' + startOffset: 2983 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2983 + endOffset: 3165 +- name: 'Non‑Technical Transitions: Feasibility of moving into ML product roles' + startOffset: 3165 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3165 + endOffset: 3344 +- name: 'User Research for Internal Platforms: Surveys and Happiness Reports' + startOffset: 3344 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3344 + endOffset: 3440 +- name: 'ML Quality Assurance: Model validation, shadowing, and release checklists' + startOffset: 3440 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3440 + endOffset: 3592 +- name: 'Scrum Master to PM Advice: Leverage Agile skills and learn ML basics' + startOffset: 3592 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3592 + endOffset: 3711 +- name: 'Final Thoughts: PM demands, scope, and career realities' + startOffset: 3711 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3711 + endOffset: 3815 +- name: 'Contact & Hiring: Geo on LinkedIn and Glovo opportunities' + startOffset: 3815 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3815 + endOffset: 3766 + transcript: - header: 'Episode Introduction: Product Management for Machine Learning' - header: 'Guest Overview: Geo and episode focus on AI Product Manager role' @@ -1095,147 +1230,6 @@ transcript: sec: 3839 time: '1:03:59' who: Geo -description: 'Learn ML Product Manager tactics: MLOps platform strategy, observability - KPIs & adoption playbooks to own roadmaps, governance, and stakeholder ROI.' 
-intro: How do you become an ML product manager and build MLOps platforms that teams - actually use? In this episode, Geo Jolly, a Technical PM at Glovo with a background - from web/dev to data science and product management, walks through the practical - skills and decisions that define the role.

We cover MLOps platform strategy - and vendor evaluation, treating internal platform users as customers, and the real - productivity costs of poor tooling UX. Geo outlines product manager responsibilities—roadmaps, - specs, backlog prioritization—and explains outcome-driven problem definition, avoiding - solution bias, and running workshops and interviews to break down complex problems. - You’ll hear about ML observability and KPIs to measure platform impact, release - governance and rollout timing, adoption strategy for internal stakeholders, and - engineering roles needed for platform delivery (CI/CD, K8s, syseng). Practical topics - also include model validation and ML quality assurance, embedded data scientists - as power users, Agile approaches for data science, and concrete transition paths - from data scientist or Scrum Master into technical ML product roles.

Listen - to gain actionable guidance on MLOps platforms, observability, adoption strategy, - and the technical literacy required to succeed as an ML product manager. -dateadded: '2021-12-17' -duration: PT01H02M46S -quotableClips: -- name: 'Episode Introduction: Product Management for Machine Learning' - startOffset: 0 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=0 - endOffset: 73 -- name: 'Guest Overview: Geo and episode focus on AI Product Manager role' - startOffset: 73 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=73 - endOffset: 116 -- name: 'Career Journey: From web/dev to data science to product management' - startOffset: 116 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=116 - endOffset: 388 -- name: 'Glovo Role: Leading ML platform strategy and team responsibilities' - startOffset: 388 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=388 - endOffset: 521 -- name: In-house MLOps Platform Strategy & Vendor Evaluation - startOffset: 521 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=521 - endOffset: 590 -- name: 'Product Manager Responsibilities: Roadmap, specs, and stakeholder balance' - startOffset: 590 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=590 - endOffset: 684 -- name: 'Internal Platform Users as Customers: ROI and adoption considerations' - startOffset: 684 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=684 - endOffset: 824 -- name: 'Platform Usability Costs: Productivity losses from poor tooling UX' - startOffset: 824 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=824 - endOffset: 919 -- name: 'Backlog Prioritization: Grooming with engineering and Agile practices' - startOffset: 919 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=919 - endOffset: 1004 -- name: 'Outcome-Driven Problem Definition: Metrics over immediate solutions' - startOffset: 1004 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1004 - endOffset: 1105 -- name: 'ML Observability: KPIs and measuring platform impact' - 
startOffset: 1105 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1105 - endOffset: 1169 -- name: 'Avoiding Solution Bias: Techniques to resist jumping into solutions' - startOffset: 1169 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1169 - endOffset: 1266 -- name: 'Collaborative Problem Breakdown: Workshops, interviews, and user input' - startOffset: 1266 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1266 - endOffset: 1335 -- name: 'Core PM Skills: Communication, prioritization, and continuous learning' - startOffset: 1335 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1335 - endOffset: 1408 -- name: 'Technical Literacy: Model architectures, data infra, and cloud concepts' - startOffset: 1408 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1408 - endOffset: 1531 -- name: 'Technical Background Value: Why platform PMs need tooling familiarity' - startOffset: 1531 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1531 - endOffset: 1717 -- name: 'Role Differences: Data Science Lead / Staff vs Technical ML Product Manager' - startOffset: 1717 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1717 - endOffset: 1888 -- name: 'Release Governance & Rollout Strategy: Approvals, compliance, and timing' - startOffset: 1888 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1888 - endOffset: 2118 -- name: 'Adoption Strategy: "Time to stakeholders" and internal rollouts' - startOffset: 2118 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2118 - endOffset: 2268 -- name: 'Engineering Roles in ML Platforms: Backend, syseng, CI/CD, and K8s' - startOffset: 2268 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2268 - endOffset: 2414 -- name: 'Embedded Data Scientists: Power users, developer advocates, and demos' - startOffset: 2414 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2414 - endOffset: 2534 -- name: 'Agile for Data Science: Kanban, Scrum, and adapting to research work' - startOffset: 2534 - url: 
https://www.youtube.com/watch?v=PjqjPvHliqg&t=2534 - endOffset: 2696 -- name: 'Transition Path: Moving from Data Scientist to Technical Product Manager' - startOffset: 2696 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2696 - endOffset: 2983 -- name: 'Recommended Resources: Books and communities for PM transition' - startOffset: 2983 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2983 - endOffset: 3165 -- name: 'Non‑Technical Transitions: Feasibility of moving into ML product roles' - startOffset: 3165 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3165 - endOffset: 3344 -- name: 'User Research for Internal Platforms: Surveys and Happiness Reports' - startOffset: 3344 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3344 - endOffset: 3440 -- name: 'ML Quality Assurance: Model validation, shadowing, and release checklists' - startOffset: 3440 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3440 - endOffset: 3592 -- name: 'Scrum Master to PM Advice: Leverage Agile skills and learn ML basics' - startOffset: 3592 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3592 - endOffset: 3711 -- name: 'Final Thoughts: PM demands, scope, and career realities' - startOffset: 3711 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3711 - endOffset: 3815 -- name: 'Contact & Hiring: Geo on LinkedIn and Glovo opportunities' - startOffset: 3815 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3815 - endOffset: 3766 --- Links: diff --git a/_podcast/s15e01-why-machine-learning-design-broken.md b/_podcast/ml-system-design.md similarity index 97% rename from _podcast/s15e01-why-machine-learning-design-broken.md rename to _podcast/ml-system-design.md index 72d276fd..7ba823e8 100644 --- a/_podcast/s15e01-why-machine-learning-design-broken.md +++ b/_podcast/ml-system-design.md @@ -1,20 +1,107 @@ --- +title: 'ML System Design Playbook: Fail-Fast Design Docs, Modular Architecture & Data Drift Monitoring' +short: Why Machine Learning Design is Broken 
+season: 15 episode: 1 guests: - valeriybabushkin +image: images/podcast/s15e01-why-machine-learning-design-broken.jpg ids: anchor: atatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o youtube: 6YBMU6475KQ -image: images/podcast/s15e01-why-machine-learning-design-broken.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o apple: https://podcasts.apple.com/us/podcast/why-machine-learning-design-is-broken-valerii-babushkin/id1541710331?i=1000621176183 spotify: https://open.spotify.com/episode/3KfKptkWIa1hW1hSOvBQaO youtube: https://www.youtube.com/watch?v=6YBMU6475KQ -season: 15 -short: Why Machine Learning Design is Broken -title: 'ML System Design Playbook: Fail-Fast Design Docs, Modular Architecture & Data - Drift Monitoring' + +description: 'Master ML system design: fail-fast design docs, modular architecture & data drift monitoring to cut risk, assign ownership, speed experiments.' +intro: How do you design ML systems that fail fast, scale with modular architecture, and survive data drift in production? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through a practical playbook for ML system design.

We cover why fail-fast design docs act like blueprints to prevent wasted work, how shared and chapter-based design docs enable alignment and versioning, and the maintenance challenges of treating design docs as living artifacts. Valerii explains assigning ownership and mapping the bus factor for risk assessment, incentivizing documentation, and using a 16-chapter ML design template to standardize architecture. On the operational side we dig into monitoring strategies for data drift, concept drift, and prediction drift, plus fallback strategies — redundancy, simple baselines, and serving reliability — to keep models robust. He also points to tools and resources including Evidently AI, templates, and the book.

Listen to gain concrete tactics for fail-fast design docs, modular architecture, data drift monitoring, and baseline solutions you can apply to reduce risk and accelerate ML delivery +topics: +- machine learning +- system design +dateadded: 2023-07-16 + +duration: PT00H59M38S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=0 + endOffset: 126 +- name: 'Guest Introduction: Valerii Babushkin background' + startOffset: 126 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=126 + endOffset: 240 +- name: 'Book Announcement: Machine Learning System Design' + startOffset: 240 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=240 + endOffset: 426 +- name: 'Design Document Purpose: Fail-Fast Principle' + startOffset: 426 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=426 + endOffset: 519 +- name: 'Blueprint Analogy: Preventing Waste with Early Design' + startOffset: 519 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=519 + endOffset: 669 +- name: Prevalence of Missing Documentation in ML Projects + startOffset: 669 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=669 + endOffset: 876 +- name: 'Shared Design Docs: Alignment, Feedback, and Simplicity' + startOffset: 876 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=876 + endOffset: 1141 +- name: 'Design Doc as a Living Artifact: Maintenance Challenges' + startOffset: 1141 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1141 + endOffset: 1477 +- name: 'Accountability & Ownership: Assigning Areas of Responsibility' + startOffset: 1477 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1477 + endOffset: 1919 +- name: 'Bus Factor & Risk Assessment: People Dependency Mapping' + startOffset: 1919 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1919 + endOffset: 2210 +- name: 'Modularity: Chapter-Based Design Docs and Versioning Signals' + startOffset: 2210 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2210 + endOffset: 
2461 +- name: 'Incentivizing Documentation: Metrics and Performance Reviews' + startOffset: 2461 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2461 + endOffset: 2633 +- name: 'ML Design Template Overview: 16-Chapter Book Outline' + startOffset: 2633 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2633 + endOffset: 2866 +- name: 'Monitoring: Detecting Data Drift, Concept Drift, and Prediction Drift' + startOffset: 2866 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2866 + endOffset: 3119 +- name: 'Fallback Strategies: Redundancy, Simple Baselines, and Serving Reliability' + startOffset: 3119 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3119 + endOffset: 3313 +- name: 'Baseline Solutions: Start Simple to Validate Hypotheses Quickly' + startOffset: 3313 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3313 + endOffset: 3495 +- name: 'Resources & Tools: Book Discount, Evidently AI, and Templates' + startOffset: 3495 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3495 + endOffset: 3644 +- name: 'Contact: Connect with Valerii on LinkedIn' + startOffset: 3644 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3644 + endOffset: 3704 +- name: Episode Close and Final Takeaways + startOffset: 3704 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3704 + endOffset: 3578 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Valerii Babushkin background' @@ -936,103 +1023,6 @@ transcript: sec: 3704 time: '1:01:44' who: Alexey -description: 'Master ML system design: fail-fast design docs, modular architecture - & data drift monitoring to cut risk, assign ownership, speed experiments.' -intro: How do you design ML systems that fail fast, scale with modular architecture, - and survive data drift in production? 
In this episode, Valerii Babushkin — Senior - Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and - author of Machine Learning System Design — walks through a practical playbook for - ML system design.

We cover why fail-fast design docs act like blueprints - to prevent wasted work, how shared and chapter-based design docs enable alignment - and versioning, and the maintenance challenges of treating design docs as living - artifacts. Valerii explains assigning ownership and mapping the bus factor for risk - assessment, incentivizing documentation, and using a 16-chapter ML design template - to standardize architecture. On the operational side we dig into monitoring strategies - for data drift, concept drift, and prediction drift, plus fallback strategies — - redundancy, simple baselines, and serving reliability — to keep models robust. He - also points to tools and resources including Evidently AI, templates, and the book. -

Listen to gain concrete tactics for fail-fast design docs, modular architecture, - data drift monitoring, and baseline solutions you can apply to reduce risk and accelerate - ML delivery. -dateadded: '2023-07-16' -duration: PT00H59M38S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=0 - endOffset: 126 -- name: 'Guest Introduction: Valerii Babushkin background' - startOffset: 126 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=126 - endOffset: 240 -- name: 'Book Announcement: Machine Learning System Design' - startOffset: 240 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=240 - endOffset: 426 -- name: 'Design Document Purpose: Fail-Fast Principle' - startOffset: 426 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=426 - endOffset: 519 -- name: 'Blueprint Analogy: Preventing Waste with Early Design' - startOffset: 519 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=519 - endOffset: 669 -- name: Prevalence of Missing Documentation in ML Projects - startOffset: 669 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=669 - endOffset: 876 -- name: 'Shared Design Docs: Alignment, Feedback, and Simplicity' - startOffset: 876 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=876 - endOffset: 1141 -- name: 'Design Doc as a Living Artifact: Maintenance Challenges' - startOffset: 1141 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1141 - endOffset: 1477 -- name: 'Accountability & Ownership: Assigning Areas of Responsibility' - startOffset: 1477 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1477 - endOffset: 1919 -- name: 'Bus Factor & Risk Assessment: People Dependency Mapping' - startOffset: 1919 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1919 - endOffset: 2210 -- name: 'Modularity: Chapter-Based Design Docs and Versioning Signals' - startOffset: 2210 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2210 - endOffset: 2461 -- name: 'Incentivizing 
Documentation: Metrics and Performance Reviews' - startOffset: 2461 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2461 - endOffset: 2633 -- name: 'ML Design Template Overview: 16-Chapter Book Outline' - startOffset: 2633 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2633 - endOffset: 2866 -- name: 'Monitoring: Detecting Data Drift, Concept Drift, and Prediction Drift' - startOffset: 2866 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2866 - endOffset: 3119 -- name: 'Fallback Strategies: Redundancy, Simple Baselines, and Serving Reliability' - startOffset: 3119 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3119 - endOffset: 3313 -- name: 'Baseline Solutions: Start Simple to Validate Hypotheses Quickly' - startOffset: 3313 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3313 - endOffset: 3495 -- name: 'Resources & Tools: Book Discount, Evidently AI, and Templates' - startOffset: 3495 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3495 - endOffset: 3644 -- name: 'Contact: Connect with Valerii on LinkedIn' - startOffset: 3644 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3644 - endOffset: 3704 -- name: Episode Close and Final Takeaways - startOffset: 3704 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3704 - endOffset: 3578 --- Links: diff --git a/_podcast/s02e12-communities.md b/_podcast/mlops-community-building-and-meetups.md similarity index 97% rename from _podcast/s02e12-communities.md rename to _podcast/mlops-community-building-and-meetups.md index 7fc4194e..d79eb1ac 100644 --- a/_podcast/s02e12-communities.md +++ b/_podcast/mlops-community-building-and-meetups.md @@ -1,11 +1,11 @@ --- title: 'MLOps Community Playbook: Launch, Grow & Retain Meetups, Members, and Contributors' short: Building Online Tech Communities +season: 2 +episode: 12 guests: - demetriosbrinkmann image: images/podcast/s02e12-communities.jpg -season: 2 -episode: 12 ids: youtube: ByCE1vSrIr8 anchor: 
Building-Online-Tech-Communities---Demetrios-Brinkmann-eu35fo @@ -14,6 +14,118 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Building-Online-Tech-Communities---Demetrios-Brinkmann-eu35fo spotify: https://open.spotify.com/episode/58Xe9PCfdz26CVuYKtZWUE apple: https://podcasts.apple.com/us/podcast/building-online-tech-communities-demetrios-brinkmann/id1541710331?i=1000515510103 + +description: 'Master MLOps meetups: launch communities, recruit contributors, and boost member retention with LinkedIn outreach, content strategy, and practical checklists.' +intro: 'How do you launch, grow, and retain an MLOps community that moves from meetups to a sustainable, contributor-led ecosystem? In this episode, Demetrios Brinkmann — who has led the MLOps community since April 2020 and now runs the largest active group with 2,500+ Slack members and 25k YouTube views — walks through a practical community playbook for MLOps meetups, members, and contributors.

We trace his origin story and pivot to meetups and podcasting, then dive into concrete tactics: recruiting speakers with sales techniques, LinkedIn outreach and cold DMs, weekly meetup and content strategies, editing and YouTube clips, and milestone growth from 500 to 3k members. Demetrios also addresses moderation challenges, evolving from founder-led to peer-to-peer governance, cultivating core contributors and advisory groups, and building belonging through Q&A, social channels, and non-technical spaces.

Listeners will get actionable retention strategies (giveaways, multi-format content, avoiding gamification), practical checklists for platform, purpose, audience, and content, and tips for member connections like Random Coffee and sprints. If you’re building an MLOps community or scaling technical meetups, this episode offers a focused, tactical roadmap. Find next steps at mlops.community.' +topics: +- MLOps +- community building +dateadded: 2021-04-02 + +duration: PT01H13M56S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=0 + endOffset: 88 +- name: 'Origin Story: Launching the MLOps community' + startOffset: 88 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=88 + endOffset: 126 +- name: Pivot to meetups and turning events into a podcast + startOffset: 126 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=126 + endOffset: 397 +- name: Early hosting lessons and interview craft + startOffset: 397 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=397 + endOffset: 501 +- name: 'Founder Background: teaching, sales, and career pivot' + startOffset: 501 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=501 + endOffset: 641 +- name: Sales techniques for recruiting speakers and guests + startOffset: 641 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=641 + endOffset: 789 +- name: 'Early growth tactics: LinkedIn outreach and cold DMs' + startOffset: 789 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=789 + endOffset: 842 +- name: 'Content Strategy: weekly meetups, editing, and YouTube clips' + startOffset: 842 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=842 + endOffset: 1101 +- name: 'Growth Milestones: hitting 500, 1k, 2k, 3k members' + startOffset: 1101 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1101 + endOffset: 1250 +- name: 'Moderation Challenges: vendors, spam, and code of conduct' + startOffset: 1250 + url: 
https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1250 + endOffset: 1497 +- name: 'Community Evolution: moving from founder-led to peer-to-peer' + startOffset: 1497 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1497 + endOffset: 1645 +- name: Cultivating Core Contributors and advisory groups + startOffset: 1645 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1645 + endOffset: 1751 +- name: 'Fostering Belonging: Q&A, social channels, and non-technical spaces' + startOffset: 1751 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1751 + endOffset: 2064 +- name: 'Introvert Founders: starting and sustaining communities as an introvert' + startOffset: 2064 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2064 + endOffset: 2436 +- name: 'Retention Strategies: giveaways, multi-format content, and avoiding gamification' + startOffset: 2436 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2436 + endOffset: 2745 +- name: 'Customer Development (custdev): surveys, incentives, and feedback cadence' + startOffset: 2745 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2745 + endOffset: 3051 +- name: 'Member Connections: Random Coffee, Donut, and one-on-ones' + startOffset: 3051 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3051 + endOffset: 3304 +- name: 'Organizing Initiatives: sprints, autonomy, and many-to-many engagement' + startOffset: 3304 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3304 + endOffset: 3617 +- name: 'Team Structure: core volunteers vs. 
broader contributors' + startOffset: 3617 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3617 + endOffset: 3652 +- name: 'Community Economics: treating a community as an educational business' + startOffset: 3652 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3652 + endOffset: 3851 +- name: 'Practical Checklist: platform, purpose, audience, and content plan' + startOffset: 3851 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3851 + endOffset: 4046 +- name: 'Sourcing Members: LinkedIn, Reddit, YouTube, and platform-specific channels' + startOffset: 4046 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4046 + endOffset: 4178 +- name: 'Final Advice: actionable takeaways for new community builders' + startOffset: 4178 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4178 + endOffset: 4348 +- name: 'Get Involved: where to find mlops.community and next steps' + startOffset: 4348 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4348 + endOffset: 4436 +- name: Podcast Closing Remarks + startOffset: 4436 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4436 + endOffset: 4436 + transcript: - header: Podcast Introduction - line: Should I start or you want to start? @@ -1093,127 +1205,6 @@ transcript: sec: 4436 time: '1:13:56' who: Alexey -description: 'Master MLOps meetups: launch communities, recruit contributors, and - boost member retention with LinkedIn outreach, content strategy, and practical checklists.' -intro: 'How do you launch, grow, and retain an MLOps community that moves from meetups - to a sustainable, contributor-led ecosystem? In this episode, Demetrios Brinkmann - — who has led the MLOps community since April 2020 and now runs the largest active - group with 2,500+ Slack members and 25k YouTube views — walks through a practical - community playbook for MLOps meetups, members, and contributors.

We trace - his origin story and pivot to meetups and podcasting, then dive into concrete tactics: - recruiting speakers with sales techniques, LinkedIn outreach and cold DMs, weekly - meetup and content strategies, editing and YouTube clips, and milestone growth from - 500 to 3k members. Demetrios also addresses moderation challenges, evolving from - founder-led to peer-to-peer governance, cultivating core contributors and advisory - groups, and building belonging through Q&A, social channels, and non-technical spaces. -

Listeners will get actionable retention strategies (giveaways, multi-format - content, avoiding gamification), practical checklists for platform, purpose, audience, - and content, and tips for member connections like Random Coffee and sprints. If - you’re building an MLOps community or scaling technical meetups, this episode offers - a focused, tactical roadmap. Find next steps at mlops.community.' -dateadded: '2021-04-02' -duration: PT01H13M56S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=0 - endOffset: 88 -- name: 'Origin Story: Launching the MLOps community' - startOffset: 88 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=88 - endOffset: 126 -- name: Pivot to meetups and turning events into a podcast - startOffset: 126 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=126 - endOffset: 397 -- name: Early hosting lessons and interview craft - startOffset: 397 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=397 - endOffset: 501 -- name: 'Founder Background: teaching, sales, and career pivot' - startOffset: 501 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=501 - endOffset: 641 -- name: Sales techniques for recruiting speakers and guests - startOffset: 641 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=641 - endOffset: 789 -- name: 'Early growth tactics: LinkedIn outreach and cold DMs' - startOffset: 789 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=789 - endOffset: 842 -- name: 'Content Strategy: weekly meetups, editing, and YouTube clips' - startOffset: 842 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=842 - endOffset: 1101 -- name: 'Growth Milestones: hitting 500, 1k, 2k, 3k members' - startOffset: 1101 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1101 - endOffset: 1250 -- name: 'Moderation Challenges: vendors, spam, and code of conduct' - startOffset: 1250 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1250 - endOffset: 1497 -- name: 
'Community Evolution: moving from founder-led to peer-to-peer' - startOffset: 1497 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1497 - endOffset: 1645 -- name: Cultivating Core Contributors and advisory groups - startOffset: 1645 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1645 - endOffset: 1751 -- name: 'Fostering Belonging: Q&A, social channels, and non-technical spaces' - startOffset: 1751 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1751 - endOffset: 2064 -- name: 'Introvert Founders: starting and sustaining communities as an introvert' - startOffset: 2064 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2064 - endOffset: 2436 -- name: 'Retention Strategies: giveaways, multi-format content, and avoiding gamification' - startOffset: 2436 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2436 - endOffset: 2745 -- name: 'Customer Development (custdev): surveys, incentives, and feedback cadence' - startOffset: 2745 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2745 - endOffset: 3051 -- name: 'Member Connections: Random Coffee, Donut, and one-on-ones' - startOffset: 3051 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3051 - endOffset: 3304 -- name: 'Organizing Initiatives: sprints, autonomy, and many-to-many engagement' - startOffset: 3304 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3304 - endOffset: 3617 -- name: 'Team Structure: core volunteers vs. 
broader contributors' - startOffset: 3617 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3617 - endOffset: 3652 -- name: 'Community Economics: treating a community as an educational business' - startOffset: 3652 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3652 - endOffset: 3851 -- name: 'Practical Checklist: platform, purpose, audience, and content plan' - startOffset: 3851 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3851 - endOffset: 4046 -- name: 'Sourcing Members: LinkedIn, Reddit, YouTube, and platform-specific channels' - startOffset: 4046 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4046 - endOffset: 4178 -- name: 'Final Advice: actionable takeaways for new community builders' - startOffset: 4178 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4178 - endOffset: 4348 -- name: 'Get Involved: where to find mlops.community and next steps' - startOffset: 4348 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4348 - endOffset: 4436 -- name: Podcast Closing Remarks - startOffset: 4436 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4436 - endOffset: 4436 --- We talked about: diff --git a/_podcast/s02e05-feature-stores.md b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md similarity index 93% rename from _podcast/s02e05-feature-stores.md rename to _podcast/mlops-feature-stores-feature-stores-feast-tecton.md index 3fbdea0c..76956b35 100644 --- a/_podcast/s02e05-feature-stores.md +++ b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md @@ -1,24 +1,11 @@ --- title: 'Feature Stores for MLOps: Real-Time Feature Engineering, Feast & Tecton Guide' short: Feature Stores in MLOps Explained -description: Discover feature store use cases, real-time features with Feast & Tecton, - build scalable MLOps to speed production, cut duplication and detect drift. 
-tags: -- feature-stores -- feast -- tecton -- mlops -- machine-learning -- data-science -- ml-platform -- ml-architecture -- willem-pienaar -- gojek +season: 2 +episode: 5 guests: - willempienaar image: images/podcast/s02e05-feature-stores.jpg -season: 2 -episode: 5 ids: youtube: FQYTb4uWljQ anchor: Feature-Stores-Cutting-through-the-Hype---Willem-Pienaar-ept6m8/a-a4hlg3r @@ -27,23 +14,17 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Feature-Stores-Cutting-through-the-Hype---Willem-Pienaar-ept6m8/a-a4hlg3r spotify: https://open.spotify.com/episode/05YnfTWbplXwOwicR2doy3 apple: https://podcasts.apple.com/us/podcast/feature-stores-cutting-through-the-hype-willem-pienaar/id1541710331?i=1000508782957 -intro: How do you reliably build and serve real‑time features for production ML without - rework, duplication, or training/serving skew? In this episode, Willem Pienaar — - engineering lead at Tecton and creator of Feast — walks through what feature stores - solve in MLOps and how they enable real‑time feature engineering. We define feature - stores, compare feature creation vs retrieval (SQL, Python, APIs, on‑demand transforms), - and illustrate a production real‑time fraud detection lookup. Willem separates hype - from value, explains organizational challenges like team silos and speed to production, - and outlines the platform role across materialization, serving, and validation. -

You’ll get practical coverage of Feast (open‑source) and Tecton (enterprise), - architecture components (transform engine, storage, serving, registry, monitoring), - and when online tabular use cases require a feature store versus when it’s overkill. - The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch - (Flink, Spark), validation and monitoring (drift detection, Great Expectations, - TFDV), backfilling strategies, ownership and governance, and getting started resources - (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete - next steps for productionizing features in your MLOps stack. -dateadded: '2021-02-23' + +description: Discover feature store use cases, real-time features with Feast & Tecton, build scalable MLOps to speed production, cut duplication and detect drift +intro: How do you reliably build and serve real‑time features for production ML without rework, duplication, or training/serving skew? In this episode, Willem Pienaar — engineering lead at Tecton and creator of Feast — walks through what feature stores solve in MLOps and how they enable real‑time feature engineering. We define feature stores, compare feature creation vs retrieval (SQL, Python, APIs, on‑demand transforms), and illustrate a production real‑time fraud detection lookup. Willem separates hype from value, explains organizational challenges like team silos and speed to production, and outlines the platform role across materialization, serving, and validation.

You’ll get practical coverage of Feast (open‑source) and Tecton (enterprise), architecture components (transform engine, storage, serving, registry, monitoring), and when online tabular use cases require a feature store versus when it’s overkill. The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch (Flink, Spark), validation and monitoring (drift detection, Great Expectations, TFDV), backfilling strategies, ownership and governance, and getting started resources (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete next steps for productionizing features in your MLOps stack +topics: +- machine learning +- MLOps +- feature stores +- tools +dateadded: 2021-02-23 + + quotableClips: - name: 'Episode Introduction: Feature Stores in MLOps' startOffset: 0 @@ -133,6 +114,7 @@ quotableClips: startOffset: 3450 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=3450 endOffset: 3450 + --- In this episode, we dive deeper into feature stores with Willem, creator of Feast (an open-source feature store). Previously, Willem led the Data Science Platform team at Gojek and now works at Tecton, which develops feature store technology. 
diff --git a/_podcast/s02e04-mlops.md b/_podcast/mlops-kubeflow-model-monitoring.md similarity index 97% rename from _podcast/s02e04-mlops.md rename to _podcast/mlops-kubeflow-model-monitoring.md index d2ec47b3..0f8b9862 100644 --- a/_podcast/s02e04-mlops.md +++ b/_podcast/mlops-kubeflow-model-monitoring.md @@ -1,11 +1,11 @@ --- title: 'Mastering MLOps: Kubeflow Pipelines, Model Monitoring & Automated Retraining' short: The Rise of MLOps +season: 2 +episode: 4 guests: - theofilospapapanagiotou image: images/podcast/s02e04-mlops.jpg -season: 2 -episode: 4 ids: youtube: -i0fVp0ntYA anchor: The-Rise-of-MLOps---Theofilos-Papapanagiotou-ept67o @@ -14,6 +14,152 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-Rise-of-MLOps---Theofilos-Papapanagiotou-ept67o spotify: https://open.spotify.com/episode/3YPvzGQnfxl7Mo1VKE0l1K apple: https://podcasts.apple.com/us/podcast/the-rise-of-mlops-theofilos-papapanagiotou/id1541710331?i=1000507907719 + +description: Master MLOps with Kubeflow pipelines and automated retraining—detect drift, accelerate deployment, and boost production model reliability for faster iteration +intro: How do you build reliable, production-ready ML pipelines that detect model drift, monitor fairness, and trigger automated retraining? In this episode, Theofilos Papapanagiotou — a systems engineer with 20 years’ experience (from Unix engineering to ML engineering) now helping companies run ML workloads and a Kubeflow enthusiast — walks through practical MLOps strategies and tooling.

We define MLOps as culture, process, and technology and contrast it with DevOps across the model lifecycle. Key topics include model monitoring for drift and fairness, inference sensors and a Prometheus/Grafana monitoring stack, commoditizing inference monitoring for faster iteration, and maturity levels from manual training to automated retraining. Theofilos dives into the Kubeflow ecosystem — Pipelines, KFServing, Feast, Katib, and integrations with TFX — plus hyperparameter search, notebook→pipeline workflows, MLMD metadata for data and model versioning, and tradeoffs for small teams and edge/mobile deployments.

If you’re implementing Kubeflow pipelines, setting up model monitoring, or planning automated retraining, this episode offers practical guidance, maturity roadmaps, and resources to help you move from prototypes to reproducible, production ML +topics: +- MLOps +- machine learning +- production +- tools +dateadded: 2021-02-23 + +duration: PT01H02M48S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=0 + endOffset: 154 +- name: Episode Kickoff & Guest Overview + startOffset: 154 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=154 + endOffset: 210 +- name: 'Guest Background: From Unix Engineer to ML Engineering' + startOffset: 210 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=210 + endOffset: 314 +- name: 'Defining MLOps: Culture, Process, and Technology' + startOffset: 314 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=314 + endOffset: 448 +- name: 'DevOps vs MLOps: Model Lifecycle and Data Drift' + startOffset: 448 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=448 + endOffset: 677 +- name: 'Monitoring for MLOps: Drift, Fairness, and Retraining Triggers' + startOffset: 677 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=677 + endOffset: 784 +- name: 'Monitoring Stack: Prometheus/Grafana and Inference Sensors' + startOffset: 784 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=784 + endOffset: 884 +- name: Commoditizing Inference Monitoring for Faster Iteration + startOffset: 884 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=884 + endOffset: 929 +- name: 'Role Distinction: ML Engineer as Practitioner, MLOps as Practice' + startOffset: 929 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=929 + endOffset: 997 +- name: 'Team Composition: Developer, Operator, and Product in MLOps' + startOffset: 997 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=997 + endOffset: 1208 +- name: 'The "MLOps Engineer" Debate: Title vs. 
Cross‑Functional Teams' + startOffset: 1208 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1208 + endOffset: 1427 +- name: MLOps Job Signals & Maturity Models (Google and Microsoft) + startOffset: 1427 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1427 + endOffset: 1621 +- name: 'Maturity Levels: Manual Training → Pipeline Automation' + startOffset: 1621 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1621 + endOffset: 1808 +- name: 'Advanced Maturity: Data‑Driven Triggers and Automated Retraining' + startOffset: 1808 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1808 + endOffset: 2007 +- name: 'Cultural Shift: Monitoring as a Source of New Training Data' + startOffset: 2007 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2007 + endOffset: 2065 +- name: 'Tooling Landscape: Vendors, Open Source, and Kubernetes' + startOffset: 2065 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2065 + endOffset: 2226 +- name: 'Kubeflow Ecosystem: Pipelines, KFServing, Feast, and Katib' + startOffset: 2226 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2226 + endOffset: 2412 +- name: Hyperparameter Search with Katib and Notebook→Pipeline Workflows + startOffset: 2412 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2412 + endOffset: 2548 +- name: 'Kubeflow & TFX: ML Orchestration and Production Patterns' + startOffset: 2548 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2548 + endOffset: 2608 +- name: 'Learning Kubeflow: Docs, Workshops, and Community Resources' + startOffset: 2608 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2608 + endOffset: 2761 +- name: 'Getting Started: Cloud‑Managed Pipelines and Simple Projects' + startOffset: 2761 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2761 + endOffset: 2818 +- name: 'Data & Model Versioning: MLMD, Metadata, and Traceability' + startOffset: 2818 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2818 + endOffset: 3035 +- name: 'Relationship to DataOps: Continuation and 
Divergence' + startOffset: 3035 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3035 + endOffset: 3104 +- name: 'Edge & Mobile Deployment: Offline Models and Edge Kubernetes' + startOffset: 3104 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3104 + endOffset: 3258 +- name: 'MLOps Guidance: Maturity Roadmaps and Manifesto Alternatives' + startOffset: 3258 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3258 + endOffset: 3313 +- name: 'Why Learn Kubeflow: Community Contribution and Career Value' + startOffset: 3313 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3313 + endOffset: 3424 +- name: 'MLOps Benefits: Automation, Productization, and Productivity' + startOffset: 3424 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3424 + endOffset: 3505 +- name: 'AutoML & Katib: Commoditization vs. Empowering Data Scientists' + startOffset: 3505 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3505 + endOffset: 3589 +- name: 'Simplified Serving: KFServing and Faster Model Endpoints' + startOffset: 3589 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3589 + endOffset: 3684 +- name: 'Small Teams Adopting MLOps: Practical Examples and Tradeoffs' + startOffset: 3684 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3684 + endOffset: 3776 +- name: 'Breaking Silos: Language‑Agnostic Pipelines and Collaboration' + startOffset: 3776 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3776 + endOffset: 3899 +- name: Closing Remarks & Resource Links + startOffset: 3899 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3899 + endOffset: 3922 +- name: Episode End + startOffset: 3922 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3922 + endOffset: 3768 + transcript: - header: Podcast Introduction - header: Episode Kickoff & Guest Overview @@ -1117,158 +1263,4 @@ transcript: sec: 3922 time: '1:05:22' who: 'Theo:' -description: Master MLOps with Kubeflow pipelines and automated retraining—detect - drift, accelerate deployment, and boost 
production model reliability for faster - iteration. -intro: How do you build reliable, production-ready ML pipelines that detect model - drift, monitor fairness, and trigger automated retraining? In this episode, Theofilos - Papapanagiotou — a systems engineer with 20 years’ experience (from Unix engineering - to ML engineering) now helping companies run ML workloads and a Kubeflow enthusiast - — walks through practical MLOps strategies and tooling.

We define MLOps - as culture, process, and technology and contrast it with DevOps across the model - lifecycle. Key topics include model monitoring for drift and fairness, inference - sensors and a Prometheus/Grafana monitoring stack, commoditizing inference monitoring - for faster iteration, and maturity levels from manual training to automated retraining. - Theofilos dives into the Kubeflow ecosystem — Pipelines, KFServing, Feast, Katib, - and integrations with TFX — plus hyperparameter search, notebook→pipeline workflows, - MLMD metadata for data and model versioning, and tradeoffs for small teams and edge/mobile - deployments.

If you’re implementing Kubeflow pipelines, setting up model - monitoring, or planning automated retraining, this episode offers practical guidance, - maturity roadmaps, and resources to help you move from prototypes to reproducible, - production ML. -dateadded: '2021-02-23' -duration: PT01H02M48S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=0 - endOffset: 154 -- name: Episode Kickoff & Guest Overview - startOffset: 154 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=154 - endOffset: 210 -- name: 'Guest Background: From Unix Engineer to ML Engineering' - startOffset: 210 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=210 - endOffset: 314 -- name: 'Defining MLOps: Culture, Process, and Technology' - startOffset: 314 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=314 - endOffset: 448 -- name: 'DevOps vs MLOps: Model Lifecycle and Data Drift' - startOffset: 448 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=448 - endOffset: 677 -- name: 'Monitoring for MLOps: Drift, Fairness, and Retraining Triggers' - startOffset: 677 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=677 - endOffset: 784 -- name: 'Monitoring Stack: Prometheus/Grafana and Inference Sensors' - startOffset: 784 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=784 - endOffset: 884 -- name: Commoditizing Inference Monitoring for Faster Iteration - startOffset: 884 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=884 - endOffset: 929 -- name: 'Role Distinction: ML Engineer as Practitioner, MLOps as Practice' - startOffset: 929 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=929 - endOffset: 997 -- name: 'Team Composition: Developer, Operator, and Product in MLOps' - startOffset: 997 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=997 - endOffset: 1208 -- name: 'The "MLOps Engineer" Debate: Title vs. 
Cross‑Functional Teams' - startOffset: 1208 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1208 - endOffset: 1427 -- name: MLOps Job Signals & Maturity Models (Google and Microsoft) - startOffset: 1427 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1427 - endOffset: 1621 -- name: 'Maturity Levels: Manual Training → Pipeline Automation' - startOffset: 1621 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1621 - endOffset: 1808 -- name: 'Advanced Maturity: Data‑Driven Triggers and Automated Retraining' - startOffset: 1808 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1808 - endOffset: 2007 -- name: 'Cultural Shift: Monitoring as a Source of New Training Data' - startOffset: 2007 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2007 - endOffset: 2065 -- name: 'Tooling Landscape: Vendors, Open Source, and Kubernetes' - startOffset: 2065 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2065 - endOffset: 2226 -- name: 'Kubeflow Ecosystem: Pipelines, KFServing, Feast, and Katib' - startOffset: 2226 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2226 - endOffset: 2412 -- name: Hyperparameter Search with Katib and Notebook→Pipeline Workflows - startOffset: 2412 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2412 - endOffset: 2548 -- name: 'Kubeflow & TFX: ML Orchestration and Production Patterns' - startOffset: 2548 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2548 - endOffset: 2608 -- name: 'Learning Kubeflow: Docs, Workshops, and Community Resources' - startOffset: 2608 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2608 - endOffset: 2761 -- name: 'Getting Started: Cloud‑Managed Pipelines and Simple Projects' - startOffset: 2761 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2761 - endOffset: 2818 -- name: 'Data & Model Versioning: MLMD, Metadata, and Traceability' - startOffset: 2818 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2818 - endOffset: 3035 -- name: 'Relationship to DataOps: Continuation and 
Divergence' - startOffset: 3035 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3035 - endOffset: 3104 -- name: 'Edge & Mobile Deployment: Offline Models and Edge Kubernetes' - startOffset: 3104 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3104 - endOffset: 3258 -- name: 'MLOps Guidance: Maturity Roadmaps and Manifesto Alternatives' - startOffset: 3258 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3258 - endOffset: 3313 -- name: 'Why Learn Kubeflow: Community Contribution and Career Value' - startOffset: 3313 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3313 - endOffset: 3424 -- name: 'MLOps Benefits: Automation, Productization, and Productivity' - startOffset: 3424 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3424 - endOffset: 3505 -- name: 'AutoML & Katib: Commoditization vs. Empowering Data Scientists' - startOffset: 3505 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3505 - endOffset: 3589 -- name: 'Simplified Serving: KFServing and Faster Model Endpoints' - startOffset: 3589 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3589 - endOffset: 3684 -- name: 'Small Teams Adopting MLOps: Practical Examples and Tradeoffs' - startOffset: 3684 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3684 - endOffset: 3776 -- name: 'Breaking Silos: Language‑Agnostic Pipelines and Collaboration' - startOffset: 3776 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3776 - endOffset: 3899 -- name: Closing Remarks & Resource Links - startOffset: 3899 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3899 - endOffset: 3922 -- name: Episode End - startOffset: 3922 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3922 - endOffset: 3768 --- diff --git a/_podcast/s10e03-mlops-architect.md b/_podcast/mlops-model-monitoring-data-observability.md similarity index 98% rename from _podcast/s10e03-mlops-architect.md rename to _podcast/mlops-model-monitoring-data-observability.md index b946de87..5680e3cb 100644 --- 
a/_podcast/s10e03-mlops-architect.md +++ b/_podcast/mlops-model-monitoring-data-observability.md @@ -1,19 +1,136 @@ --- +title: 'MLOps Architect Guide: Production Model Monitoring, Data Observability & Tooling' +short: MLOps Architect +season: 10 episode: 3 guests: - dannyleybzon +image: images/podcast/s10e03-mlops-architect.jpg ids: anchor: MLOps-Architect---Danny-Leybzon-e1m81iu youtube: p1gVaS4Zx5M -image: images/podcast/s10e03-mlops-architect.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/MLOps-Architect---Danny-Leybzon-e1m81iu apple: https://podcasts.apple.com/us/podcast/mlops-architect-danny-leybzon/id1541710331?i=1000575901051 spotify: https://open.spotify.com/episode/5gz5lnS7onwRUtbcmpOSuU?si=8cbe799f284c4623 youtube: https://www.youtube.com/watch?v=p1gVaS4Zx5M -season: 10 -short: MLOps Architect -title: 'MLOps Architect Guide: Production Model Monitoring, Data Observability & Tooling' + +description: Master MLOps, model monitoring & data observability with guidance on production observability, ETL root causes, tooling trade-offs, ONNX, build vs buy +intro: How do you keep machine learning models reliable in production — what should you monitor, where do upstream failures originate, and which tooling decisions actually matter? In this episode, Danny Leybzon, MLOps Architect at WhyLabs and computational statistics alum of UCLA, walks through the practical challenges of production model monitoring, data observability, and tooling trade-offs. Drawing on his path from analyst and product roles at Qubole to field engineering at Imply and now advising customers on observability, Danny defines the MLOps Architect as a technical-business bridge and explains how to prioritize production-first monitoring efforts.

Topics covered include scope of observability across ETL and data pipelines, data profiling architecture (WhyLogs, profiles, Apache Druid), build vs buy decisions, platform-agnostic integrations and ONNX interoperability, and trends around cloud-native stacks and vendor lock-in. He also offers hiring and career perspectives for MLOps roles and research priorities like fairness and segmentation. Listen to get concrete guidance on designing model monitoring, choosing observability tooling, and identifying upstream root causes so you can reduce incidents and improve model reliability in production +topics: +- MLOps +- tools +- data engineering +dateadded: 2022-08-13 + +duration: PT00H57M51S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=0 + endOffset: 116 +- name: 'Guest Overview: Danny Leybzon, MLOps Architect at WhyLabs' + startOffset: 116 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=116 + endOffset: 192 +- name: 'Career Journey: From paralegal ambitions to statistics and machine learning' + startOffset: 192 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=192 + endOffset: 446 +- name: 'Prior Role: Field Engineer / Solutions Engineer experience' + startOffset: 446 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=446 + endOffset: 491 +- name: 'Role Definition: MLOps Architect as technical-business bridge' + startOffset: 491 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=491 + endOffset: 632 +- name: 'Architecture Advising: Tooling trade-offs and landscape navigation' + startOffset: 632 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=632 + endOffset: 770 +- name: 'Role Popularity: Uniqueness of the "MLOps Architect" title' + startOffset: 770 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=770 + endOffset: 830 +- name: 'Startup Reality: Wearing many hats in early-stage companies' + startOffset: 830 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=830 + endOffset: 
935 +- name: 'Demonstrating Versatility: Convincing employers you can do it all' + startOffset: 935 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=935 + endOffset: 1101 +- name: 'Hiring Story: Cross-functional interview process at WhyLabs' + startOffset: 1101 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1101 + endOffset: 1324 +- name: 'Career Decision: Choosing startup risk for growth and learning' + startOffset: 1324 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1324 + endOffset: 1504 +- name: 'Prioritization Strategy: Focusing on production and model monitoring' + startOffset: 1504 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1504 + endOffset: 1655 +- name: 'Observability Scope: ETL, data pipelines, and upstream root causes' + startOffset: 1655 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1655 + endOffset: 1739 +- name: 'Customer Profiles: Production-first vs pre-deployment teams' + startOffset: 1739 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1739 + endOffset: 1839 +- name: 'Market Education: Shift from "why monitor" to "how to monitor"' + startOffset: 1839 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1839 + endOffset: 1910 +- name: 'Data Profiling Architecture: WhyLogs, profiles, and Apache Druid backend' + startOffset: 1910 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1910 + endOffset: 2065 +- name: 'Build vs Buy: Guiding customers on tooling and procurement decisions' + startOffset: 2065 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2065 + endOffset: 2207 +- name: 'Platform Agnostic Integrations: Serving and inference tooling realities' + startOffset: 2207 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2207 + endOffset: 2281 +- name: 'ONNX Adoption: Interoperability use cases and industry uptake' + startOffset: 2281 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2281 + endOffset: 2350 +- name: 'Tooling Trends: Cloud-native stacks, heterogeneity, and vendor lock-in' + startOffset: 
2350 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2350 + endOffset: 2460 +- name: 'Research Focus: Fairness, bias, segmentation over explainability' + startOffset: 2460 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2460 + endOffset: 2587 +- name: 'Productivity Habits: Inbox zero, workspace windows, and task management' + startOffset: 2587 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2587 + endOffset: 2749 +- name: 'Career Strategy: Exploration vs exploitation and Thompson sampling analogy' + startOffset: 2749 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2749 + endOffset: 3023 +- name: 'Skillset Advice: Coding, communication, and being an effective Googler' + startOffset: 3023 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3023 + endOffset: 3350 +- name: 'WhyLogs vs WhyLabs: Open-source profiling vs SaaS observability' + startOffset: 3350 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3350 + endOffset: 3487 +- name: 'Closing Remarks: Final thoughts, contact info, and upcoming workshops' + startOffset: 3487 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3487 + endOffset: 3471 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Danny Leybzon, MLOps Architect at WhyLabs' @@ -1271,131 +1388,6 @@ transcript: sec: 3587 time: '59:47' who: Danny -description: Master MLOps, model monitoring & data observability with guidance on - production observability, ETL root causes, tooling trade-offs, ONNX, build vs buy. -intro: How do you keep machine learning models reliable in production — what should - you monitor, where do upstream failures originate, and which tooling decisions actually - matter? In this episode, Danny Leybzon, MLOps Architect at WhyLabs and computational - statistics alum of UCLA, walks through the practical challenges of production model - monitoring, data observability, and tooling trade-offs. 
Drawing on his path from - analyst and product roles at Qubole to field engineering at Imply and now advising - customers on observability, Danny defines the MLOps Architect as a technical-business - bridge and explains how to prioritize production-first monitoring efforts.

- Topics covered include scope of observability across ETL and data pipelines, data - profiling architecture (WhyLogs, profiles, Apache Druid), build vs buy decisions, - platform-agnostic integrations and ONNX interoperability, and trends around cloud-native - stacks and vendor lock-in. He also offers hiring and career perspectives for MLOps - roles and research priorities like fairness and segmentation. Listen to get concrete - guidance on designing model monitoring, choosing observability tooling, and identifying - upstream root causes so you can reduce incidents and improve model reliability in - production. -dateadded: '2022-08-13' -duration: PT00H57M51S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=0 - endOffset: 116 -- name: 'Guest Overview: Danny Leybzon, MLOps Architect at WhyLabs' - startOffset: 116 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=116 - endOffset: 192 -- name: 'Career Journey: From paralegal ambitions to statistics and machine learning' - startOffset: 192 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=192 - endOffset: 446 -- name: 'Prior Role: Field Engineer / Solutions Engineer experience' - startOffset: 446 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=446 - endOffset: 491 -- name: 'Role Definition: MLOps Architect as technical-business bridge' - startOffset: 491 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=491 - endOffset: 632 -- name: 'Architecture Advising: Tooling trade-offs and landscape navigation' - startOffset: 632 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=632 - endOffset: 770 -- name: 'Role Popularity: Uniqueness of the "MLOps Architect" title' - startOffset: 770 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=770 - endOffset: 830 -- name: 'Startup Reality: Wearing many hats in early-stage companies' - startOffset: 830 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=830 - endOffset: 935 -- name: 'Demonstrating 
Versatility: Convincing employers you can do it all' - startOffset: 935 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=935 - endOffset: 1101 -- name: 'Hiring Story: Cross-functional interview process at WhyLabs' - startOffset: 1101 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1101 - endOffset: 1324 -- name: 'Career Decision: Choosing startup risk for growth and learning' - startOffset: 1324 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1324 - endOffset: 1504 -- name: 'Prioritization Strategy: Focusing on production and model monitoring' - startOffset: 1504 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1504 - endOffset: 1655 -- name: 'Observability Scope: ETL, data pipelines, and upstream root causes' - startOffset: 1655 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1655 - endOffset: 1739 -- name: 'Customer Profiles: Production-first vs pre-deployment teams' - startOffset: 1739 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1739 - endOffset: 1839 -- name: 'Market Education: Shift from "why monitor" to "how to monitor"' - startOffset: 1839 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1839 - endOffset: 1910 -- name: 'Data Profiling Architecture: WhyLogs, profiles, and Apache Druid backend' - startOffset: 1910 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1910 - endOffset: 2065 -- name: 'Build vs Buy: Guiding customers on tooling and procurement decisions' - startOffset: 2065 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2065 - endOffset: 2207 -- name: 'Platform Agnostic Integrations: Serving and inference tooling realities' - startOffset: 2207 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2207 - endOffset: 2281 -- name: 'ONNX Adoption: Interoperability use cases and industry uptake' - startOffset: 2281 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2281 - endOffset: 2350 -- name: 'Tooling Trends: Cloud-native stacks, heterogeneity, and vendor lock-in' - startOffset: 2350 - url: 
https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2350 - endOffset: 2460 -- name: 'Research Focus: Fairness, bias, segmentation over explainability' - startOffset: 2460 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2460 - endOffset: 2587 -- name: 'Productivity Habits: Inbox zero, workspace windows, and task management' - startOffset: 2587 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2587 - endOffset: 2749 -- name: 'Career Strategy: Exploration vs exploitation and Thompson sampling analogy' - startOffset: 2749 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2749 - endOffset: 3023 -- name: 'Skillset Advice: Coding, communication, and being an effective Googler' - startOffset: 3023 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3023 - endOffset: 3350 -- name: 'WhyLogs vs WhyLabs: Open-source profiling vs SaaS observability' - startOffset: 3350 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3350 - endOffset: 3487 -- name: 'Closing Remarks: Final thoughts, contact info, and upcoming workshops' - startOffset: 3487 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3487 - endOffset: 3471 --- Links: diff --git a/_podcast/s14e07-from-mlops-to-dataops.md b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md similarity index 97% rename from _podcast/s14e07-from-mlops-to-dataops.md rename to _podcast/modern-data-pipelines-orchestration-ingestion-modeling.md index 5d662040..bb73f0a4 100644 --- a/_podcast/s14e07-from-mlops-to-dataops.md +++ b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md @@ -1,20 +1,115 @@ --- +title: 'Modern Data Pipeline Architecture: Ingestion, Orchestration, Transformation & MLOps Systems' +short: Modern Data Pipelines +season: 14 episode: 7 guests: - santonatuli +image: images/podcast/s14e07-from-mlops-to-dataops.jpg ids: anchor: ow/datatalksclub/episodes/From-MLOps-to-DataOps---Santona-Tuli-e25vb0q youtube: kSTfhQ_SZgc -image: images/podcast/s14e07-from-mlops-to-dataops.jpg links: anchor: 
https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/From-MLOps-to-DataOps---Santona-Tuli-e25vb0q apple: https://podcasts.apple.com/us/podcast/from-mlops-to-dataops-santona-tuli/id1541710331?i=1000618121008 spotify: https://open.spotify.com/episode/0inhE28kLI4T1AsSjgwnL8?si=WeFES7dXRxqSK_SKonBejw youtube: https://www.youtube.com/watch?v=kSTfhQ_SZgc -season: 14 -short: From MLOps to DataOps -title: 'Build Modern Data Pipelines: Ingestion, dbt Transformations, Airflow Orchestration - & MLOps' + +description: Master modern data pipelines with dbt transforms and Airflow orchestration—streamline ingestion, speed feature engineering and analytics delivery +intro: How do you build a modern data pipeline that reliably moves raw events through ingestion, dbt transformations, Airflow orchestration and into production ML and analytics? In this episode, Santona Tuli — a former CERN researcher turned ML and data engineering lead at Upsolver — walks through practical patterns and trade-offs for end-to-end pipelines. Drawing on experience from particle-physics event analysis to NLP and workflow authoring with Airflow, Santona explains where ingestion engines and declarative SQL frameworks fit, and when dbt belongs in the stack.

Topics include Upsolver vs dbt (pipeline authoring, execution engine and ingestion focus), differences between ML pipelines and analytics pipelines, MLOps vs DataOps, and dbt’s role in analytics engineering. We cover tooling (orchestrators, Spark, Kafka/Kinesis, feature stores, vector DBs), modern data stack choices like Snowflake and Databricks, lakehouse and staging patterns, and ingestion pre-processing needs such as deduplication, ordering guarantees and PII masking. You’ll also hear about transformation and data modeling (entities, foreign keys, business mappings), marts and dashboards, feature engineering and model serving, persona-driven pipeline design, and career-learning recommendations. Listen to gain concrete design guidance, tooling trade-offs, and resources to build scalable data and MLOps pipelines +topics: +- data engineering +- MLOps +- tools +dateadded: 2023-06-24 + +duration: PT00H59M43S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=0 + endOffset: 90 +- name: 'Career journey: CERN researcher → NLP, ML engineering, Python, Astronomer, + Upsolver' + startOffset: 90 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=90 + endOffset: 428 +- name: Transition to workflow authoring and orchestration (Airflow, Astronomer) + startOffset: 428 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=428 + endOffset: 648 +- name: 'Upsolver vs DBT: pipeline authoring, execution engine, and ingestion focus' + startOffset: 648 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=648 + endOffset: 805 +- name: Comparing ML pipelines and analytics data pipelines + startOffset: 805 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=805 + endOffset: 1124 +- name: 'MLOps vs DataOps: operationalizing models vs business data' + startOffset: 1124 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1124 + endOffset: 1497 +- name: Analytics engineering and DBT's role in the modern data workflow + 
startOffset: 1497 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1497 + endOffset: 1603 +- name: 'Tooling landscape: orchestrators, Spark, Kafka/Kinesis, feature stores, vector + DBs' + startOffset: 1603 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1603 + endOffset: 1756 +- name: 'Modern data stack choices: Upsolver, Snowflake, Databricks, build vs buy' + startOffset: 1756 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1756 + endOffset: 1977 +- name: Data staging and lakehouse patterns; managed ingestion hiding the stage + startOffset: 1977 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1977 + endOffset: 2230 +- name: 'Ingestion pre-processing: deduplication, ordering guarantees, PII masking' + startOffset: 2230 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2230 + endOffset: 2363 +- name: 'Transformation and data modeling: entities, foreign keys, and business mappings' + startOffset: 2363 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2363 + endOffset: 2585 +- name: Marts, dashboards and translating business questions into metrics + startOffset: 2585 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2585 + endOffset: 2697 +- name: 'ML pipeline specifics: feature engineering, model training, and serving' + startOffset: 2697 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2697 + endOffset: 2877 +- name: Translating academic data/physics skills to industry pipelines + startOffset: 2877 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2877 + endOffset: 3174 +- name: Persona-driven pipeline design and real use-case examples + startOffset: 3174 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3174 + endOffset: 3356 +- name: 'Career advice: value of being a generalist and closing skill gaps' + startOffset: 3356 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3356 + endOffset: 3409 +- name: 'Learning strategy: vetting sources, networking, and engineering blogs' + startOffset: 3409 + url: 
https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3409 + endOffset: 3556 +- name: 'Recommended resources: Fundamentals of Data Engineering, Airflow guides, + whitepapers' + startOffset: 3556 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3556 + endOffset: 3673 +- name: Episode Closing and links + startOffset: 3673 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3673 + endOffset: 3583 + transcript: - header: Episode Introduction - header: 'Career journey: CERN researcher → NLP, ML engineering, Python, Astronomer, @@ -1001,111 +1096,6 @@ transcript: sec: 3673 time: '1:01:13' who: Santona -description: Master modern data pipelines with dbt transforms and Airflow orchestration—streamline - ingestion, speed feature engineering and analytics delivery. -intro: How do you build a modern data pipeline that reliably moves raw events through - ingestion, dbt transformations, Airflow orchestration and into production ML and - analytics? In this episode, Santona Tuli — a former CERN researcher turned ML and - data engineering lead at Upsolver — walks through practical patterns and trade-offs - for end-to-end pipelines. Drawing on experience from particle-physics event analysis - to NLP and workflow authoring with Airflow, Santona explains where ingestion engines - and declarative SQL frameworks fit, and when dbt belongs in the stack.

- Topics include Upsolver vs dbt (pipeline authoring, execution engine and ingestion - focus), differences between ML pipelines and analytics pipelines, MLOps vs DataOps, - and dbt’s role in analytics engineering. We cover tooling (orchestrators, Spark, - Kafka/Kinesis, feature stores, vector DBs), modern data stack choices like Snowflake - and Databricks, lakehouse and staging patterns, and ingestion pre-processing needs - such as deduplication, ordering guarantees and PII masking. You’ll also hear about - transformation and data modeling (entities, foreign keys, business mappings), marts - and dashboards, feature engineering and model serving, persona-driven pipeline design, - and career-learning recommendations. Listen to gain concrete design guidance, tooling - trade-offs, and resources to build scalable data and MLOps pipelines. -dateadded: '2023-06-24' -duration: PT00H59M43S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=0 - endOffset: 90 -- name: 'Career journey: CERN researcher → NLP, ML engineering, Python, Astronomer, - Upsolver' - startOffset: 90 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=90 - endOffset: 428 -- name: Transition to workflow authoring and orchestration (Airflow, Astronomer) - startOffset: 428 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=428 - endOffset: 648 -- name: 'Upsolver vs DBT: pipeline authoring, execution engine, and ingestion focus' - startOffset: 648 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=648 - endOffset: 805 -- name: Comparing ML pipelines and analytics data pipelines - startOffset: 805 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=805 - endOffset: 1124 -- name: 'MLOps vs DataOps: operationalizing models vs business data' - startOffset: 1124 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1124 - endOffset: 1497 -- name: Analytics engineering and DBT's role in the modern data workflow - startOffset: 1497 - url: 
https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1497 - endOffset: 1603 -- name: 'Tooling landscape: orchestrators, Spark, Kafka/Kinesis, feature stores, vector - DBs' - startOffset: 1603 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1603 - endOffset: 1756 -- name: 'Modern data stack choices: Upsolver, Snowflake, Databricks, build vs buy' - startOffset: 1756 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1756 - endOffset: 1977 -- name: Data staging and lakehouse patterns; managed ingestion hiding the stage - startOffset: 1977 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1977 - endOffset: 2230 -- name: 'Ingestion pre-processing: deduplication, ordering guarantees, PII masking' - startOffset: 2230 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2230 - endOffset: 2363 -- name: 'Transformation and data modeling: entities, foreign keys, and business mappings' - startOffset: 2363 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2363 - endOffset: 2585 -- name: Marts, dashboards and translating business questions into metrics - startOffset: 2585 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2585 - endOffset: 2697 -- name: 'ML pipeline specifics: feature engineering, model training, and serving' - startOffset: 2697 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2697 - endOffset: 2877 -- name: Translating academic data/physics skills to industry pipelines - startOffset: 2877 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2877 - endOffset: 3174 -- name: Persona-driven pipeline design and real use-case examples - startOffset: 3174 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3174 - endOffset: 3356 -- name: 'Career advice: value of being a generalist and closing skill gaps' - startOffset: 3356 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3356 - endOffset: 3409 -- name: 'Learning strategy: vetting sources, networking, and engineering blogs' - startOffset: 3409 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3409 
- endOffset: 3556 -- name: 'Recommended resources: Fundamentals of Data Engineering, Airflow guides, - whitepapers' - startOffset: 3556 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3556 - endOffset: 3673 -- name: Episode Closing and links - startOffset: 3673 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3673 - endOffset: 3583 --- Links: diff --git a/_podcast/s10e07-dataset-creation-and-curation.md b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md similarity index 97% rename from _podcast/s10e07-dataset-creation-and-curation.md rename to _podcast/nlp-dataset-creation-annotation-tools-workflows.md index c7523333..6ade50e1 100644 --- a/_podcast/s10e07-dataset-creation-and-curation.md +++ b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md @@ -1,20 +1,115 @@ --- +title: 'Practical Guide to Dataset Creation & Annotation for NLP: Active Learning, Weak Supervision, Tools' +short: Dataset Creation and Curation +season: 10 episode: 7 guests: - christiannswart +image: images/podcast/s10e07-dataset-creation-and-curation.jpg ids: anchor: Dataset-Creation-and-Curation---Christiaan-Swart-e1nd1f6 youtube: QggWydGrWoo -image: images/podcast/s10e07-dataset-creation-and-curation.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Dataset-Creation-and-Curation---Christiaan-Swart-e1nd1f6 apple: https://podcasts.apple.com/us/podcast/dataset-creation-and-curation-christiaan-swart/id1541710331?i=1000578975804 spotify: https://open.spotify.com/episode/26K8JrQXKwLpQelo4n4Kdi?si=e2ad35c4941446c4 youtube: https://www.youtube.com/watch?v=QggWydGrWoo -season: 10 -short: Dataset Creation and Curation -title: 'Practical Guide to Dataset Creation & Annotation for NLP: Active Learning, - Weak Supervision, Tools' + +description: 'Discover dataset creation, annotation & active learning: practical annotation UX, quality metrics, prototyping tips and tooling to accelerate NLP models.' 
+intro: How do you create high‑quality NLP datasets without breaking the budget? In this episode Christiaan Swart — an NLP practitioner with six years’ experience across email, complaints, pharma, and sales who cofounded Comtura (born from sales call transcription and CRM integration) — walks through practical methods for dataset creation and annotation.

We cover automated, manual, and hybrid pipelines; stakeholder alignment to de‑risk projects; in‑house vs. crowdsourcing trade‑offs; and building a living annotation guidebook for ambiguous cases. Chris explains model‑assisted annotation (pre‑labeling and interpretability layers), capturing expert knowledge, establishing human baselines, and improving annotation UX and productivity. You’ll also hear about annotation quality metrics (inter‑annotator agreement, throughput, fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling functions), programmatic heuristics, and tooling recommendations like Prodigy, Doccano, Label Studio, Snorkel, and Rubrix. Quick‑start tips using IPython widgets and Fast.ai, plus privacy and multilingual considerations (GDPR, anonymization), round out the conversation.

Listen to learn actionable strategies for cost‑effective dataset creation, annotation workflows, and tool choices that speed model development and produce reliable training data +topics: +- NLP +- data +dateadded: 2022-09-09 + +duration: PT01H03M40S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=0 + endOffset: 82 +- name: 'Episode Overview: Dataset creation, curation, and annotation' + startOffset: 82 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=82 + endOffset: 144 +- name: Guest Background & Career in NLP and bio‑NLP + startOffset: 144 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=144 + endOffset: 312 +- name: 'Comtura Origin: Sales call transcription and CRM integration' + startOffset: 312 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=312 + endOffset: 411 +- name: 'Dataset Creation Approaches: Automated, manual, and hybrid pipelines' + startOffset: 411 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=411 + endOffset: 542 +- name: 'Stakeholder Alignment: Top‑down framing to de‑risk projects' + startOffset: 542 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=542 + endOffset: 939 +- name: 'Annotation Strategy: In‑house vs. 
crowdsourcing trade‑offs' + startOffset: 939 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=939 + endOffset: 1116 +- name: 'Annotation Guidebook: Living documentation and ambiguous cases' + startOffset: 1116 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1116 + endOffset: 1257 +- name: 'Model‑Assisted Annotation: Pre‑labeling and interpretability layers' + startOffset: 1257 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1257 + endOffset: 1441 +- name: 'Expert Knowledge Capture: Mind maps and task translation for annotators' + startOffset: 1441 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1441 + endOffset: 1768 +- name: 'Human Baseline & Prototyping: Validating feasibility and business value' + startOffset: 1768 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1768 + endOffset: 2102 +- name: 'Annotation UX & Productivity: Hotkeys, interfaces, and iterative gains' + startOffset: 2102 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2102 + endOffset: 2262 +- name: 'Annotation Quality Metrics: Inter‑annotator agreement, throughput, fatigue' + startOffset: 2262 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2262 + endOffset: 2571 +- name: 'Active Learning in Practice: Expectations and typical gains' + startOffset: 2571 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2571 + endOffset: 2697 +- name: 'Distance Supervision & Weak Supervision: Labeling functions and Snorkel' + startOffset: 2697 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2697 + endOffset: 2904 +- name: 'Programmatic Heuristics: Entity/verb patterns and weak label design' + startOffset: 2904 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2904 + endOffset: 3037 +- name: 'Tooling Recommendations: Prodigy, Docanno, Label Studio, Snorkel, Rubrics' + startOffset: 3037 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3037 + endOffset: 3154 +- name: 'Portfolio Advice: Building career projects via dataset creation' + startOffset: 3154 + url: 
https://www.youtube.com/watch?v=QggWydGrWoo&t=3154 + endOffset: 3438 +- name: 'Quick‑start Collection: IPython widgets and Fast.ai for beginners' + startOffset: 3438 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3438 + endOffset: 3506 +- name: 'Privacy & Multilingual NLP: GDPR, anonymization, and language challenges' + startOffset: 3506 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3506 + endOffset: 3820 +- name: 'Contact & Resources: Blog, company, and social links' + startOffset: 3820 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3820 + endOffset: 3820 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Dataset creation, curation, and annotation' @@ -1048,112 +1143,6 @@ transcript: sec: 3902 time: '1:05:02' who: Alexey -description: 'Discover dataset creation, annotation & active learning: practical annotation - UX, quality metrics, prototyping tips and tooling to accelerate NLP models.' -intro: How do you create high‑quality NLP datasets without breaking the budget? In - this episode Christiaan Swart — an NLP practitioner with six years’ experience across - email, complaints, pharma, and sales who cofounded Comtura (born from sales call - transcription and CRM integration) — walks through practical methods for dataset - creation and annotation.

We cover automated, manual, and hybrid pipelines; - stakeholder alignment to de‑risk projects; in‑house vs. crowdsourcing trade‑offs; - and building a living annotation guidebook for ambiguous cases. Chris explains model‑assisted - annotation (pre‑labeling and interpretability layers), capturing expert knowledge, - establishing human baselines, and improving annotation UX and productivity. You’ll - also hear about annotation quality metrics (inter‑annotator agreement, throughput, - fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling - functions), programmatic heuristics, and tooling recommendations like Prodigy, Docanno, - Label Studio, Snorkel, and Rubrics. Quick‑start tips using IPython widgets and Fast.ai, - plus privacy and multilingual considerations (GDPR, anonymization), round out the - conversation.

Listen to learn actionable strategies for cost‑effective - dataset creation, annotation workflows, and tool choices that speed model development - and produce reliable training data. -dateadded: '2022-09-09' -duration: PT01H03M40S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=0 - endOffset: 82 -- name: 'Episode Overview: Dataset creation, curation, and annotation' - startOffset: 82 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=82 - endOffset: 144 -- name: Guest Background & Career in NLP and bio‑NLP - startOffset: 144 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=144 - endOffset: 312 -- name: 'Comtura Origin: Sales call transcription and CRM integration' - startOffset: 312 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=312 - endOffset: 411 -- name: 'Dataset Creation Approaches: Automated, manual, and hybrid pipelines' - startOffset: 411 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=411 - endOffset: 542 -- name: 'Stakeholder Alignment: Top‑down framing to de‑risk projects' - startOffset: 542 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=542 - endOffset: 939 -- name: 'Annotation Strategy: In‑house vs. 
crowdsourcing trade‑offs' - startOffset: 939 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=939 - endOffset: 1116 -- name: 'Annotation Guidebook: Living documentation and ambiguous cases' - startOffset: 1116 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1116 - endOffset: 1257 -- name: 'Model‑Assisted Annotation: Pre‑labeling and interpretability layers' - startOffset: 1257 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1257 - endOffset: 1441 -- name: 'Expert Knowledge Capture: Mind maps and task translation for annotators' - startOffset: 1441 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1441 - endOffset: 1768 -- name: 'Human Baseline & Prototyping: Validating feasibility and business value' - startOffset: 1768 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1768 - endOffset: 2102 -- name: 'Annotation UX & Productivity: Hotkeys, interfaces, and iterative gains' - startOffset: 2102 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2102 - endOffset: 2262 -- name: 'Annotation Quality Metrics: Inter‑annotator agreement, throughput, fatigue' - startOffset: 2262 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2262 - endOffset: 2571 -- name: 'Active Learning in Practice: Expectations and typical gains' - startOffset: 2571 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2571 - endOffset: 2697 -- name: 'Distance Supervision & Weak Supervision: Labeling functions and Snorkel' - startOffset: 2697 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2697 - endOffset: 2904 -- name: 'Programmatic Heuristics: Entity/verb patterns and weak label design' - startOffset: 2904 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2904 - endOffset: 3037 -- name: 'Tooling Recommendations: Prodigy, Docanno, Label Studio, Snorkel, Rubrics' - startOffset: 3037 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3037 - endOffset: 3154 -- name: 'Portfolio Advice: Building career projects via dataset creation' - startOffset: 3154 - url: 
https://www.youtube.com/watch?v=QggWydGrWoo&t=3154 - endOffset: 3438 -- name: 'Quick‑start Collection: IPython widgets and Fast.ai for beginners' - startOffset: 3438 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3438 - endOffset: 3506 -- name: 'Privacy & Multilingual NLP: GDPR, anonymization, and language challenges' - startOffset: 3506 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3506 - endOffset: 3820 -- name: 'Contact & Resources: Blog, company, and social links' - startOffset: 3820 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3820 - endOffset: 3820 --- Links: diff --git a/_podcast/s06e08-nlp-teams.md b/_podcast/nlp-team-hiring-and-production-mlops.md similarity index 97% rename from _podcast/s06e08-nlp-teams.md rename to _podcast/nlp-team-hiring-and-production-mlops.md index 6c8b527f..3617a6e7 100644 --- a/_podcast/s06e08-nlp-teams.md +++ b/_podcast/nlp-team-hiring-and-production-mlops.md @@ -1,12 +1,11 @@ --- -title: 'Lead NLP Teams: Hiring, Production Pipelines, MLOps & LLM Tradeoffs (GPT-3, - spaCy)' +title: 'Lead NLP Teams: Hiring, Production Pipelines, MLOps & LLM Tradeoffs (GPT-3, spaCy)' short: Leading NLP Teams +season: 6 +episode: 8 guests: - ivanbilan image: images/podcast/s06e08-nlp-teams.jpg -season: 6 -episode: 8 ids: youtube: RJEf6mzxh1w anchor: Leading-NLP-Teams---Ivan-Bilan-e1c4929 @@ -15,6 +14,127 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Leading-NLP-Teams---Ivan-Bilan-e1c4929 spotify: https://open.spotify.com/episode/0jE1rpmLCYkD3GnUa2E7E3 apple: https://podcasts.apple.com/us/podcast/leading-nlp-teams-ivan-bilan/id1541710331?i=1000546053682 + +description: Learn practical NLP teams hiring, production pipelines and MLOps tradeoffs—GPT-3 & spaCy tactics to deploy, monitor and scale reliable LLM systems +intro: How do you structure an NLP team and build reliable production pipelines while weighing the tradeoffs between GPT‑3 and in‑house models? 
In this episode, Ivan Bilan, Engineering Manager at Personio working on Identity and Access Management, walks through practical answers from his transition from linguistics to production NLP and MLOps.

We cover hiring and team models (centralized vs cross‑disciplinary), what to look for in NLP engineers (tokenization, linguistics, deployment skills), and when to bring in linguists or conversational designers. Ivan breaks down the anatomy of an NLP production pipeline—data annotation, task engineering, testing, deployment, observability—and contrasts using GPT‑3 with building in‑house pipelines and open‑source tools like spaCy and Hugging Face for MVPs. He discusses inference optimization, privacy and bias risks with large language models, benchmarking limits, and practical microservice patterns for data‑intensive apps.

Listen to learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production +topics: +- NLP +- machine learning +- MLOps +- data teams +- LLMs +- leadership +- career growth +- production +dateadded: 2021-12-26 + +duration: PT00H59M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=0 + endOffset: 114 +- name: 'Episode Overview: Leading NLP Teams & Ivan''s Current Role' + startOffset: 114 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=114 + endOffset: 175 +- name: 'Personio Role: Identity and Access Management Responsibilities' + startOffset: 175 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=175 + endOffset: 279 +- name: 'Career Origins: From Linguistics to Computational NLP' + startOffset: 279 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=279 + endOffset: 442 +- name: 'Early Tech Stack: From Perl to Python and Web Scraping' + startOffset: 442 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=442 + endOffset: 522 +- name: 'Technical Management Study: CDTM, Internships, and Organizational Learning' + startOffset: 522 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=522 + endOffset: 714 +- name: 'Management Transition: From ML Teams to Web Product Engineering & Observability' + startOffset: 714 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=714 + endOffset: 847 +- name: 'Defining NLP Teams: Centralized vs Cross‑disciplinary Structures' + startOffset: 847 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=847 + endOffset: 1005 +- name: 'NLP Engineer Role: Skills, Linguistics Background, and Tokenization Expertise' + startOffset: 1005 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1005 + endOffset: 1156 +- name: 'Path to Becoming an NLP Engineer: Practical Resources, spaCy & Hugging Face' + startOffset: 1156 + url: 
https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1156 + endOffset: 1351 +- name: 'Vision vs Text: Comparing Computer Vision and NLP Challenges' + startOffset: 1351 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1351 + endOffset: 1476 +- name: 'NLP Engineer vs ML Engineer: Inference Optimization, Deployment & MLOps' + startOffset: 1476 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1476 + endOffset: 1579 +- name: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non‑coding Roles' + startOffset: 1579 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1579 + endOffset: 1718 +- name: 'Linguists in NLP: Parsing, Information Extraction & Multilingual Needs' + startOffset: 1718 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1718 + endOffset: 1811 +- name: 'When to Hire NLP Specialists: Task Complexity, Data Needs & Feature Engineering' + startOffset: 1811 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1811 + endOffset: 1941 +- name: 'Future of NLP: Library Ecosystem, AutoML & Research Velocity' + startOffset: 1941 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1941 + endOffset: 2097 +- name: 'NLP Pipeline Anatomy: Data Annotation, Task Engineering, Testing, Production + & Observability' + startOffset: 2097 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2097 + endOffset: 2335 +- name: 'Large Language Models & Prompting: GPT‑3 Capabilities and Simplification' + startOffset: 2335 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2335 + endOffset: 2585 +- name: 'GPT‑3 Limitations: Cost, Control, Bias & Privacy Risks' + startOffset: 2585 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2585 + endOffset: 2770 +- name: 'GPT‑3 vs In‑house Pipelines: MVP Strategy, Control & Open‑Source Alternatives' + startOffset: 2770 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2770 + endOffset: 2919 +- name: 'What NLP Really Is: Industry Productization vs Academic Linguistic Research' + startOffset: 2919 + url: 
https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2919 + endOffset: 3177 +- name: 'AI Benchmarking: Human‑level Claims, Dataset Limits & Real‑world Gaps' + startOffset: 3177 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3177 + endOffset: 3225 +- name: 'Machine Translation State: Google Translate, DeepL, Data Coverage & Language + Pairs' + startOffset: 3225 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3225 + endOffset: 3488 +- name: 'NLP Pandect & Related Projects: GitHub Resources for NLP, Microservices & + Engineering Managers' + startOffset: 3488 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3488 + endOffset: 3641 +- name: 'Contact & Resources: LinkedIn, Presentation Links and Further Reading' + startOffset: 3641 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3641 + endOffset: 3549 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Leading NLP Teams & Ivan''s Current Role' @@ -1083,128 +1203,6 @@ transcript: sec: 3663 time: '1:01:03' who: Ivan -description: Learn practical NLP teams hiring, production pipelines and MLOps tradeoffs—GPT-3 - & spaCy tactics to deploy, monitor and scale reliable LLM systems. -intro: How do you structure an NLP team and build reliable production pipelines while - weighing the tradeoffs between GPT‑3 and in‑house models? In this episode, Ivan Bilan, - Engineering Manager at Personio working on Identity and Access Management, walks - through practical answers from his transition from linguistics to production NLP - and MLOps.

We cover hiring and team models (centralized vs cross‑disciplinary), - what to look for in NLP engineers (tokenization, linguistics, deployment skills), - and when to bring in linguists or conversational designers. Ivan breaks down the - anatomy of an NLP production pipeline—data annotation, task engineering, testing, - deployment, observability—and contrasts using GPT‑3 with building in‑house pipelines - and open‑source tools like spaCy and Hugging Face for MVPs. He discusses inference - optimization, privacy and bias risks with large language models, benchmarking limits, - and practical microservice patterns for data‑intensive apps.

Listen to - learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing - between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production. -dateadded: '2021-12-26' -duration: PT00H59M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=0 - endOffset: 114 -- name: 'Episode Overview: Leading NLP Teams & Ivan''s Current Role' - startOffset: 114 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=114 - endOffset: 175 -- name: 'Personio Role: Identity and Access Management Responsibilities' - startOffset: 175 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=175 - endOffset: 279 -- name: 'Career Origins: From Linguistics to Computational NLP' - startOffset: 279 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=279 - endOffset: 442 -- name: 'Early Tech Stack: From Perl to Python and Web Scraping' - startOffset: 442 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=442 - endOffset: 522 -- name: 'Technical Management Study: CDTM, Internships, and Organizational Learning' - startOffset: 522 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=522 - endOffset: 714 -- name: 'Management Transition: From ML Teams to Web Product Engineering & Observability' - startOffset: 714 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=714 - endOffset: 847 -- name: 'Defining NLP Teams: Centralized vs Cross‑disciplinary Structures' - startOffset: 847 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=847 - endOffset: 1005 -- name: 'NLP Engineer Role: Skills, Linguistics Background, and Tokenization Expertise' - startOffset: 1005 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1005 - endOffset: 1156 -- name: 'Path to Becoming an NLP Engineer: Practical Resources, spaCy & Hugging Face' - startOffset: 1156 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1156 - endOffset: 1351 -- name: 'Vision vs Text: Comparing Computer Vision and NLP Challenges' - 
startOffset: 1351 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1351 - endOffset: 1476 -- name: 'NLP Engineer vs ML Engineer: Inference Optimization, Deployment & MLOps' - startOffset: 1476 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1476 - endOffset: 1579 -- name: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non‑coding Roles' - startOffset: 1579 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1579 - endOffset: 1718 -- name: 'Linguists in NLP: Parsing, Information Extraction & Multilingual Needs' - startOffset: 1718 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1718 - endOffset: 1811 -- name: 'When to Hire NLP Specialists: Task Complexity, Data Needs & Feature Engineering' - startOffset: 1811 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1811 - endOffset: 1941 -- name: 'Future of NLP: Library Ecosystem, AutoML & Research Velocity' - startOffset: 1941 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1941 - endOffset: 2097 -- name: 'NLP Pipeline Anatomy: Data Annotation, Task Engineering, Testing, Production - & Observability' - startOffset: 2097 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2097 - endOffset: 2335 -- name: 'Large Language Models & Prompting: GPT‑3 Capabilities and Simplification' - startOffset: 2335 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2335 - endOffset: 2585 -- name: 'GPT‑3 Limitations: Cost, Control, Bias & Privacy Risks' - startOffset: 2585 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2585 - endOffset: 2770 -- name: 'GPT‑3 vs In‑house Pipelines: MVP Strategy, Control & Open‑Source Alternatives' - startOffset: 2770 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2770 - endOffset: 2919 -- name: 'What NLP Really Is: Industry Productization vs Academic Linguistic Research' - startOffset: 2919 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2919 - endOffset: 3177 -- name: 'AI Benchmarking: Human‑level Claims, Dataset Limits & Real‑world Gaps' - startOffset: 3177 - 
url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3177 - endOffset: 3225 -- name: 'Machine Translation State: Google Translate, DeepL, Data Coverage & Language - Pairs' - startOffset: 3225 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3225 - endOffset: 3488 -- name: 'NLP Pandect & Related Projects: GitHub Resources for NLP, Microservices & - Engineering Managers' - startOffset: 3488 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3488 - endOffset: 3641 -- name: 'Contact & Resources: LinkedIn, Presentation Links and Further Reading' - startOffset: 3641 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3641 - endOffset: 3549 --- Links: diff --git a/_podcast/s02e03-open-source.md b/_podcast/open-source-ml-contributions.md similarity index 92% rename from _podcast/s02e03-open-source.md rename to _podcast/open-source-ml-contributions.md index 03a7ba43..e80de2c9 100644 --- a/_podcast/s02e03-open-source.md +++ b/_podcast/open-source-ml-contributions.md @@ -1,23 +1,11 @@ --- -title: 'Contribute to Open Source ML: scikit-learn Pipelines, PRs, Docs & Rasa Conversational - AI' +title: 'Contribute to Open Source ML: scikit-learn Pipelines, PRs, Docs & Rasa Conversational AI' short: Getting Started with Open Source -description: 'Learn open source contribution tactics for scikit-learn pipelines and - Rasa: make solid PRs, write docs & tests, boost your OSS skills and career visibility.' 
+season: 2 +episode: 3 guests: - vincentwarmerdam -tags: -- open-source -- python -- data-science -- career-development -- contributing -- scikit-learn -- machine-learning -category: Data Science Career image: images/podcast/s02e03-open-source.jpg -season: 2 -episode: 3 ids: youtube: IxV9EH-tphQ anchor: Getting-Started-with-Open-Source---Vincent-Warmerdam-epk60j @@ -26,24 +14,19 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Getting-Started-with-Open-Source---Vincent-Warmerdam-epk60j spotify: https://open.spotify.com/episode/1dsbDeVncfsEg3m3cYB927 apple: https://podcasts.apple.com/us/podcast/getting-started-with-open-source-vincent-warmerdam/id1541710331?i=1000507024598 -intro: 'How do you start contributing to open source ML projects like scikit-learn - pipelines—or move from curious user to confident contributor on Rasa’s conversational - AI stack? In this episode, Vincent Warmerdam, Research Advocate at Rasa and creator - of The Algorithm Whiteboard and calmcode.io, walks through practical, hands-on advice - for contributing to open source ML.

Vincent shares his career pivot from - design student to data scientist and highlights projects (evol, clumper, memo, whatlies, - scikit-lego) that illustrate small-tools-to-impact workflows. We deep-dive into - scikit-learn–compatible pipeline components, design principles for low-maintenance - APIs, and common mistakes such as publishing to PyPI too early. You’ll get a documentation - checklist (README, guides, API reference, examples), guidance on filing reproducible - issues, and step-by-step preparation for pull requests: testing, CI, packaging, - and pre-commit hooks.

Listeners will leave with concrete strategies for - finding the right project, balancing large vs. small repositories, community stewardship - and contribution etiquette, and ways OSS work can boost career visibility through - talks, blogs, and meetups. If you want actionable next steps for contributing to - open source ML, scikit-learn pipelines, PRs, docs, or Rasa conversational AI, this - episode maps the path.' -dateadded: '2021-02-23' + +description: 'Learn open source contribution tactics for scikit-learn pipelines and Rasa: make solid PRs, write docs & tests, boost your OSS skills and career visibility.' +intro: 'How do you start contributing to open source ML projects like scikit-learn pipelines—or move from curious user to confident contributor on Rasa’s conversational AI stack? In this episode, Vincent Warmerdam, Research Advocate at Rasa and creator of The Algorithm Whiteboard and calmcode.io, walks through practical, hands-on advice for contributing to open source ML.

Vincent shares his career pivot from design student to data scientist and highlights projects (evol, clumper, memo, whatlies, scikit-lego) that illustrate small-tools-to-impact workflows. We deep-dive into scikit-learn–compatible pipeline components, design principles for low-maintenance APIs, and common mistakes such as publishing to PyPI too early. You’ll get a documentation checklist (README, guides, API reference, examples), guidance on filing reproducible issues, and step-by-step preparation for pull requests: testing, CI, packaging, and pre-commit hooks.

Listeners will leave with concrete strategies for finding the right project, balancing large vs. small repositories, community stewardship and contribution etiquette, and ways OSS work can boost career visibility through talks, blogs, and meetups. If you want actionable next steps for contributing to open source ML, scikit-learn pipelines, PRs, docs, or Rasa conversational AI, this episode maps the path.' +topics: +- open-source +- data science +- career development +- contributing +- machine learning +- tools +dateadded: 2021-02-23 + + quotableClips: - name: Podcast Introduction and Episode Overview startOffset: 0 @@ -141,6 +124,16 @@ quotableClips: startOffset: 2280 url: https://www.youtube.com/watch?v=IxV9EH-tphQ&t=2280 endOffset: 2280 + +category: Data Science Career +tags: +- open-source +- python +- data-science +- career-development +- contributing +- scikit-learn +- machine-learning --- Today we're talking open source with our guest, **Vincent Warmerdam**. Vincent is a Research Advocate at Rasa. If you check his LinkedIn, you'll see a lot: he's made Reddit's front page, runs calmcode.io for learning to code, has organized PyData Amsterdam and AI Saturdays Amsterdam, and he's a data evangelist and open-source enthusiast who's created and maintains several open-source packages. And—last but not least—he has over 80 LinkedIn endorsements for "awesomeness." Welcome, Vincent! 
diff --git a/_podcast/s09e08-from-open-source-maintainer-to-founder.md b/_podcast/open-source-turned-into-career-and-startup-creation.md similarity index 97% rename from _podcast/s09e08-from-open-source-maintainer-to-founder.md rename to _podcast/open-source-turned-into-career-and-startup-creation.md index 444c9d07..87d03dc1 100644 --- a/_podcast/s09e08-from-open-source-maintainer-to-founder.md +++ b/_podcast/open-source-turned-into-career-and-startup-creation.md @@ -1,22 +1,126 @@ --- +title: "From Developer to Startup Founder: Building a Career Through Open Source" +short: From Open-Source Maintainer to Founder +season: 9 episode: 8 guests: - willmcgugan +image: images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg ids: anchor: From-Open-Source-Maintainer-to-Founder---Will-McGugan-e1kqtu5 youtube: bwfR9dyxf1M -image: images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Open-Source-Maintainer-to-Founder---Will-McGugan-e1kqtu5 apple: https://podcasts.apple.com/us/podcast/designing-a-data-science-organization-lisa-cohen/id1541710331?i=1000569172916 spotify: https://open.spotify.com/episode/4JAwU2jQuXu4MoMucsE899?si=6ed45b98dd4a415a youtube: https://www.youtube.com/watch?v=bwfR9dyxf1M -season: 9 -short: From Open-Source Maintainer to Founder -title: Build Rich Terminal UIs with Textual in Python and Turn Open Source into a - Startup + +description: Discover how to turn open source work into a sustainable career and even a startup. Learn about terminal apps, fundraising, community growth & hiring signals. +intro: "How do you turn open source work into a sustainable career and even a startup? In this episode Will McGugan — a Python open source maintainer and creator of PyFilesystem, Rich, and Textual — walks through his path from video game developer to founder of Textualize. 
We trace his early projects (BBCode parser, chess libraries), the design of PyFilesystem and S3 integrations, and how solving personal needs led to learning by building.

Will breaks down the technical and product journey: Rich’s terminal styling, tables, progress bars and observability features; the Textual framework for terminal GUIs; and the moment of founding Textualize after a viral tweet. He explains building in public, community growth via demos and social media, hiring through open source signals, and practical dev workflows with GitHub, PR reviews, and releases. He also outlines Textualize’s positioning and web hosting business model for terminal apps, plus contribution channels like Discourse and Discord.

Listen to learn concrete, repeatable steps for leveraging Python open source to find freelance freedom, attract users, raise pre-seed interest, and transition from developer to startup founder." +topics: +- open-source +- startups +- career growth +- entrepreneurship +dateadded: 2022-07-15 + +duration: PT00H58M41S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=0 + endOffset: 99 +- name: 'Guest Introduction: Will McGugan, Python Open Source Maintainer' + startOffset: 99 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=99 + endOffset: 127 +- name: 'Career Path: From Video Games to Textualize Founder' + startOffset: 127 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=127 + endOffset: 258 +- name: 'Early Open Source Tools: BBCode Parser & Chess Libraries' + startOffset: 258 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=258 + endOffset: 412 +- name: 'PyFilesystem: Virtual File System Abstraction (fs)' + startOffset: 412 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=412 + endOffset: 575 +- name: 'S3 Integration: S3Fs and Cloud File Interfaces (Pandas integration)' + startOffset: 575 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=575 + endOffset: 689 +- name: 'Project Origins: Solving Personal Needs & Learning by Building' + startOffset: 689 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=689 + endOffset: 907 +- name: 'Freelance Career: Contracting, Long-Term Contracts, and Independence' + startOffset: 907 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=907 + endOffset: 1068 +- name: 'Open Source as Creative Outlet: Freedom Beyond Client Work' + startOffset: 1068 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1068 + endOffset: 1166 +- name: 'Rich Library: Terminal Styling, Tables, Progress, and CLI UX' + startOffset: 1166 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1166 + endOffset: 1476 +- name: 'Rich for Observability: Log Formatting and 
Readability' + startOffset: 1476 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1476 + endOffset: 1599 +- name: 'Textual Framework: Building Terminal GUIs on Rich' + startOffset: 1599 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1599 + endOffset: 1688 +- name: 'Founding Textualize: Pre-Seed Fundraising After Tweeting' + startOffset: 1688 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1688 + endOffset: 1900 +- name: 'Building in Public: Social Media, Demos, and Community Growth' + startOffset: 1900 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1900 + endOffset: 2238 +- name: 'Team & Roadmap: Early Hires, Roles, and Product Vision' + startOffset: 2238 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2238 + endOffset: 2312 +- name: 'Business Model: Web Hosting for Terminal Apps & Generous Free Tier' + startOffset: 2312 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2312 + endOffset: 2493 +- name: 'Market Comparison: Streamlit Analogy and Positioning' + startOffset: 2493 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2493 + endOffset: 2678 +- name: 'Hiring Signals: Open Source Contributions as a Recruiter’s Lens' + startOffset: 2678 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2678 + endOffset: 2760 +- name: 'Development Workflow: GitHub Projects, PR Reviews, and Releases' + startOffset: 2760 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2760 + endOffset: 2977 +- name: 'Community Channels: Discourse, Discord, and How to Contribute' + startOffset: 2977 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2977 + endOffset: 3005 +- name: 'Project Promotion: Getting GitHub Stars and Viral Reach' + startOffset: 3005 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3005 + endOffset: 3440 +- name: 'Advice for New Open Source Authors: Solve Your Own Problem' + startOffset: 3440 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3440 + endOffset: 3571 +- name: 'Closing & Contact: Find Will on Twitter and Textualize 
links' + startOffset: 3571 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3571 + endOffset: 3521 + transcript: -- header: Podcast Introduction - header: 'Guest Introduction: Will McGugan, Python Open Source Maintainer' - line: This week, we'll talk about working on open source. We have a special guest today, Will. Will is a software engineer and author. He's quite an enthusiastic @@ -1274,120 +1378,6 @@ transcript: sec: 3620 time: '1:00:20' who: Alexey -description: Learn Textual, Rich, and open source strategies to build terminal UIs - in Python and turn projects into a startup - fundraising, community growth, hosting - tips. -intro: How do you build expressive terminal UIs in Python and turn open source work - into a sustainable company? In this episode, Will McGugan — a software engineer from - Edinburgh and creator of PyFilesystem, Rich, and Textual — walks through that exact - journey. We cover his career path from game development to founding Textualize, - the origins of projects born from solving personal problems, and early libraries - like BBCode parsers and chess tools.

Listen for deep technical discussion - about PyFilesystem and S3 integration (S3Fs and Pandas workflows), Rich’s capabilities - for terminal styling, tables, progress bars and observability-focused log formatting, - and how Textual layers on Rich to enable full terminal GUIs. Will also explains - the transition from open source maintainer to founder — pre-seed fundraising after - tweeting, building in public, community growth, hiring early team members, product - roadmap, and a hosting-based business model with a generous free tier (Streamlit-style - positioning). Practical takeaways include development workflow, recruiting via open - source contributions, community channels, and advice for new OSS authors. If you - build Python CLI tools or want to commercialize open source, this episode offers - concrete technical and business insights. -dateadded: '2022-07-15' -duration: PT00H58M41S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=0 - endOffset: 99 -- name: 'Guest Introduction: Will McGugan, Python Open Source Maintainer' - startOffset: 99 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=99 - endOffset: 127 -- name: 'Career Path: From Video Games to Textualize Founder' - startOffset: 127 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=127 - endOffset: 258 -- name: 'Early Open Source Tools: BBCode Parser & Chess Libraries' - startOffset: 258 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=258 - endOffset: 412 -- name: 'PyFilesystem: Virtual File System Abstraction (fs)' - startOffset: 412 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=412 - endOffset: 575 -- name: 'S3 Integration: S3Fs and Cloud File Interfaces (Pandas integration)' - startOffset: 575 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=575 - endOffset: 689 -- name: 'Project Origins: Solving Personal Needs & Learning by Building' - startOffset: 689 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=689 - 
endOffset: 907 -- name: 'Freelance Career: Contracting, Long-Term Contracts, and Independence' - startOffset: 907 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=907 - endOffset: 1068 -- name: 'Open Source as Creative Outlet: Freedom Beyond Client Work' - startOffset: 1068 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1068 - endOffset: 1166 -- name: 'Rich Library: Terminal Styling, Tables, Progress, and CLI UX' - startOffset: 1166 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1166 - endOffset: 1476 -- name: 'Rich for Observability: Log Formatting and Readability' - startOffset: 1476 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1476 - endOffset: 1599 -- name: 'Textual Framework: Building Terminal GUIs on Rich' - startOffset: 1599 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1599 - endOffset: 1688 -- name: 'Founding Textualize: Pre-Seed Fundraising After Tweeting' - startOffset: 1688 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1688 - endOffset: 1900 -- name: 'Building in Public: Social Media, Demos, and Community Growth' - startOffset: 1900 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1900 - endOffset: 2238 -- name: 'Team & Roadmap: Early Hires, Roles, and Product Vision' - startOffset: 2238 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2238 - endOffset: 2312 -- name: 'Business Model: Web Hosting for Terminal Apps & Generous Free Tier' - startOffset: 2312 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2312 - endOffset: 2493 -- name: 'Market Comparison: Streamlit Analogy and Positioning' - startOffset: 2493 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2493 - endOffset: 2678 -- name: 'Hiring Signals: Open Source Contributions as a Recruiter’s Lens' - startOffset: 2678 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2678 - endOffset: 2760 -- name: 'Development Workflow: GitHub Projects, PR Reviews, and Releases' - startOffset: 2760 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2760 - 
endOffset: 2977 -- name: 'Community Channels: Discourse, Discord, and How to Contribute' - startOffset: 2977 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2977 - endOffset: 3005 -- name: 'Project Promotion: Getting GitHub Stars and Viral Reach' - startOffset: 3005 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3005 - endOffset: 3440 -- name: 'Advice for New Open Source Authors: Solve Your Own Problem' - startOffset: 3440 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3440 - endOffset: 3571 -- name: 'Closing & Contact: Find Will on Twitter and Textualize links' - startOffset: 3571 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3571 - endOffset: 3521 --- Links: diff --git a/_podcast/s02e08-personal-branding.md b/_podcast/personal-brand-for-data-professionals.md similarity index 76% rename from _podcast/s02e08-personal-branding.md rename to _podcast/personal-brand-for-data-professionals.md index c3592b4f..9ae16329 100644 --- a/_podcast/s02e08-personal-branding.md +++ b/_podcast/personal-brand-for-data-professionals.md @@ -1,12 +1,11 @@ --- -title: 'Build a Personal Brand: Publish on LinkedIn/Medium, Grow Audience, Monetize - with Online Courses' +title: 'Build a Personal Brand: Publish on LinkedIn/Medium, Grow Audience, Monetize with Online Courses' short: Personal Branding +season: 2 +episode: 8 guests: - admondleekinlim image: images/podcast/s02e08-personal-branding.jpg -season: 2 -episode: 8 ids: youtube: tQRQnz_aHYQ anchor: Personal-Branding---Admond-Lee-Kin-Lim-ern77e @@ -15,25 +14,16 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Personal-Branding---Admond-Lee-Kin-Lim-ern77e spotify: https://open.spotify.com/episode/61Yv13MISTeP5nOVAZpY88 apple: https://podcasts.apple.com/us/podcast/personal-branding-admond-lee-kin-lim/id1541710331?i=1000511761026 -description: 'Build a personal brand: publish on LinkedIn & Medium, grow your audience, - and monetize with online courses—publishing best practices, course design, growth - tips.' 
-intro: How do you build a personal brand that actually attracts an audience and turns - into revenue? In this episode, Admond Lee Kin Lim — data scientist, writer, speaker, - and Data Science Instructor at Hackwagon Academy — breaks down a practical path - from first posts to monetizing with online courses. Drawing on his experience at - Micron and as an independent consultant and communicator featured in KDnuggets and - Medium, Admond defines personal brand purpose and positioning, then walks through - the first steps and mindset to start publishing on LinkedIn and Medium.

- You’ll get concrete guidance on formats and best practices for LinkedIn and Medium, - idea generation and content frequency, and the tools he uses (BuzzSumo, Feedly and - alternatives). We also cover when to add podcasting or audio, offline networking - (Lunchclub, 1x1s), conference speaking, and overcoming imposter syndrome. Finally, - Admond explains monetization strategies for selling online courses, course design - focused on student outcomes, aligning content with your values, balancing frequency - vs. quality, and metrics for iteration. Listen for actionable tips, tools, and resource - recommendations to grow your audience and monetize your expertise. -dateadded: '2021-03-05' + +description: 'Build a personal brand: publish on LinkedIn & Medium, grow your audience, and monetize with online courses—publishing best practices, course design, growth tips.' +intro: How do you build a personal brand that actually attracts an audience and turns into revenue? In this episode, Admond Lee Kin Lim — data scientist, writer, speaker, and Data Science Instructor at Hackwagon Academy — breaks down a practical path from first posts to monetizing with online courses. Drawing on his experience at Micron and as an independent consultant and communicator featured in KDnuggets and Medium, Admond defines personal brand purpose and positioning, then walks through the first steps and mindset to start publishing on LinkedIn and Medium.

You’ll get concrete guidance on formats and best practices for LinkedIn and Medium, idea generation and content frequency, and the tools he uses (BuzzSumo, Feedly and alternatives). We also cover when to add podcasting or audio, offline networking (Lunchclub, 1x1s), conference speaking, and overcoming imposter syndrome. Finally, Admond explains monetization strategies for selling online courses, course design focused on student outcomes, aligning content with your values, balancing frequency vs. quality, and metrics for iteration. Listen for actionable tips, tools, and resource recommendations to grow your audience and monetize your expertise. +topics: +- personal brand +- career growth +- monetization +dateadded: 2021-03-05 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -111,6 +101,7 @@ quotableClips: startOffset: 3030 url: https://www.youtube.com/watch?v=tQRQnz_aHYQ&t=3030 endOffset: 3030 + --- We talked about: diff --git a/_podcast/s06e06-from-academia-to-industry.md b/_podcast/postdoc-to-data-science-lead-career-transition.md similarity index 98% rename from _podcast/s06e06-from-academia-to-industry.md rename to _podcast/postdoc-to-data-science-lead-career-transition.md index 66e2fdc4..d8cd95bb 100644 --- a/_podcast/s06e06-from-academia-to-industry.md +++ b/_podcast/postdoc-to-data-science-lead-career-transition.md @@ -1,12 +1,11 @@ --- -title: 'From Postdoc to Data Science Lead: ML Foundations, Docker Deployment & Hiring - Tips' +title: 'From Postdoc to Data Science Lead: ML Foundations, Docker Deployment & Hiring Tips' short: Moving from Academia to Industry +season: 6 +episode: 6 guests: - cjjenkins image: images/podcast/s06e06-from-academia-to-industry.jpg -season: 6 -episode: 6 ids: youtube: m4F651BpUFk anchor: Moving-from-Academia-to-Industry---CJ-Jenkins-e1bh84o @@ -15,6 +14,136 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Moving-from-Academia-to-Industry---CJ-Jenkins-e1bh84o spotify: 
https://open.spotify.com/episode/5Jvo53ibSoX6rfkfdGq5pJ apple: https://podcasts.apple.com/us/podcast/moving-from-academia-to-industry-cj-jenkins/id1541710331?i=1000544589971 + +description: 'Learn a one-year roadmap from postdoc to data science lead: machine learning foundations, Docker deployment, resume and hiring tips to land jobs.' +intro: 'How do you go from a postdoc to a data science lead while mastering machine learning foundations and deployment? In this episode, CJ Jenkins — a PhD-turned-data science lead working on credit risk modeling, with published research and a textbook used in academia — walks through that transition. We trace CJ’s roots in evolutionary biology and genomics, the statistical ML foundations (GLMs, population dynamics), and practical tools like Bash, R, Python, and SQL. Key topics include Docker deployment and bridging the gap between research and production, hiring signals and interview assessment techniques that prioritize learning agility and humility, and concrete career tactics: a one-year Coursera sprint (Johns Hopkins, Andrew Ng), resume rewrites (14 CV iterations), LinkedIn keyword strategy, and selective application versus volume. CJ also discusses location and networking strategies (Berlin, Stockholm, Klarna onboarding), technical expectations for juniors, code quality, and building psychological safety on teams. Listen to learn actionable steps for skills-first resumes, interview preparation, deployment basics, and how to translate academic output into industry impact. Find CJ on LinkedIn for follow-up questions.' 
+topics: +- career transition +- machine learning +- academia +- career growth +dateadded: 2021-12-11 + +duration: PT00H58M44S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=0 + endOffset: 58 +- name: 'Career Journey: Postdoc to Data Science Lead' + startOffset: 58 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=58 + endOffset: 88 +- name: 'Evolutionary Biology: Statistics & Population Dynamics' + startOffset: 88 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=88 + endOffset: 196 +- name: 'Academic Research as Data Science Practice: Genomics & Bash' + startOffset: 196 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=196 + endOffset: 285 +- name: 'Statistical Machine Learning: GLMs and Foundations' + startOffset: 285 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=285 + endOffset: 370 +- name: 'Bridging Gaps: Deployment, Docker, and Python Learning' + startOffset: 370 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=370 + endOffset: 521 +- name: 'Hiring Signals: Smartness, Ambition, and Receptiveness to Feedback' + startOffset: 521 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=521 + endOffset: 642 +- name: 'Interview Assessment: Testing Learning Agility and Humility' + startOffset: 642 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=642 + endOffset: 719 +- name: 'First Tech Interview: Referral, Case Study in R, and Honesty' + startOffset: 719 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=719 + endOffset: 936 +- name: 'Transition Timeline: One-Year Plan and Coursera Sprint' + startOffset: 936 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=936 + endOffset: 1034 +- name: 'Resume Strategy: Skills-First Rewriting and LinkedIn Keywords' + startOffset: 1034 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1034 + endOffset: 1240 +- name: 'Refining Applications: 14 CV Iterations, Recruiter Tips, and ATS' + startOffset: 1240 + url: 
https://www.youtube.com/watch?v=m4F651BpUFk&t=1240 + endOffset: 1366 +- name: 'Learning Resources: Johns Hopkins Specialization and Andrew Ng' + startOffset: 1366 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1366 + endOffset: 1537 +- name: 'Location Strategy: Choosing Berlin and Targeting Companies' + startOffset: 1537 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1537 + endOffset: 1716 +- name: 'Application Strategy: Selective Research vs. Volume Applications' + startOffset: 1716 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1716 + endOffset: 1860 +- name: 'Job Move: Klarna Experience and Onboarding Challenges' + startOffset: 1860 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1860 + endOffset: 1968 +- name: 'Internal Mobility: Relocating to Stockholm Within the Company' + startOffset: 1968 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1968 + endOffset: 2028 +- name: 'Market Entry: Networking, Meetups, and Community Engagement' + startOffset: 2028 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2028 + endOffset: 2203 +- name: 'Technical Expectations: Clean Code and Coding Proficiency for Juniors' + startOffset: 2203 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2203 + endOffset: 2259 +- name: 'Skill Building: Pair Programming, LeetCode, and Code Reviews' + startOffset: 2259 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2259 + endOffset: 2402 +- name: 'Research vs Industry: Publications, Portfolios, and Relevance' + startOffset: 2402 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2402 + endOffset: 2472 +- name: 'Real-World Data Work: Cleaning, Bash, and Domain Translation' + startOffset: 2472 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2472 + endOffset: 2624 +- name: 'Communication Shift: Simplifying Explanations and Office Culture' + startOffset: 2624 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2624 + endOffset: 2838 +- name: 'Team Dynamics: Open Offices, Proximity, and Social Bonding' + 
startOffset: 2838 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2838 + endOffset: 2930 +- name: 'Counterproductive Habits: Competitiveness vs. Collaboration' + startOffset: 2930 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2930 + endOffset: 3065 +- name: 'Psychological Safety: Team Rituals, Sharing Failures, and Trust' + startOffset: 3065 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3065 + endOffset: 3165 +- name: 'Long-Term Learning: NLP, Kaggle as a Learning Resource' + startOffset: 3165 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3165 + endOffset: 3328 +- name: 'Academic Output: Writing a Textbook on Parasitology' + startOffset: 3328 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3328 + endOffset: 3525 +- name: Closing Remarks and Contact Info (Find CJ on LinkedIn) + startOffset: 3525 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3525 + endOffset: 3524 + transcript: - header: Podcast Introduction - header: 'Career Journey: Postdoc to Data Science Lead' @@ -1223,142 +1352,6 @@ transcript: sec: 3582 time: '59:42' who: Alexey -description: 'Learn a one-year roadmap from postdoc to data science lead: machine - learning foundations, Docker deployment, resume and hiring tips to land jobs.' -intro: 'How do you go from a postdoc to a data science lead while mastering machine - learning foundations and deployment? In this episode, CJ Jenkins — a PhD-turned-data - science lead working on credit risk modeling, with published research and a textbook - used in academia — walks through that transition. We trace CJ’s roots in evolutionary - biology and genomics, the statistical ML foundations (GLMs, population dynamics), - and practical tools like Bash, R, Python, and SQL. 
Key topics include Docker deployment - and bridging the gap between research and production, hiring signals and interview - assessment techniques that prioritize learning agility and humility, and concrete - career tactics: a one-year Coursera sprint (Johns Hopkins, Andrew Ng), resume rewrites - (14 CV iterations), LinkedIn keyword strategy, and selective application versus - volume. CJ also discusses location and networking strategies (Berlin, Stockholm, - Klarna onboarding), technical expectations for juniors, code quality, and building - psychological safety on teams. Listen to learn actionable steps for skills-first - resumes, interview preparation, deployment basics, and how to translate academic - output into industry impact. Find CJ on LinkedIn for follow-up questions.' -dateadded: '2021-12-11' -duration: PT00H58M44S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=0 - endOffset: 58 -- name: 'Career Journey: Postdoc to Data Science Lead' - startOffset: 58 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=58 - endOffset: 88 -- name: 'Evolutionary Biology: Statistics & Population Dynamics' - startOffset: 88 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=88 - endOffset: 196 -- name: 'Academic Research as Data Science Practice: Genomics & Bash' - startOffset: 196 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=196 - endOffset: 285 -- name: 'Statistical Machine Learning: GLMs and Foundations' - startOffset: 285 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=285 - endOffset: 370 -- name: 'Bridging Gaps: Deployment, Docker, and Python Learning' - startOffset: 370 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=370 - endOffset: 521 -- name: 'Hiring Signals: Smartness, Ambition, and Receptiveness to Feedback' - startOffset: 521 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=521 - endOffset: 642 -- name: 'Interview Assessment: Testing Learning Agility and Humility' - 
startOffset: 642 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=642 - endOffset: 719 -- name: 'First Tech Interview: Referral, Case Study in R, and Honesty' - startOffset: 719 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=719 - endOffset: 936 -- name: 'Transition Timeline: One-Year Plan and Coursera Sprint' - startOffset: 936 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=936 - endOffset: 1034 -- name: 'Resume Strategy: Skills-First Rewriting and LinkedIn Keywords' - startOffset: 1034 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1034 - endOffset: 1240 -- name: 'Refining Applications: 14 CV Iterations, Recruiter Tips, and ATS' - startOffset: 1240 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1240 - endOffset: 1366 -- name: 'Learning Resources: John Hopkins Specialization and Andrew Ng' - startOffset: 1366 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1366 - endOffset: 1537 -- name: 'Location Strategy: Choosing Berlin and Targeting Companies' - startOffset: 1537 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1537 - endOffset: 1716 -- name: 'Application Strategy: Selective Research vs. 
Volume Applications' - startOffset: 1716 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1716 - endOffset: 1860 -- name: 'Job Move: Klarna Experience and Onboarding Challenges' - startOffset: 1860 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1860 - endOffset: 1968 -- name: 'Internal Mobility: Relocating to Stockholm Within the Company' - startOffset: 1968 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1968 - endOffset: 2028 -- name: 'Market Entry: Networking, Meetups, and Community Engagement' - startOffset: 2028 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2028 - endOffset: 2203 -- name: 'Technical Expectations: Clean Code and Coding Proficiency for Juniors' - startOffset: 2203 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2203 - endOffset: 2259 -- name: 'Skill Building: Pair Programming, LeetCode, and Code Reviews' - startOffset: 2259 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2259 - endOffset: 2402 -- name: 'Research vs Industry: Publications, Portfolios, and Relevance' - startOffset: 2402 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2402 - endOffset: 2472 -- name: 'Real-World Data Work: Cleaning, Bash, and Domain Translation' - startOffset: 2472 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2472 - endOffset: 2624 -- name: 'Communication Shift: Simplifying Explanations and Office Culture' - startOffset: 2624 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2624 - endOffset: 2838 -- name: 'Team Dynamics: Open Offices, Proximity, and Social Bonding' - startOffset: 2838 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2838 - endOffset: 2930 -- name: 'Counterproductive Habits: Competitiveness vs. 
Collaboration' - startOffset: 2930 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2930 - endOffset: 3065 -- name: 'Psychological Safety: Team Rituals, Sharing Failures, and Trust' - startOffset: 3065 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3065 - endOffset: 3165 -- name: 'Long-Term Learning: NLP, Kaggle as a Learning Resource' - startOffset: 3165 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3165 - endOffset: 3328 -- name: 'Academic Output: Writing a Textbook on Parasitology' - startOffset: 3328 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3328 - endOffset: 3525 -- name: Closing Remarks and Contact Info (Find CJ on LinkedIn) - startOffset: 3525 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3525 - endOffset: 3524 --- Links: diff --git a/_podcast/s15e04-good-bad-and-ugly-of-gpt.md b/_podcast/practical-llm-use-cases-and-product-patterns.md similarity index 97% rename from _podcast/s15e04-good-bad-and-ugly-of-gpt.md rename to _podcast/practical-llm-use-cases-and-product-patterns.md index ce8a0e76..c42ded32 100644 --- a/_podcast/s15e04-good-bad-and-ugly-of-gpt.md +++ b/_podcast/practical-llm-use-cases-and-product-patterns.md @@ -1,19 +1,117 @@ --- +title: 'LLM Value Creation: GPT Communities, Business Use Cases & Human-in-the-Loop AI Applications' +short: The Good, the Bad and the Ugly of GPT +season: 15 episode: 4 guests: - sandrakublik +image: images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg ids: anchor: atatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 youtube: bM6AR4A-f98 -image: images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 apple: https://podcasts.apple.com/us/podcast/the-good-the-bad-and-the-ugly-of-gpt-sandra-kublik/id1541710331?i=1000623464507 spotify: https://open.spotify.com/episode/5fZ89re1YLiVZ7QNxdoKVH?si=pD96Dv_tRvaHci5N8PZv9g youtube: 
https://www.youtube.com/watch?v=bM6AR4A-f98 -season: 15 -short: The Good, the Bad and the Ugly of GPT -title: 'Build Secure LLM Apps: GPT, Prompt Engineering, Embeddings & Semantic Search' + +description: "Create real business value with LLMs: from early GPT communities to production applications that actually work." +intro: "How do you create real business value with LLMs — from early GPT communities to production applications that actually work? In this episode, Sandra Kublik — AI entrepreneur, community builder, and author on GPT — shares a practical, entrepreneurial perspective on building LLM-powered products that deliver results.

Sandra traces her journey through the early GPT community (Nextgrid, Lablab.AI, YouTube) and breaks down proven business use cases: text generation workflows, semantic search with embeddings, and domain-specific AI assistants. You'll get actionable guidance on product patterns, prompt engineering techniques, and the critical human-in-the-loop requirements for reliable AI applications. The conversation covers real-world trade-offs between proprietary and open-source models, security considerations for enterprise deployment, and practical strategies to mitigate hallucinations while maintaining brand safety.

Listen for concrete frameworks to evaluate LLM integration opportunities, a 7-day experiment to test LLMs in your workflow, and proven patterns for scaling AI applications from prototype to production. Sandra shares resources on YouTube, X, and LinkedIn for continued learning and implementation examples." +topics: +- LLMs +dateadded: 2023-08-06 +duration: PT01H09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=0 + endOffset: 75 +- name: 'Guest Introduction: Sandra Kublik, AI entrepreneur and GPT-3 author' + startOffset: 75 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=75 + endOffset: 146 +- name: 'LLM Landscape: Why GPT and large language models are everywhere' + startOffset: 146 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=146 + endOffset: 185 +- name: 'Career Journey: Nextgrid, Lablab.AI and YouTube entry into AI' + startOffset: 185 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=185 + endOffset: 488 +- name: 'Early GPT Community: Gaining access and demo-driven growth' + startOffset: 488 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=488 + endOffset: 600 +- name: 'GPT & LLM Business Use Cases: Text generation, embeddings, and semantic search' + startOffset: 600 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=600 + endOffset: 953 +- name: 'Cohere Focus: Community building and LLM education' + startOffset: 953 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=953 + endOffset: 1002 +- name: 'Market Adoption: Startups, VC interest, and generative AI trends' + startOffset: 1002 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1002 + endOffset: 1162 +- name: 'LLMs as Amplifiers: Impact on authenticity and content scaling' + startOffset: 1162 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1162 + endOffset: 1409 +- name: 'Human-in-the-Loop: Hallucinations, brand safety, and editorial curation' + startOffset: 1409 + url: 
https://www.youtube.com/watch?v=bM6AR4A-f98&t=1409 + endOffset: 1676 +- name: 'Specialist Assistants: Secure, domain-specific chatbots for professionals' + startOffset: 1676 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1676 + endOffset: 1948 +- name: 'Building LLM Apps: Model choice, architecture, and integration trade-offs' + startOffset: 1948 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1948 + endOffset: 2128 +- name: 'Proprietary vs Open Source: Cost, latency, IP and data risk considerations' + startOffset: 2128 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2128 + endOffset: 2241 +- name: 'Prompt Engineering: Iteration, examples, and prompt whisperer techniques' + startOffset: 2241 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2241 + endOffset: 2421 +- name: 'Fine-Tuning & Embeddings: Domain adaptation and semantic retrieval' + startOffset: 2421 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2421 + endOffset: 2672 +- name: 'Prompt Tips: Providing examples, context, and SEO-focused instructions' + startOffset: 2672 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2672 + endOffset: 3061 +- name: '7-Day LLM Experiment: Integrating language models into daily workflow' + startOffset: 3061 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3061 + endOffset: 3363 +- name: 'Productivity Tools: Email assistants and content automation extensions' + startOffset: 3363 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3363 + endOffset: 3484 +- name: 'Learning Resources: LLM University, Cohere blog, and recommended readings' + startOffset: 3484 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3484 + endOffset: 3630 +- name: 'Contact & Social: Where to find Sandra online (YouTube, X, LinkedIn)' + startOffset: 3630 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3630 + endOffset: 3664 +- name: Episode Wrap-Up and Next Steps + startOffset: 3664 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3664 + endOffset: 3684 +- 
name: Closing Remarks + startOffset: 3684 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3684 + endOffset: 3609 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Sandra Kublik, AI entrepreneur and GPT-3 author' @@ -900,114 +998,6 @@ transcript: sec: 3684 time: '1:01:24' who: Sandra -description: 'Build secure LLM apps with GPT: master prompt engineering and embeddings - to cut hallucinations, protect data, scale workflows, and boost content ROI.' -intro: 'How do you build secure LLM apps that use GPT, embeddings and semantic search - while avoiding hallucinations and data risk? In this episode, Sandra Kublik — AI - entrepreneur, community builder, and author on GPT — walks through practical trade-offs - for building production LLM systems.

Sandra traces the LLM landscape and - her career (Nextgrid, Lablab.AI, YouTube), then digs into real-world use cases like - text generation, semantic retrieval with embeddings, and domain-specific chatbots. - You’ll hear guidance on model choice, architecture, proprietary vs open source trade-offs - (cost, latency, IP and data risk), and concrete prompt engineering techniques including - examples, iteration strategies, and “prompt whisperer” tips. The conversation covers - security and quality: human-in-the-loop workflows to mitigate hallucinations, brand - safety, and editorial curation, plus fine-tuning and semantic search strategies - for domain adaptation.

Listeners get a practical value proposition: frameworks - to evaluate LLM security and integration trade-offs, a 7-day experiment to embed - LLMs into your workflow, and pointers to productivity tools and learning resources. - Find Sandra on YouTube, X, and LinkedIn for follow-up resources and examples.' -dateadded: '2023-08-06' -duration: PT01H09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=0 - endOffset: 75 -- name: 'Guest Introduction: Sandra Kublik, AI entrepreneur and GPT-3 author' - startOffset: 75 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=75 - endOffset: 146 -- name: 'LLM Landscape: Why GPT and large language models are everywhere' - startOffset: 146 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=146 - endOffset: 185 -- name: 'Career Journey: Nextgrid, Lablab.AI and YouTube entry into AI' - startOffset: 185 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=185 - endOffset: 488 -- name: 'Early GPT Community: Gaining access and demo-driven growth' - startOffset: 488 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=488 - endOffset: 600 -- name: 'GPT & LLM Business Use Cases: Text generation, embeddings, and semantic search' - startOffset: 600 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=600 - endOffset: 953 -- name: 'Cohere Focus: Community building and LLM education' - startOffset: 953 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=953 - endOffset: 1002 -- name: 'Market Adoption: Startups, VC interest, and generative AI trends' - startOffset: 1002 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1002 - endOffset: 1162 -- name: 'LLMs as Amplifiers: Impact on authenticity and content scaling' - startOffset: 1162 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1162 - endOffset: 1409 -- name: 'Human-in-the-Loop: Hallucinations, brand safety, and editorial curation' - startOffset: 1409 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1409 - 
endOffset: 1676 -- name: 'Specialist Assistants: Secure, domain-specific chatbots for professionals' - startOffset: 1676 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1676 - endOffset: 1948 -- name: 'Building LLM Apps: Model choice, architecture, and integration trade-offs' - startOffset: 1948 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1948 - endOffset: 2128 -- name: 'Proprietary vs Open Source: Cost, latency, IP and data risk considerations' - startOffset: 2128 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2128 - endOffset: 2241 -- name: 'Prompt Engineering: Iteration, examples, and prompt whisperer techniques' - startOffset: 2241 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2241 - endOffset: 2421 -- name: 'Fine-Tuning & Embeddings: Domain adaptation and semantic retrieval' - startOffset: 2421 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2421 - endOffset: 2672 -- name: 'Prompt Tips: Providing examples, context, and SEO-focused instructions' - startOffset: 2672 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2672 - endOffset: 3061 -- name: '7-Day LLM Experiment: Integrating language models into daily workflow' - startOffset: 3061 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3061 - endOffset: 3363 -- name: 'Productivity Tools: Email assistants and content automation extensions' - startOffset: 3363 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3363 - endOffset: 3484 -- name: 'Learning Resources: LLM University, Cohere blog, and recommended readings' - startOffset: 3484 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3484 - endOffset: 3630 -- name: 'Contact & Social: Where to find Sandra online (YouTube, X, LinkedIn)' - startOffset: 3630 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3630 - endOffset: 3664 -- name: Episode Wrap-Up and Next Steps - startOffset: 3664 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3664 - endOffset: 3684 -- name: Closing Remarks - startOffset: 3684 - url: 
https://www.youtube.com/watch?v=bM6AR4A-f98&t=3684 - endOffset: 3609 --- Links: diff --git a/_podcast/s15e07-pragmatic-and-standardized-mlops.md b/_podcast/pragmatic-and-standardized-mlops.md similarity index 97% rename from _podcast/s15e07-pragmatic-and-standardized-mlops.md rename to _podcast/pragmatic-and-standardized-mlops.md index b0a00eea..e17a9b6a 100644 --- a/_podcast/s15e07-pragmatic-and-standardized-mlops.md +++ b/_podcast/pragmatic-and-standardized-mlops.md @@ -1,20 +1,157 @@ --- +title: 'Pragmatic MLOps: Build Standardized CI/CD, Model Registries, Monitoring & Org Best Practices' +short: Pragmatic and Standardized MLOps +season: 15 episode: 7 guests: - mariavechtomova +image: images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg ids: anchor: lub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv youtube: q3DTR3Od1MA -image: images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv apple: https://podcasts.apple.com/us/podcast/pragmatic-and-standardized-mlops-maria-vechtomova/id1541710331?i=1000627227242 spotify: https://open.spotify.com/episode/5UZPZTDllam3RrbI9sOyqS?si=Ghm1oD8bSFS6l0ULDlatpQ youtube: https://www.youtube.com/watch?v=q3DTR3Od1MA -season: 15 -short: Pragmatic and Standardized MLOps -title: 'Pragmatic MLOps: Build Standardized CI/CD, Model Registries, Monitoring & - Org Best Practices' + +description: 'Learn pragmatic MLOps: standardize CI/CD, model registry and monitoring to boost reproducibility, deployment reliability, and team productivity.' +intro: 'How do you build pragmatic, standardized MLOps across teams without chasing every new tool? In this episode, Maria Vechtomova — an MLOps tech lead and manager with roots in econometrics and early work moving from R to Python — tackles MLOps as an organizational challenge, not just a technology problem.

Maria walks through core, actionable topics: building reusable CI/CD and standardized repos, choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines central MLOps responsibilities — infrastructure, registries, deployment patterns, and monitoring — and contrasts centralized platform teams with embedded feature teams and guardrails. You’ll hear practical advice on moving logic out of notebooks into packages and pipelines, conducting maturity assessments (reproducibility, testing, documentation), and securing DevOps buy‑in. The conversation also covers monitoring standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus retail use cases like demand forecasting and personalization.

Listen to learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring — and how to prioritize organizational change to make MLOps work in production.' +topics: +- MLOps +dateadded: 2023-09-25 + +duration: PT00H57M05S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=0 + endOffset: 101 +- name: 'Episode Overview: Pragmatic and Standardized MLOps with Maria Vechtomova' + startOffset: 101 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=101 + endOffset: 187 +- name: 'Background: Early career in data, econometrics, R to Python, and early MLOps + work' + startOffset: 187 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=187 + endOffset: 345 +- name: 'Early MLOps stacks: Teradata Aster, custom metadata, and orchestration' + startOffset: 345 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=345 + endOffset: 483 +- name: 'Role Overview: MLOps Tech Lead / Manager of Machine Learning Engineering' + startOffset: 483 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=483 + endOffset: 585 +- name: 'Marvelous MLOps: blog, LinkedIn presence, and content cadence' + startOffset: 585 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=585 + endOffset: 670 +- name: 'Defining MLOps: enablement, reproducibility, and teaching data scientists' + startOffset: 670 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=670 + endOffset: 762 +- name: 'Central MLOps team responsibilities: infrastructure, reusable CI/CD, and + monitoring' + startOffset: 762 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=762 + endOffset: 885 +- name: 'Toollandscape overload: MAD landscape, FOMO, and organizational challenges' + startOffset: 885 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=885 + endOffset: 987 +- name: 'Pragmatic MLOps: leverage existing infra (Kubernetes, Git, CI/CD) not new + tools' + startOffset: 987 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=987 + endOffset: 
1121 +- name: 'Essential MLOps stack: version control, CI/CD, registries, model registry, + deployment, monitoring' + startOffset: 1121 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1121 + endOffset: 1249 +- name: 'Model artifacts & registry options: Artifactory, S3, and MLflow alternatives' + startOffset: 1249 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1249 + endOffset: 1343 +- name: 'MLOps maturity assessment: documentation, reproducibility, code quality, + and testing' + startOffset: 1343 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1343 + endOffset: 1441 +- name: 'Startup priorities: reproducibility, versioning, traceability as first steps' + startOffset: 1441 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1441 + endOffset: 1626 +- name: 'Team organization: centralized MLOps vs. embedded feature teams and guardrails' + startOffset: 1626 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1626 + endOffset: 1795 +- name: 'Standardization: cookie‑cutter repos, service principals, and Databricks + integration' + startOffset: 1795 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1795 + endOffset: 2004 +- name: 'Production best practices: move logic from notebooks to packages and CI/CD + pipelines' + startOffset: 2004 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2004 + endOffset: 2069 +- name: 'Implementation timeline: technical build vs. 
organizational buy‑in and permissions' + startOffset: 2069 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2069 + endOffset: 2121 +- name: 'Securing DevOps buy‑in: expose pain, deliver standards, and enable internal + audit' + startOffset: 2121 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2121 + endOffset: 2281 +- name: 'Team composition: small senior ML engineering team building MLOps platform' + startOffset: 2281 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2281 + endOffset: 2369 +- name: 'Tool‑agnostic skills: learn fundamentals and stitch tools together end-to-end' + startOffset: 2369 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2369 + endOffset: 2573 +- name: 'Roadmap priorities: monitoring standardization, A/B testing, and LLM pilots' + startOffset: 2573 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2573 + endOffset: 2744 +- name: 'LLM Ops perspective: hype, cost, GPU constraints, and multilingual limits' + startOffset: 2744 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2744 + endOffset: 2982 +- name: 'Retail use cases: demand forecasting, personalization, and loyalty programs' + startOffset: 2982 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2982 + endOffset: 3084 +- name: 'Cross‑brand model: centralized MLOps support for smaller brands and cooperation + with large brands' + startOffset: 3084 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3084 + endOffset: 3245 +- name: 'Learning recommendations: hands‑on projects, MLOps Zoomcamp, and pairing + with engineers' + startOffset: 3245 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3245 + endOffset: 3368 +- name: 'Skill balance: ML fundamentals plus software engineering and system design' + startOffset: 3368 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3368 + endOffset: 3434 +- name: 'Data engineering importance: pipeline design, optimization, and data quality + for MLOps' + startOffset: 3434 + url: 
https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3434 + endOffset: 3496 +- name: 'Closing Remarks: upcoming course, LLM updates, and follow Marvelous MLOps' + startOffset: 3496 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3496 + endOffset: 3425 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Pragmatic and Standardized MLOps with Maria Vechtomova' @@ -1143,154 +1280,6 @@ transcript: sec: 3526 time: '58:46' who: Alexey -description: 'Learn pragmatic MLOps: standardize CI/CD, model registry and monitoring - to boost reproducibility, deployment reliability, and team productivity.' -intro: 'How do you build pragmatic, standardized MLOps across teams without chasing - every new tool? In this episode, Maria Vechtomova — an MLOps tech lead and manager - with roots in econometrics and early work moving from R to Python — tackles MLOps - as an organizational challenge, not just a technology problem.

Maria walks - through core, actionable topics: building reusable CI/CD and standardized repos, - choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), - and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines - central MLOps responsibilities — infrastructure, registries, deployment patterns, - and monitoring — and contrasts centralized platform teams with embedded feature - teams and guardrails. You’ll hear practical advice on moving logic out of notebooks - into packages and pipelines, conducting maturity assessments (reproducibility, testing, - documentation), and securing DevOps buy‑in. The conversation also covers monitoring - standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus - retail use cases like demand forecasting and personalization.

Listen to - learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring - — and how to prioritize organizational change to make MLOps work in production.' -dateadded: '2023-09-25' -duration: PT00H57M05S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=0 - endOffset: 101 -- name: 'Episode Overview: Pragmatic and Standardized MLOps with Maria Vechtomova' - startOffset: 101 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=101 - endOffset: 187 -- name: 'Background: Early career in data, econometrics, R to Python, and early MLOps - work' - startOffset: 187 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=187 - endOffset: 345 -- name: 'Early MLOps stacks: Teradata Aster, custom metadata, and orchestration' - startOffset: 345 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=345 - endOffset: 483 -- name: 'Role Overview: MLOps Tech Lead / Manager of Machine Learning Engineering' - startOffset: 483 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=483 - endOffset: 585 -- name: 'Marvelous MLOps: blog, LinkedIn presence, and content cadence' - startOffset: 585 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=585 - endOffset: 670 -- name: 'Defining MLOps: enablement, reproducibility, and teaching data scientists' - startOffset: 670 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=670 - endOffset: 762 -- name: 'Central MLOps team responsibilities: infrastructure, reusable CI/CD, and - monitoring' - startOffset: 762 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=762 - endOffset: 885 -- name: 'Toollandscape overload: MAD landscape, FOMO, and organizational challenges' - startOffset: 885 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=885 - endOffset: 987 -- name: 'Pragmatic MLOps: leverage existing infra (Kubernetes, Git, CI/CD) not new - tools' - startOffset: 987 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=987 - endOffset: 1121 -- name: 
'Essential MLOps stack: version control, CI/CD, registries, model registry, - deployment, monitoring' - startOffset: 1121 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1121 - endOffset: 1249 -- name: 'Model artifacts & registry options: Artifactory, S3, and MLflow alternatives' - startOffset: 1249 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1249 - endOffset: 1343 -- name: 'MLOps maturity assessment: documentation, reproducibility, code quality, - and testing' - startOffset: 1343 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1343 - endOffset: 1441 -- name: 'Startup priorities: reproducibility, versioning, traceability as first steps' - startOffset: 1441 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1441 - endOffset: 1626 -- name: 'Team organization: centralized MLOps vs. embedded feature teams and guardrails' - startOffset: 1626 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1626 - endOffset: 1795 -- name: 'Standardization: cookie‑cutter repos, service principals, and Databricks - integration' - startOffset: 1795 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1795 - endOffset: 2004 -- name: 'Production best practices: move logic from notebooks to packages and CI/CD - pipelines' - startOffset: 2004 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2004 - endOffset: 2069 -- name: 'Implementation timeline: technical build vs. 
organizational buy‑in and permissions' - startOffset: 2069 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2069 - endOffset: 2121 -- name: 'Securing DevOps buy‑in: expose pain, deliver standards, and enable internal - audit' - startOffset: 2121 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2121 - endOffset: 2281 -- name: 'Team composition: small senior ML engineering team building MLOps platform' - startOffset: 2281 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2281 - endOffset: 2369 -- name: 'Tool‑agnostic skills: learn fundamentals and stitch tools together end-to-end' - startOffset: 2369 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2369 - endOffset: 2573 -- name: 'Roadmap priorities: monitoring standardization, A/B testing, and LLM pilots' - startOffset: 2573 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2573 - endOffset: 2744 -- name: 'LLM Ops perspective: hype, cost, GPU constraints, and multilingual limits' - startOffset: 2744 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2744 - endOffset: 2982 -- name: 'Retail use cases: demand forecasting, personalization, and loyalty programs' - startOffset: 2982 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2982 - endOffset: 3084 -- name: 'Cross‑brand model: centralized MLOps support for smaller brands and cooperation - with large brands' - startOffset: 3084 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3084 - endOffset: 3245 -- name: 'Learning recommendations: hands‑on projects, MLOps Zoomcamp, and pairing - with engineers' - startOffset: 3245 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3245 - endOffset: 3368 -- name: 'Skill balance: ML fundamentals plus software engineering and system design' - startOffset: 3368 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3368 - endOffset: 3434 -- name: 'Data engineering importance: pipeline design, optimization, and data quality - for MLOps' - startOffset: 3434 - url: 
https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3434 - endOffset: 3496 -- name: 'Closing Remarks: upcoming course, LLM updates, and follow Marvelous MLOps' - startOffset: 3496 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3496 - endOffset: 3425 --- Links: diff --git a/_podcast/s06e04-becoming-a-data-product-manager.md b/_podcast/product-designer-to-data-product-manager.md similarity index 97% rename from _podcast/s06e04-becoming-a-data-product-manager.md rename to _podcast/product-designer-to-data-product-manager.md index 601487c0..e2d56b90 100644 --- a/_podcast/s06e04-becoming-a-data-product-manager.md +++ b/_podcast/product-designer-to-data-product-manager.md @@ -1,12 +1,11 @@ --- -title: 'How to Transition from Design to Data Product Manager: SQL, Customer Discovery - & Data Quality' +title: 'How to Transition from Design to Data Product Manager: SQL, Customer Discovery & Data Quality' short: Becoming a Data Product Manager +season: 6 +episode: 4 guests: - saramenefee image: images/podcast/s06e04-becoming-a-data-product-manager.jpg -season: 6 -episode: 4 ids: youtube: nt__pVuuC-k anchor: Becoming-a-Data-Product-Manager---Sara-Menefee-e1arc4a @@ -15,6 +14,111 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-Product-Manager---Sara-Menefee-e1arc4a spotify: https://open.spotify.com/episode/3NZhd5kgQFpGckyxTQH9bF apple: https://podcasts.apple.com/us/podcast/becoming-a-data-product-manager-sara-menefee/id1541710331?i=1000543165093 + +description: 'Learn to transition from product design to Data Product Manager: master SQL, customer discovery, build a portfolio and lead analytics products.' +intro: 'How do you move from product design into a data product manager role — and which technical and discovery skills will make that transition practical and persuasive? 
Sara Menefee, a product manager at Meroxa and former product designer at Sora, Checkr, Change.org, and Zendesk, walks through her path and the concrete steps designers can take to become data-focused PMs.

This episode covers customer discovery and hypothesis formation, SQL and data engineering fundamentals, and the operational realities of data product management: data quality, PII/compliance, and the data lifecycle from sources to warehouses and apps. Sara explains how design thinking and PM–designer collaboration inform discovery and prioritization, and lays out a transition strategy that emphasizes networking, on-the-job learning, mentorship, and a portfolio built around case-study structure (problem, research, solution, outcome). You’ll also hear practical workflows — standups, analytics, customer development interviews — plus documentation-first practices (PRDs, knowledge bases), resource recommendations (including Reforge), and where ML and data science fit into the PM role.

Listen for actionable steps, portfolio guidance, and the technical literacy (SQL, documentation, data curiosity) you''ll need to move from design to data product manager.' +topics: +- career transition +- product design +- product management +dateadded: 2021-11-26 + +duration: PT01H01M07S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=0 + endOffset: 87 +- name: 'Career Path: From Technical Support to Product Design' + startOffset: 87 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=87 + endOffset: 298 +- name: 'Product Design: User Research, Prototyping & UX' + startOffset: 298 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=298 + endOffset: 424 +- name: 'Data Product Management: Customer Discovery & Hypothesis Formation' + startOffset: 424 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=424 + endOffset: 698 +- name: 'Product Lifecycle: Discovery, Planning, Engineering & Launch' + startOffset: 698 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=698 + endOffset: 910 +- name: 'Design Thinking: PM–Designer Collaboration in Ideation' + startOffset: 910 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=910 + endOffset: 986 +- name: 'Transition Motivation: Moving from Design to Product Management' + startOffset: 986 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=986 + endOffset: 1178 +- name: 'Data-focused PM: Data Quality, PII & Compliance Considerations' + startOffset: 1178 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1178 + endOffset: 1380 +- name: 'Core Technical Skills: SQL & Data Engineering Fundamentals' + startOffset: 1380 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1380 + endOffset: 1470 +- name: 'Essential Traits: Data Curiosity, Documentation Literacy & Empathy' + startOffset: 1470 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1470 + endOffset: 1593 +- name: 'Data Lifecycle: Sources, Transformation, Warehouses & Apps' + startOffset: 1593 + 
url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1593 + endOffset: 1710 +- name: 'Transition Strategy: Networking, On-the-Job Learning & Mentorship' + startOffset: 1710 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1710 + endOffset: 1980 +- name: 'Practical Steps: Building a Portfolio & Learning After the Switch' + startOffset: 1980 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1980 + endOffset: 2151 +- name: 'Case Study Structure: Problem, Research, Solution & Outcome' + startOffset: 2151 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2151 + endOffset: 2344 +- name: 'Learning Resources: Courses, Reforge & Recommended Reading' + startOffset: 2344 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2344 + endOffset: 2761 +- name: 'Daily Workflow: Standups, Analytics, CusDev & Context Switching' + startOffset: 2761 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2761 + endOffset: 2977 +- name: 'Customer Development: Interview Focus & Tactical Questions' + startOffset: 2977 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2977 + endOffset: 3115 +- name: 'Key Insight: Data Teams Spend Time Educating the Organization' + startOffset: 3115 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3115 + endOffset: 3249 +- name: 'Adopting New Tools: Documentation First, Pairing & Slack Help' + startOffset: 3249 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3249 + endOffset: 3368 +- name: 'Product Documentation: PRDs, Customer Notes & Knowledge Base' + startOffset: 3368 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3368 + endOffset: 3504 +- name: 'Idea Flow: Sources, Validation & Backlog Prioritization' + startOffset: 3504 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3504 + endOffset: 3640 +- name: 'Analytics vs Data Science: Where ML Fits in the PM Role' + startOffset: 3640 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3640 + endOffset: 3697 +- name: Closing Remarks & How to Reach Out + startOffset: 3697 + url: 
https://www.youtube.com/watch?v=nt__pVuuC-k&t=3697 + endOffset: 3667 + transcript: - header: Podcast Introduction - line: This week, we will talk about a data product manager. We have a special guest @@ -1057,120 +1161,6 @@ transcript: sec: 3722 time: '1:02:02' who: Alexey -description: 'Learn to transition from product design to Data Product Manager: master - SQL, customer discovery, build a portfolio and lead analytics products.' -intro: 'How do you move from product design into a data product manager role — and - which technical and discovery skills will make that transition practical and persuasive? - Sara Menefee, a product manager at Meroxa and former product designer at Sora, Checkr, - Change.org, and Zendesk, walks through her path and the concrete steps designers - can take to become data-focused PMs.

This episode covers customer discovery - and hypothesis formation, SQL and data engineering fundamentals, and the operational - realities of data product management: data quality, PII/compliance, and the data - lifecycle from sources to warehouses and apps. Sara explains how design thinking - and PM–designer collaboration inform discovery and prioritization, and lays out - a transition strategy that emphasizes networking, on-the-job learning, mentorship, - and a portfolio built around case-study structure (problem, research, solution, - outcome). You’ll also hear practical workflows — standups, analytics, customer development - interviews — plus documentation-first practices (PRDs, knowledge bases), resource - recommendations (including Reforge), and where ML and data science fit into the - PM role.

Listen for actionable steps, portfolio guidance, and the technical - literacy (SQL, documentation, data curiosity) you''ll need to move from design to - data product manager.' -dateadded: '2021-11-26' -duration: PT01H01M07S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=0 - endOffset: 87 -- name: 'Career Path: From Technical Support to Product Design' - startOffset: 87 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=87 - endOffset: 298 -- name: 'Product Design: User Research, Prototyping & UX' - startOffset: 298 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=298 - endOffset: 424 -- name: 'Data Product Management: Customer Discovery & Hypothesis Formation' - startOffset: 424 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=424 - endOffset: 698 -- name: 'Product Lifecycle: Discovery, Planning, Engineering & Launch' - startOffset: 698 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=698 - endOffset: 910 -- name: 'Design Thinking: PM–Designer Collaboration in Ideation' - startOffset: 910 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=910 - endOffset: 986 -- name: 'Transition Motivation: Moving from Design to Product Management' - startOffset: 986 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=986 - endOffset: 1178 -- name: 'Data-focused PM: Data Quality, PII & Compliance Considerations' - startOffset: 1178 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1178 - endOffset: 1380 -- name: 'Core Technical Skills: SQL & Data Engineering Fundamentals' - startOffset: 1380 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1380 - endOffset: 1470 -- name: 'Essential Traits: Data Curiosity, Documentation Literacy & Empathy' - startOffset: 1470 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1470 - endOffset: 1593 -- name: 'Data Lifecycle: Sources, Transformation, Warehouses & Apps' - startOffset: 1593 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1593 - 
endOffset: 1710 -- name: 'Transition Strategy: Networking, On-the-Job Learning & Mentorship' - startOffset: 1710 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1710 - endOffset: 1980 -- name: 'Practical Steps: Building a Portfolio & Learning After the Switch' - startOffset: 1980 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1980 - endOffset: 2151 -- name: 'Case Study Structure: Problem, Research, Solution & Outcome' - startOffset: 2151 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2151 - endOffset: 2344 -- name: 'Learning Resources: Courses, Reforge & Recommended Reading' - startOffset: 2344 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2344 - endOffset: 2761 -- name: 'Daily Workflow: Standups, Analytics, CusDev & Context Switching' - startOffset: 2761 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2761 - endOffset: 2977 -- name: 'Customer Development: Interview Focus & Tactical Questions' - startOffset: 2977 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2977 - endOffset: 3115 -- name: 'Key Insight: Data Teams Spend Time Educating the Organization' - startOffset: 3115 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3115 - endOffset: 3249 -- name: 'Adopting New Tools: Documentation First, Pairing & Slack Help' - startOffset: 3249 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3249 - endOffset: 3368 -- name: 'Product Documentation: PRDs, Customer Notes & Knowledge Base' - startOffset: 3368 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3368 - endOffset: 3504 -- name: 'Idea Flow: Sources, Validation & Backlog Prioritization' - startOffset: 3504 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3504 - endOffset: 3640 -- name: 'Analytics vs Data Science: Where ML Fits in the PM Role' - startOffset: 3640 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3640 - endOffset: 3697 -- name: Closing Remarks & How to Reach Out - startOffset: 3697 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3697 - endOffset: 
3667 --- Links: diff --git a/_podcast/s05e07-ml-vs-analytics.md b/_podcast/production-ml-mlops-and-data-team-building.md similarity index 97% rename from _podcast/s05e07-ml-vs-analytics.md rename to _podcast/production-ml-mlops-and-data-team-building.md index f4f885b0..46a7c71f 100644 --- a/_podcast/s05e07-ml-vs-analytics.md +++ b/_podcast/production-ml-mlops-and-data-team-building.md @@ -1,12 +1,11 @@ --- -title: 'From Analytics to Production ML: Team Building, Experiments, MLOps & Fraud - Detection' +title: 'From Analytics to Production ML: Team Building, Experiments, MLOps & Fraud Detection' short: Similarities and Differences between ML and Analytics +season: 5 +episode: 7 guests: - rishabhbhargava image: images/podcast/s05e07-ml-vs-analytics.jpg -season: 5 -episode: 7 ids: youtube: rMRUa8WxDz4 anchor: Similarities-and-Differences-between-ML-and-Analytics---Rishabh-Bhargava-e18rcam @@ -15,6 +14,112 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Similarities-and-Differences-between-ML-and-Analytics---Rishabh-Bhargava-e18rcam spotify: https://open.spotify.com/episode/19fWdSuxTLwIdzVT45qF9x apple: https://podcasts.apple.com/us/podcast/similarities-and-differences-between-ml-and/id1541710331?i=1000538713607 + +description: Master building data teams, deploying production machine learning and MLOps, running A/B experiments and fraud detection to boost model reliability and ROI +intro: How do teams move beyond dashboards to reliable production ML—while organizing people, running experiments, and tackling use cases like fraud detection? In this episode Rishabh Bhargava (7+ years in analytics and ML, former Sales Engineering lead at Datacoral—acquired by Cloudera—and early Primer.ai engineer; MS CS Stanford) walks through the practical bridge from analytics to ML in production.

We cover data infrastructure and sales-engineering lessons (demos, POCs, integration), early NLP work (summarization, entity extraction), and the differences between prescriptive and predictive analytics. Rishabh explains day-to-day ML operations—models, APIs, SLAs—and the evolution of fraud detection from rule-based systems to machine learning. He digs into experimental workflows (A/B testing, shadow mode), experiment analysis (segmentation, uplift, root cause), and why documentation and analysts’ tribal knowledge matter. We also discuss hiring and team structure—hire data engineers, then analysts, then data scientists—and trade-offs between embedded versus centralized data roles.

If you’re responsible for data strategy, MLOps, or deploying fraud detection models, this episode provides actionable perspectives on experiments, team building, and moving ML into production +topics: +- machine learning +- production +- data analytics +- MLOps +- team building +- data teams +- leadership +- career growth +dateadded: 2021-10-16 + +duration: PT00H59M15S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=0 + endOffset: 128 +- name: 'Career Path: Data Infrastructure and Stanford ML Background' + startOffset: 128 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=128 + endOffset: 235 +- name: 'Sales Engineering: Demos, POCs and Data Integration' + startOffset: 235 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=235 + endOffset: 335 +- name: 'Early Machine Learning Work: NLP, Summarization and Entity Extraction' + startOffset: 335 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=335 + endOffset: 406 +- name: 'Prescriptive vs Predictive Analytics: Definitions and Business Use Cases' + startOffset: 406 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=406 + endOffset: 572 +- name: 'Terminology Problems: The Ambiguity of "Data Science"' + startOffset: 572 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=572 + endOffset: 648 +- name: 'ML vs Analytics: Different Goals, Shared Data Infrastructure' + startOffset: 648 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=648 + endOffset: 828 +- name: 'Machine Learning Day-to-Day: Models, APIs, Predictions and SLAs' + startOffset: 828 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=828 + endOffset: 1058 +- name: 'Fraud Detection: From Rule-Based Systems to Machine Learning' + startOffset: 1058 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1058 + endOffset: 1119 +- name: 'Analyst Responsibilities: Dashboards, Reports and Ad-hoc Queries' + startOffset: 1119 + url: 
https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1119 + endOffset: 1463 +- name: 'Domain Expertise: Analysts'' Tribal Knowledge and SQL Proficiency' + startOffset: 1463 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1463 + endOffset: 1593 +- name: Documentation Limitations and Attempts to Improve Knowledge Sharing + startOffset: 1593 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1593 + endOffset: 1722 +- name: 'Experimental Workflows: Model Experiments, A/B Testing and Shadow Mode' + startOffset: 1722 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1722 + endOffset: 1879 +- name: 'Experiment Analysis: Segmentation, Uplift and Root Cause Investigation' + startOffset: 1879 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1879 + endOffset: 2010 +- name: 'Overlaps and Differences: Data Quality, Timescales and Outputs' + startOffset: 2010 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2010 + endOffset: 2344 +- name: 'Bridging Roles: Notebooks, SQL+Python Workflows and Analytics Engineering' + startOffset: 2344 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2344 + endOffset: 2473 +- name: 'Investment Trends: ML Hype, Analytics Underspend and Data Infrastructure' + startOffset: 2473 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2473 + endOffset: 2582 +- name: 'Hiring Imbalance: Prioritizing Data Scientists vs Data Analysts' + startOffset: 2582 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2582 + endOffset: 2941 +- name: 'Team Organization: Embedded Data Roles Versus Centralized Structures' + startOffset: 2941 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2941 + endOffset: 3341 +- name: 'Building a Data Team: Hire Data Engineers, Then Analysts, Then DS' + startOffset: 3341 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3341 + endOffset: 3499 +- name: 'MLOpsRoundup Newsletter: ML Production, MLOps Insights and Resources' + startOffset: 3499 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3499 + endOffset: 3599 
+- name: 'Contact and Community: Twitter, Slack and Episode Close' + startOffset: 3599 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3599 + endOffset: 3555 + transcript: - header: Episode Introduction & Guest Overview - line: This week, we'll talk about the similarities and differences between machine @@ -1020,116 +1125,6 @@ transcript: sec: 3620 time: '1:00:20' who: Alexey -description: Master building data teams, deploying production machine learning and - MLOps, running A/B experiments and fraud detection to boost model reliability and - ROI. -intro: How do teams move beyond dashboards to reliable production ML—while organizing - people, running experiments, and tackling use cases like fraud detection? In this - episode Rishabh Bhargava (7+ years in analytics and ML, former Sales Engineering - lead at Datacoral—acquired by Cloudera—and early Primer.ai engineer; MS CS Stanford) - walks through the practical bridge from analytics to ML in production.

- We cover data infrastructure and sales-engineering lessons (demos, POCs, integration), - early NLP work (summarization, entity extraction), and the differences between prescriptive - and predictive analytics. Rishabh explains day-to-day ML operations—models, APIs, - SLAs—and the evolution of fraud detection from rule-based systems to machine learning. - He digs into experimental workflows (A/B testing, shadow mode), experiment analysis - (segmentation, uplift, root cause), and why documentation and analysts’ tribal knowledge - matter. We also discuss hiring and team structure—hire data engineers, then analysts, - then data scientists—and trade-offs between embedded versus centralized data roles. -

If you’re responsible for data strategy, MLOps, or deploying fraud detection - models, this episode provides actionable perspectives on experiments, team building, - and moving ML into production. -dateadded: '2021-10-16' -duration: PT00H59M15S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=0 - endOffset: 128 -- name: 'Career Path: Data Infrastructure and Stanford ML Background' - startOffset: 128 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=128 - endOffset: 235 -- name: 'Sales Engineering: Demos, POCs and Data Integration' - startOffset: 235 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=235 - endOffset: 335 -- name: 'Early Machine Learning Work: NLP, Summarization and Entity Extraction' - startOffset: 335 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=335 - endOffset: 406 -- name: 'Prescriptive vs Predictive Analytics: Definitions and Business Use Cases' - startOffset: 406 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=406 - endOffset: 572 -- name: 'Terminology Problems: The Ambiguity of "Data Science"' - startOffset: 572 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=572 - endOffset: 648 -- name: 'ML vs Analytics: Different Goals, Shared Data Infrastructure' - startOffset: 648 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=648 - endOffset: 828 -- name: 'Machine Learning Day-to-Day: Models, APIs, Predictions and SLAs' - startOffset: 828 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=828 - endOffset: 1058 -- name: 'Fraud Detection: From Rule-Based Systems to Machine Learning' - startOffset: 1058 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1058 - endOffset: 1119 -- name: 'Analyst Responsibilities: Dashboards, Reports and Ad-hoc Queries' - startOffset: 1119 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1119 - endOffset: 1463 -- name: 'Domain Expertise: Analysts'' Tribal Knowledge and SQL Proficiency' - startOffset: 1463 - 
url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1463 - endOffset: 1593 -- name: Documentation Limitations and Attempts to Improve Knowledge Sharing - startOffset: 1593 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1593 - endOffset: 1722 -- name: 'Experimental Workflows: Model Experiments, A/B Testing and Shadow Mode' - startOffset: 1722 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1722 - endOffset: 1879 -- name: 'Experiment Analysis: Segmentation, Uplift and Root Cause Investigation' - startOffset: 1879 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1879 - endOffset: 2010 -- name: 'Overlaps and Differences: Data Quality, Timescales and Outputs' - startOffset: 2010 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2010 - endOffset: 2344 -- name: 'Bridging Roles: Notebooks, SQL+Python Workflows and Analytics Engineering' - startOffset: 2344 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2344 - endOffset: 2473 -- name: 'Investment Trends: ML Hype, Analytics Underspend and Data Infrastructure' - startOffset: 2473 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2473 - endOffset: 2582 -- name: 'Hiring Imbalance: Prioritizing Data Scientists vs Data Analysts' - startOffset: 2582 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2582 - endOffset: 2941 -- name: 'Team Organization: Embedded Data Roles Versus Centralized Structures' - startOffset: 2941 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2941 - endOffset: 3341 -- name: 'Building a Data Team: Hire Data Engineers, Then Analysts, Then DS' - startOffset: 3341 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3341 - endOffset: 3499 -- name: 'MLOpsRoundup Newsletter: ML Production, MLOps Insights and Resources' - startOffset: 3499 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3499 - endOffset: 3599 -- name: 'Contact and Community: Twitter, Slack and Episode Close' - startOffset: 3599 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3599 - endOffset: 3555 --- 
Links: diff --git a/_podcast/s04e02-build-your-own-data-pipeline.md b/_podcast/production-ml-pipelines-with-aws-and-kafka.md similarity index 97% rename from _podcast/s04e02-build-your-own-data-pipeline.md rename to _podcast/production-ml-pipelines-with-aws-and-kafka.md index dd99c3d0..88000f15 100644 --- a/_podcast/s04e02-build-your-own-data-pipeline.md +++ b/_podcast/production-ml-pipelines-with-aws-and-kafka.md @@ -1,12 +1,11 @@ --- -title: 'From Notebooks to Production: Build Data Pipelines & Deploy ML (AWS, Kafka, - Streaming)' +title: 'From Notebooks to Production: Build Data Pipelines & Deploy ML (AWS, Kafka, Streaming)' short: Build Your Own Data Pipeline +season: 4 +episode: 2 guests: - andreaskretz image: images/podcast/s04e02-build-your-own-data-pipeline.jpg -season: 4 -episode: 2 ids: youtube: IrZPAG6OBqo anchor: Build-Your-Own-Data-Pipeline---Andreas-Kretz-e139se1 @@ -15,6 +14,140 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Build-Your-Own-Data-Pipeline---Andreas-Kretz-e139se1 spotify: https://open.spotify.com/episode/0fFRCAYFCReMxEiq2RDVak apple: https://podcasts.apple.com/us/podcast/build-your-own-data-pipeline-andreas-kretz/id1541710331?i=1000527643914 + +description: 'Learn to build data pipelines and deploy ML on AWS: productionize notebooks, cut ops risk, choose cost‑effective serving and orchestration.' +intro: 'How do you move models out of notebooks and into reliable production data pipelines using AWS, Kafka, and streaming architectures? In this episode, Andreas Kretz — the “Plumber of Data Science” — walks through the practical steps engineers and data scientists need to productionize notebooks and deploy ML systems.

Andreas, a data engineer focused on platform architecture, explains why data engineering demand is rising and why teams should hire both a data scientist and engineer early. We cover the anatomy of data pipelines — ingestion (events, Kafka/Kinesis), buffering, processing (streaming vs. batch), storage (Parquet on S3) and visualization — plus processing frameworks like Spark, Flink, Glue, and Docker jobs. Andreas outlines a pragmatic stack for scientists: Python, Docker, Flask/FastAPI for prototypes, and how to choose orchestration and scheduling (Lambda/CloudWatch, Airflow, Kubernetes, message queues). You’ll also hear about inference strategies, SageMaker endpoints vs precomputed predictions, model storage, and operational trade-offs.

Listen to gain actionable guidance on building data pipelines, deploying ML on AWS, selecting tools, and getting from prototype to production with minimal operational risk. Find practical learning paths and project ideas to accelerate your data engineering skills.' +topics: +- data engineering +- machine learning +- production +- tools +dateadded: 2021-07-02 + +duration: PT01H01M15S + +quotableClips: +- name: Episode Introduction & Andreas Kretz — "Plumber of Data Science" + startOffset: 116 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=116 + endOffset: 199 +- name: 'Guest Bio: Andreas’s path from software to big data and data engineering' + startOffset: 199 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=199 + endOffset: 343 +- name: 'Market Trend: Why data engineering demand is rising' + startOffset: 343 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=343 + endOffset: 526 +- name: 'Hiring Strategy: Hire a data scientist and engineer early' + startOffset: 526 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=526 + endOffset: 587 +- name: 'Data Scientist Growth: From notebooks to production pipelines' + startOffset: 587 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=587 + endOffset: 723 +- name: 'Operational Risk: Why using many tools breaks operations' + startOffset: 723 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=723 + endOffset: 805 +- name: 'Data Pipeline Anatomy: Ingestion, buffer, processing, storage, visualization' + startOffset: 805 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=805 + endOffset: 911 +- name: 'Ingestion Explained: Events, message queues (Kafka, Kinesis)' + startOffset: 911 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=911 + endOffset: 1011 +- name: 'Processing Modes: Streaming vs. 
batch processing' + startOffset: 1011 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1011 + endOffset: 1094 +- name: 'One-Person Feasibility: Tooling, cloud vs on‑prem, and schema design' + startOffset: 1094 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1094 + endOffset: 1265 +- name: 'Practical Stack for Scientists: Python, Docker, Flask/FastAPI for prototypes' + startOffset: 1265 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1265 + endOffset: 1356 +- name: 'Processing Frameworks Overview: Spark, Flink, Lambda, Glue, Docker jobs' + startOffset: 1356 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1356 + endOffset: 1444 +- name: 'Data Transformation: Role of SQL and dataframe processing' + startOffset: 1444 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1444 + endOffset: 1536 +- name: 'AWS Example: Parquet on S3 and processing options' + startOffset: 1536 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1536 + endOffset: 1642 +- name: 'Case Study: Car price prediction — data sources and architecture' + startOffset: 1642 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1642 + endOffset: 1893 +- name: 'Inference Strategy: Live API calls versus precomputed predictions' + startOffset: 1893 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1893 + endOffset: 2056 +- name: 'Productionizing Notebooks: Dockerized training and model storage on S3' + startOffset: 2056 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2056 + endOffset: 2146 +- name: 'Scheduling Options: Airflow vs CloudWatch/Lambda vs simple schedulers' + startOffset: 2146 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2146 + endOffset: 2273 +- name: 'Model Serving: SageMaker endpoints and cost trade-offs' + startOffset: 2273 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2273 + endOffset: 2401 +- name: 'Orchestration Patterns: Message queues for job sequencing' + startOffset: 2401 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2401 + endOffset: 
2466 +- name: 'Start Simple: Iterate from Lambda/queues to Airflow/Kubernetes' + startOffset: 2466 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2466 + endOffset: 2585 +- name: 'Learning DevOps: Pick tools, read docs, and practice by doing' + startOffset: 2585 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2585 + endOffset: 2731 +- name: 'Tool Selection: Use docs and tutorials to validate choices' + startOffset: 2731 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2731 + endOffset: 2916 +- name: 'Early-Career Skills: Python, SQL, basic networking; AWS and OSS basics' + startOffset: 2916 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2916 + endOffset: 3074 +- name: 'Hadoop Today: Cloud replaces Hadoop for many, but Hadoop persists in legacy' + startOffset: 3074 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3074 + endOffset: 3141 +- name: 'LearnDataEngineering Academy: Curriculum, capstones, and resources' + startOffset: 3141 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3141 + endOffset: 3292 +- name: 'Hands-on Projects: Build an e‑commerce pipeline; use Kaggle datasets' + startOffset: 3292 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3292 + endOffset: 3453 +- name: 'Learning Advice: Avoid huge datasets; start small and iterate' + startOffset: 3453 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3453 + endOffset: 3536 +- name: 'Convincing Stakeholders: Build a $0 proof‑of‑concept and quantify ROI' + startOffset: 3536 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3536 + endOffset: 3725 +- name: 'Find Andreas & Resources: LearnDataEngineering, YouTube, Telegram' + startOffset: 3725 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3725 + endOffset: 3675 + transcript: - header: Episode Introduction & Andreas Kretz — "Plumber of Data Science" - line: Today we'll talk about learning how to build data pipelines for data scientists. 
@@ -1004,148 +1137,6 @@ transcript: sec: 3791 time: '1:03:11' who: Alexey -description: 'Learn to build data pipelines and deploy ML on AWS: productionize notebooks, - cut ops risk, choose cost‑effective serving and orchestration.' -intro: 'How do you move models out of notebooks and into reliable production data - pipelines using AWS, Kafka, and streaming architectures? In this episode, Andreas - Kretz — the “Plumber of Data Science” — walks through the practical steps engineers - and data scientists need to productionize notebooks and deploy ML systems.

- Andreas, a data engineer focused on platform architecture, explains why data engineering - demand is rising and why teams should hire both a data scientist and engineer early. - We cover the anatomy of data pipelines — ingestion (events, Kafka/Kinesis), buffering, - processing (streaming vs. batch), storage (Parquet on S3) and visualization — plus - processing frameworks like Spark, Flink, Glue, and Docker jobs. Andreas outlines - a pragmatic stack for scientists: Python, Docker, Flask/FastAPI for prototypes, - and how to choose orchestration and scheduling (Lambda/CloudWatch, Airflow, Kubernetes, - message queues). You’ll also hear about inference strategies, SageMaker endpoints - vs precomputed predictions, model storage, and operational trade-offs.

- Listen to gain actionable guidance on building data pipelines, deploying ML on AWS, - selecting tools, and getting from prototype to production with minimal operational - risk. Find practical learning paths and project ideas to accelerate your data engineering - skills.' -dateadded: '2021-07-02' -duration: PT01H01M15S -quotableClips: -- name: Episode Introduction & Andreas Kretz — "Plumber of Data Science" - startOffset: 116 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=116 - endOffset: 199 -- name: 'Guest Bio: Andreas’s path from software to big data and data engineering' - startOffset: 199 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=199 - endOffset: 343 -- name: 'Market Trend: Why data engineering demand is rising' - startOffset: 343 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=343 - endOffset: 526 -- name: 'Hiring Strategy: Hire a data scientist and engineer early' - startOffset: 526 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=526 - endOffset: 587 -- name: 'Data Scientist Growth: From notebooks to production pipelines' - startOffset: 587 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=587 - endOffset: 723 -- name: 'Operational Risk: Why using many tools breaks operations' - startOffset: 723 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=723 - endOffset: 805 -- name: 'Data Pipeline Anatomy: Ingestion, buffer, processing, storage, visualization' - startOffset: 805 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=805 - endOffset: 911 -- name: 'Ingestion Explained: Events, message queues (Kafka, Kinesis)' - startOffset: 911 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=911 - endOffset: 1011 -- name: 'Processing Modes: Streaming vs. 
batch processing' - startOffset: 1011 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1011 - endOffset: 1094 -- name: 'One-Person Feasibility: Tooling, cloud vs on‑prem, and schema design' - startOffset: 1094 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1094 - endOffset: 1265 -- name: 'Practical Stack for Scientists: Python, Docker, Flask/FastAPI for prototypes' - startOffset: 1265 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1265 - endOffset: 1356 -- name: 'Processing Frameworks Overview: Spark, Flink, Lambda, Glue, Docker jobs' - startOffset: 1356 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1356 - endOffset: 1444 -- name: 'Data Transformation: Role of SQL and dataframe processing' - startOffset: 1444 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1444 - endOffset: 1536 -- name: 'AWS Example: Parquet on S3 and processing options' - startOffset: 1536 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1536 - endOffset: 1642 -- name: 'Case Study: Car price prediction — data sources and architecture' - startOffset: 1642 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1642 - endOffset: 1893 -- name: 'Inference Strategy: Live API calls versus precomputed predictions' - startOffset: 1893 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1893 - endOffset: 2056 -- name: 'Productionizing Notebooks: Dockerized training and model storage on S3' - startOffset: 2056 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2056 - endOffset: 2146 -- name: 'Scheduling Options: Airflow vs CloudWatch/Lambda vs simple schedulers' - startOffset: 2146 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2146 - endOffset: 2273 -- name: 'Model Serving: SageMaker endpoints and cost trade-offs' - startOffset: 2273 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2273 - endOffset: 2401 -- name: 'Orchestration Patterns: Message queues for job sequencing' - startOffset: 2401 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2401 - endOffset: 
2466 -- name: 'Start Simple: Iterate from Lambda/queues to Airflow/Kubernetes' - startOffset: 2466 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2466 - endOffset: 2585 -- name: 'Learning DevOps: Pick tools, read docs, and practice by doing' - startOffset: 2585 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2585 - endOffset: 2731 -- name: 'Tool Selection: Use docs and tutorials to validate choices' - startOffset: 2731 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2731 - endOffset: 2916 -- name: 'Early-Career Skills: Python, SQL, basic networking; AWS and OSS basics' - startOffset: 2916 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2916 - endOffset: 3074 -- name: 'Hadoop Today: Cloud replaces Hadoop for many, but Hadoop persists in legacy' - startOffset: 3074 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3074 - endOffset: 3141 -- name: 'LearnDataEngineering Academy: Curriculum, capstones, and resources' - startOffset: 3141 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3141 - endOffset: 3292 -- name: 'Hands-on Projects: Build an e‑commerce pipeline; use Kaggle datasets' - startOffset: 3292 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3292 - endOffset: 3453 -- name: 'Learning Advice: Avoid huge datasets; start small and iterate' - startOffset: 3453 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3453 - endOffset: 3536 -- name: 'Convincing Stakeholders: Build a $0 proof‑of‑concept and quantify ROI' - startOffset: 3536 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3536 - endOffset: 3725 -- name: 'Find Andreas & Resources: LearnDataEngineering, YouTube, Telegram' - startOffset: 3725 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3725 - endOffset: 3675 --- diff --git a/_podcast/s03e01-from-pm-to-ds.md b/_podcast/project-manager-to-data-scientist.md similarity index 97% rename from _podcast/s03e01-from-pm-to-ds.md rename to _podcast/project-manager-to-data-scientist.md index 8c632511..40a2ec2b 100644 --- 
a/_podcast/s03e01-from-pm-to-ds.md +++ b/_podcast/project-manager-to-data-scientist.md @@ -1,11 +1,11 @@ --- title: 'From Project Manager to Data Scientist: Skills, Tools, ML Courses & Job Search' short: Transitioning from Project Management to Data Science +season: 3 +episode: 1 guests: - ksenialegostay image: images/podcast/s03e01-from-pm-to-ds.jpg -season: 3 -episode: 1 ids: youtube: rBKezdb9jEc anchor: Transitioning-from-Project-Management-to-Data-Science---Ksenia-Legostay-euig2a @@ -14,6 +14,128 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Transitioning-from-Project-Management-to-Data-Science---Ksenia-Legostay-euig2a spotify: https://open.spotify.com/episode/3vF1B2mKwImsVC7h3NIDJW apple: https://podcasts.apple.com/us/podcast/transitioning-from-project-management-to-data-science/id1541710331?i=1000516467544 + +description: 'Discover how project managers switch to data science: master machine learning, Python, CRISP‑DM, build a portfolio, and land data roles faster.' +intro: 'How do you move from project management into a data science career — and what skills, tools, and courses actually matter? In this episode, Ksenia Legostay, Manager/Data Scientist at momox GmbH, walks through her transition after four years as a project manager into three years researching fraud and anomaly detection and earning a degree in data analysis. We cover career foundations, the difference between analytics and data science, and a concrete learning strategy: assess strengths, target gaps, and build core skills in programming, statistics, and domain expertise.

Ksenia outlines recommended coursework (machine learning, time series, graph analysis), online resources including mlcourse.ai, and a practical tools progression from spreadsheets and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP‑DM to structure projects, starting as a data analyst to build a portfolio, using Kaggle and community resources (OpenDataScience, DataTalks), and preparing for production with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain specialization (fraud detection, node2vec), realistic job search expectations, part‑time learning plans, and essential math topics — a clear roadmap for transitioning to data science.' +topics: +- career transition +- project management +- data science +- career growth +- job search +- tools +- production +dateadded: 2021-04-10 + +duration: PT01H03M20S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=0 + endOffset: 144 +- name: 'Guest Overview: Ksenia and episode focus (project management → data science)' + startOffset: 144 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=144 + endOffset: 180 +- name: 'Career Foundations: math degree, management, and early PM roles' + startOffset: 180 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=180 + endOffset: 275 +- name: 'Motivation for Analytics: customer-centric, data-driven decision making' + startOffset: 275 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=275 + endOffset: 414 +- name: 'Transition Path: moving from data analysis into machine learning' + startOffset: 414 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=414 + endOffset: 450 +- name: 'Analytics vs. Data Science: descriptive analysis vs. 
forecasting' + startOffset: 450 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=450 + endOffset: 513 +- name: 'Learning Strategy: assess strengths and target skill gaps' + startOffset: 513 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=513 + endOffset: 670 +- name: 'Education Choices: benefits of formal degrees vs. self-study' + startOffset: 670 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=670 + endOffset: 780 +- name: 'Core Skill Set: programming, statistics, and domain expertise' + startOffset: 780 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=780 + endOffset: 1038 +- name: 'Recommended Coursework: machine learning, time series, graph analysis' + startOffset: 1038 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1038 + endOffset: 1176 +- name: Online Resources & Course Picks (including mlcourse.ai) + startOffset: 1176 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1176 + endOffset: 1352 +- name: 'Transferable PM Skills: planning, stakeholder communication, business KPIs' + startOffset: 1352 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1352 + endOffset: 1820 +- name: 'Project Frameworks: using CRISP‑DM to structure data projects' + startOffset: 1820 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1820 + endOffset: 1963 +- name: 'Starting as a Data Analyst: apply analysis at work and build portfolio' + startOffset: 1963 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1963 + endOffset: 2088 +- name: 'Tools Progression: spreadsheets → BI tools (Tableau/Trifacta) → Python & + Pandas' + startOffset: 2088 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2088 + endOffset: 2207 +- name: 'Community Learning: OpenDataScience, DataTalks, and mentorship' + startOffset: 2207 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2207 + endOffset: 2334 +- name: 'Kaggle Practice: studying notebooks and collaborative competitions' + startOffset: 2334 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2334 + endOffset: 2467 
+- name: 'Production Readiness: Git, testing, Docker, deployment, and Clean Code' + startOffset: 2467 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2467 + endOffset: 2596 +- name: 'Domain Specialization: research experience in fraud detection and node2vec' + startOffset: 2596 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2596 + endOffset: 2915 +- name: 'Job Search Reality: applications, interviews, and persistence' + startOffset: 2915 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2915 + endOffset: 3075 +- name: 'Bridging Theory and Practice: applying university work in industry' + startOffset: 3075 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3075 + endOffset: 3249 +- name: 'Part‑time Learning Plan: nanodegrees and structured six‑month paths' + startOffset: 3249 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3249 + endOffset: 3462 +- name: 'Essential Math Topics: probability, statistics, and graph theory' + startOffset: 3462 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3462 + endOffset: 3661 +- name: 'Career Habits: critical path, study techniques, and lifelong learning' + startOffset: 3661 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3661 + endOffset: 3687 +- name: 'Final Advice: contribute to projects, narrow your scope, join communities' + startOffset: 3687 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3687 + endOffset: 3907 +- name: Episode Close and Final Wishes + startOffset: 3907 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3907 + endOffset: 3800 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Ksenia and episode focus (project management → data science)' @@ -877,132 +999,6 @@ transcript: sec: 3944 time: '1:05:44' who: Alexey -description: 'Discover how project managers switch to data science: master machine - learning, Python, CRISP‑DM, build a portfolio, and land data roles faster.' 
-intro: 'How do you move from project management into a data science career — and what - skills, tools, and courses actually matter? In this episode, Ksenia Legostay, Manager/Data - Scientist at momox GmbH, walks through her transition after four years as a project - manager into three years researching fraud and anomaly detection and earning a degree - in data analysis. We cover career foundations, the difference between analytics - and data science, and a concrete learning strategy: assess strengths, target gaps, - and build core skills in programming, statistics, and domain expertise.

- Ksenia outlines recommended coursework (machine learning, time series, graph analysis), - online resources including mlcourse.ai, and a practical tools progression from spreadsheets - and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP‑DM to - structure projects, starting as a data analyst to build a portfolio, using Kaggle - and community resources (OpenDataScience, DataTalks), and preparing for production - with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain - specialization (fraud detection, node2vec), realistic job search expectations, part‑time - learning plans, and essential math topics — a clear roadmap for transitioning to - data science.' -dateadded: '2021-04-10' -duration: PT01H03M20S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=0 - endOffset: 144 -- name: 'Guest Overview: Ksenia and episode focus (project management → data science)' - startOffset: 144 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=144 - endOffset: 180 -- name: 'Career Foundations: math degree, management, and early PM roles' - startOffset: 180 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=180 - endOffset: 275 -- name: 'Motivation for Analytics: customer-centric, data-driven decision making' - startOffset: 275 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=275 - endOffset: 414 -- name: 'Transition Path: moving from data analysis into machine learning' - startOffset: 414 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=414 - endOffset: 450 -- name: 'Analytics vs. Data Science: descriptive analysis vs. forecasting' - startOffset: 450 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=450 - endOffset: 513 -- name: 'Learning Strategy: assess strengths and target skill gaps' - startOffset: 513 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=513 - endOffset: 670 -- name: 'Education Choices: benefits of formal degrees vs. 
self-study' - startOffset: 670 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=670 - endOffset: 780 -- name: 'Core Skill Set: programming, statistics, and domain expertise' - startOffset: 780 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=780 - endOffset: 1038 -- name: 'Recommended Coursework: machine learning, time series, graph analysis' - startOffset: 1038 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1038 - endOffset: 1176 -- name: Online Resources & Course Picks (including mlcourse.ai) - startOffset: 1176 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1176 - endOffset: 1352 -- name: 'Transferable PM Skills: planning, stakeholder communication, business KPIs' - startOffset: 1352 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1352 - endOffset: 1820 -- name: 'Project Frameworks: using CRISP‑DM to structure data projects' - startOffset: 1820 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1820 - endOffset: 1963 -- name: 'Starting as a Data Analyst: apply analysis at work and build portfolio' - startOffset: 1963 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1963 - endOffset: 2088 -- name: 'Tools Progression: spreadsheets → BI tools (Tableau/Trifacta) → Python & - Pandas' - startOffset: 2088 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2088 - endOffset: 2207 -- name: 'Community Learning: OpenDataScience, DataTalks, and mentorship' - startOffset: 2207 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2207 - endOffset: 2334 -- name: 'Kaggle Practice: studying notebooks and collaborative competitions' - startOffset: 2334 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2334 - endOffset: 2467 -- name: 'Production Readiness: Git, testing, Docker, deployment, and Clean Code' - startOffset: 2467 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2467 - endOffset: 2596 -- name: 'Domain Specialization: research experience in fraud detection and node2vec' - startOffset: 2596 - url: 
https://www.youtube.com/watch?v=rBKezdb9jEc&t=2596 - endOffset: 2915 -- name: 'Job Search Reality: applications, interviews, and persistence' - startOffset: 2915 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2915 - endOffset: 3075 -- name: 'Bridging Theory and Practice: applying university work in industry' - startOffset: 3075 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3075 - endOffset: 3249 -- name: 'Part‑time Learning Plan: nanodegrees and structured six‑month paths' - startOffset: 3249 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3249 - endOffset: 3462 -- name: 'Essential Math Topics: probability, statistics, and graph theory' - startOffset: 3462 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3462 - endOffset: 3661 -- name: 'Career Habits: critical path, study techniques, and lifelong learning' - startOffset: 3661 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3661 - endOffset: 3687 -- name: 'Final Advice: contribute to projects, narrow your scope, join communities' - startOffset: 3687 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3687 - endOffset: 3907 -- name: Episode Close and Final Wishes - startOffset: 3907 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3907 - endOffset: 3800 --- We talked about: diff --git a/_podcast/s02e10-public-speaking.md b/_podcast/public-speaking-for-data-scientists.md similarity index 98% rename from _podcast/s02e10-public-speaking.md rename to _podcast/public-speaking-for-data-scientists.md index 80291852..108ec29d 100644 --- a/_podcast/s02e10-public-speaking.md +++ b/_podcast/public-speaking-for-data-scientists.md @@ -1,12 +1,11 @@ --- -title: 'Public Speaking for Data Scientists: Master AI Evangelism, Storytelling & - Keynotes' +title: 'Public Speaking for Data Scientists: Master AI Evangelism, Storytelling & Keynotes' short: The Essentials of Public Speaking for Career in Data Science +season: 2 +episode: 10 guests: - bentaylor image: images/podcast/s02e10-public-speaking.jpg 
-season: 2 -episode: 10 ids: youtube: wOFvlR9UBxI anchor: The-Essentials-of-Public-Speaking-for-Career-in-Data-Science---Ben-Taylor-et0m4p @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-Essentials-of-Public-Speaking-for-Career-in-Data-Science---Ben-Taylor-et0m4p spotify: https://open.spotify.com/episode/4QWfObiuYmtOCtpSL5LZf9 apple: https://podcasts.apple.com/us/podcast/essentials-public-speaking-for-career-in-data-science/id1541710331?i=1000513669829 + +description: 'Master public speaking, AI evangelism & storytelling for data scientists: learn repeatable keynote structure, audience hooks, Q&A tactics, and career growth.' +intro: How do data scientists move from technical deep dives to memorable keynotes and effective AI evangelism? In this episode, Ben Taylor, Chief AI Evangelist at DataRobot, breaks down the public speaking playbook for data practitioners who want to persuade, teach, and scale their talks.

Ben draws on a career from engineering and quant roles through startups and acquisitions to explain the mindset for improvement, practical rehearsal habits, and the positioning and messaging that define AI evangelism. Key topics include crafting repeatable keynotes, avoiding early mistakes like technical overload, using story hooks and warm‑ups to capture attention, and structuring talks around 1–3 clear takeaways and calls to action. He also covers introductions that work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling exercises (Pixar lessons), and executive presentations that lead with recommendations while keeping an appendix ready.

Listeners will find actionable guidance on earning speaking stages, writing conference proposals that push boundaries, Q&A strategies (including how and when to admit unknowns), starter topics for newcomers, and resources like Toastmasters and story practice to build a speaker resume and break into AI evangelism. +topics: +- developer relations +- public speaking +- career growth +dateadded: 2021-03-20 + +duration: PT01H09M46S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=0 + endOffset: 92 +- name: 'Guest Overview: Ben Taylor, AI Evangelist at DataRobot' + startOffset: 92 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=92 + endOffset: 188 +- name: 'Mindset for Improvement: Practice and Public Speaking Growth' + startOffset: 188 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=188 + endOffset: 234 +- name: 'Career Path: Engineering, Quant, HireVue, Startup, Acquisition' + startOffset: 234 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=234 + endOffset: 364 +- name: 'AI Evangelism: Role, Positioning, and Messaging Strategy' + startOffset: 364 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=364 + endOffset: 577 +- name: 'Scaling Talks: Process for Crafting Repeatable Keynotes' + startOffset: 577 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=577 + endOffset: 774 +- name: 'Early Mistakes: Technical Overload and Audience Awareness' + startOffset: 774 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=774 + endOffset: 957 +- name: 'Provocative Speaking: Risks, Reception, and Storytelling Ethics' + startOffset: 957 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=957 + endOffset: 1132 +- name: 'Speaking Privately: Corporate Talks and Networking Impact' + startOffset: 1132 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1132 + endOffset: 1194 +- name: 'Attention Techniques: Warm‑up, Emotion, and Story Hooks' + startOffset: 1194 + url: 
https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1194 + endOffset: 1315 +- name: 'Clear Outcomes: 1–3 Key Takeaways and Calls to Action' + startOffset: 1315 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1315 + endOffset: 1457 +- name: 'Introductions that Work: Hero Stories vs. Resume Intros' + startOffset: 1457 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1457 + endOffset: 1857 +- name: 'Translating Data for Impact: From Metrics to Narrative' + startOffset: 1857 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1857 + endOffset: 2052 +- name: 'Storytelling Practice: Everyday Exercises and Pixar Lessons' + startOffset: 2052 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2052 + endOffset: 2191 +- name: 'Ambitious Goals: Memorable Talks and Long‑term Impact' + startOffset: 2191 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2191 + endOffset: 2395 +- name: 'Executive Presentations: Recommendations First, Appendix Ready' + startOffset: 2395 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2395 + endOffset: 2858 +- name: 'Earning Stages: From Meetups to Conference Speaking Slots' + startOffset: 2858 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2858 + endOffset: 3020 +- name: 'Conference Proposals: Novelty, Creativity, and “Scare Yourself” Topics' + startOffset: 3020 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3020 + endOffset: 3133 +- name: 'Q&A Strategy: Handling Tough Questions and Admitting Unknowns' + startOffset: 3133 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3133 + endOffset: 3228 +- name: 'Path to Keynotes: Building a Speaker Resume and Personal Brand' + startOffset: 3228 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3228 + endOffset: 3397 +- name: 'Starter Topics for New Data Scientists: Business Problems Over Hype' + startOffset: 3397 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3397 + endOffset: 3572 +- name: 'Pitching Meetups: First Impressions, Endorsements, and Networking' + 
startOffset: 3572 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3572 + endOffset: 3738 +- name: 'Core Skill: Maximizing Audience Attention (Public Speaking Focus)' + startOffset: 3738 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3738 + endOffset: 3848 +- name: 'Breaking into AI Evangelism: Build Speaking Experience and Presence' + startOffset: 3848 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3848 + endOffset: 3914 +- name: 'Resources & Practice: Recommended Books, Toastmasters, Story Exercises' + startOffset: 3914 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3914 + endOffset: 4124 +- name: Closing Anecdotes and Final Advice + startOffset: 4124 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=4124 + endOffset: 4186 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Ben Taylor, AI Evangelist at DataRobot' @@ -1121,131 +1237,6 @@ transcript: sec: 4278 time: '1:11:18' who: Alexey -description: 'Master public speaking, AI evangelism & storytelling for data scientists: - learn repeatable keynote structure, audience hooks, Q&A tactics, and career growth.' -intro: How do data scientists move from technical deep dives to memorable keynotes - and effective AI evangelism? In this episode, Ben Taylor, Chief AI Evangelist at - DataRobot, breaks down the public speaking playbook for data practitioners who want - to persuade, teach, and scale their talks.

Ben draws on a career from engineering - and quant roles through startups and acquisitions to explain the mindset for improvement, - practical rehearsal habits, and the positioning and messaging that define AI evangelism. - Key topics include crafting repeatable keynotes, avoiding early mistakes like technical - overload, using story hooks and warm‑ups to capture attention, and structuring talks - around 1–3 clear takeaways and calls to action. He also covers introductions that - work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling - exercises (Pixar lessons), and executive presentations that lead with recommendations - while keeping an appendix ready.

Listeners will find actionable guidance - on earning speaking stages, writing conference proposals that push boundaries, Q&A - strategies (including how and when to admit unknowns), starter topics for newcomers, - and resources like Toastmasters and story practice to build a speaker resume and - break into AI evangelism. -dateadded: '2021-03-20' -duration: PT01H09M46S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=0 - endOffset: 92 -- name: 'Guest Overview: Ben Taylor, AI Evangelist at DataRobot' - startOffset: 92 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=92 - endOffset: 188 -- name: 'Mindset for Improvement: Practice and Public Speaking Growth' - startOffset: 188 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=188 - endOffset: 234 -- name: 'Career Path: Engineering, Quant, HireVue, Startup, Acquisition' - startOffset: 234 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=234 - endOffset: 364 -- name: 'AI Evangelism: Role, Positioning, and Messaging Strategy' - startOffset: 364 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=364 - endOffset: 577 -- name: 'Scaling Talks: Process for Crafting Repeatable Keynotes' - startOffset: 577 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=577 - endOffset: 774 -- name: 'Early Mistakes: Technical Overload and Audience Awareness' - startOffset: 774 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=774 - endOffset: 957 -- name: 'Provocative Speaking: Risks, Reception, and Storytelling Ethics' - startOffset: 957 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=957 - endOffset: 1132 -- name: 'Speaking Privately: Corporate Talks and Networking Impact' - startOffset: 1132 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1132 - endOffset: 1194 -- name: 'Attention Techniques: Warm‑up, Emotion, and Story Hooks' - startOffset: 1194 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1194 - endOffset: 1315 -- name: 'Clear Outcomes: 
1–3 Key Takeaways and Calls to Action' - startOffset: 1315 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1315 - endOffset: 1457 -- name: 'Introductions that Work: Hero Stories vs. Resume Intros' - startOffset: 1457 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1457 - endOffset: 1857 -- name: 'Translating Data for Impact: From Metrics to Narrative' - startOffset: 1857 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1857 - endOffset: 2052 -- name: 'Storytelling Practice: Everyday Exercises and Pixar Lessons' - startOffset: 2052 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2052 - endOffset: 2191 -- name: 'Ambitious Goals: Memorable Talks and Long‑term Impact' - startOffset: 2191 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2191 - endOffset: 2395 -- name: 'Executive Presentations: Recommendations First, Appendix Ready' - startOffset: 2395 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2395 - endOffset: 2858 -- name: 'Earning Stages: From Meetups to Conference Speaking Slots' - startOffset: 2858 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2858 - endOffset: 3020 -- name: 'Conference Proposals: Novelty, Creativity, and “Scare Yourself” Topics' - startOffset: 3020 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3020 - endOffset: 3133 -- name: 'Q&A Strategy: Handling Tough Questions and Admitting Unknowns' - startOffset: 3133 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3133 - endOffset: 3228 -- name: 'Path to Keynotes: Building a Speaker Resume and Personal Brand' - startOffset: 3228 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3228 - endOffset: 3397 -- name: 'Starter Topics for New Data Scientists: Business Problems Over Hype' - startOffset: 3397 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3397 - endOffset: 3572 -- name: 'Pitching Meetups: First Impressions, Endorsements, and Networking' - startOffset: 3572 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3572 - endOffset: 3738 -- name: 
'Core Skill: Maximizing Audience Attention (Public Speaking Focus)' - startOffset: 3738 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3738 - endOffset: 3848 -- name: 'Breaking into AI Evangelism: Build Speaking Experience and Presence' - startOffset: 3848 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3848 - endOffset: 3914 -- name: 'Resources & Practice: Recommended Books, Toastmasters, Story Exercises' - startOffset: 3914 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3914 - endOffset: 4124 -- name: Closing Anecdotes and Final Advice - startOffset: 4124 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=4124 - endOffset: 4186 --- diff --git a/_podcast/s15e05-mastering-data-engineering-as-remote-worker.md b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md similarity index 96% rename from _podcast/s15e05-mastering-data-engineering-as-remote-worker.md rename to _podcast/remote-data-engineering-work-and-building-iot-platforms.md index 0d3b0a67..ae18c8a0 100644 --- a/_podcast/s15e05-mastering-data-engineering-as-remote-worker.md +++ b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md @@ -1,20 +1,153 @@ --- +title: 'Remote Data Engineering Life: Building IoT Platforms, Career Transitions & Newsletter-Driven Personal Growth' +short: Mastering Data Engineering as a Remote Worker +season: 15 episode: 5 guests: - josemaria +image: images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg ids: anchor: atatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c youtube: UX7UShEioKc -image: images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c apple: https://podcasts.apple.com/us/podcast/mastering-data-engineering-as-a-remote-worker-jos%C3%A9/id1541710331?i=1000624908396 spotify: 
https://open.spotify.com/episode/2RLxjkPbUO3FBfFpKPHzls?si=TVveHW7PQcW7yGbOyJsJpg youtube: https://www.youtube.com/watch?v=UX7UShEioKc -season: 15 -short: Mastering Data Engineering as a Remote Worker -title: 'Build IoT Platforms & Data Pipelines for Remote Work: Hiring, Onboarding & - Personal Branding' + +description: 'Navigate remote data engineering after relocation: IoT platform architecture, sensor onboarding workflows, and newsletter-driven personal branding for career growth.' +intro: "What does it take to thrive as a remote data engineer — building IoT platforms, navigating international career moves, and leveraging writing for professional growth? In this episode, José María Sánchez Salas — a computer scientist turned data engineer and newsletter author — shares his journey from Spain to Norway and the realities of remote IoT platform work.

We explore the daily life of remote data engineering: work routines, wellness strategies, and Norway's unique hiring landscape with geographic constraints around Oslo, Bergen, and Trondheim. José breaks down IoT platform engineering fundamentals — treating platforms as an 'operating system' for sensors, sensor onboarding workflows, real-time data processing, and solving common IoT challenges like remote diagnostics and business context integration. The conversation covers data exploration patterns, ETL pipeline design, stakeholder communication, and how José uses his newsletter as both a learning tool and career advancement strategy — translating complex technical work for broader audiences and building professional visibility. You'll get actionable insights on job searching across borders, data engineering learning paths, remote work legal considerations, and communication skills that matter for distributed teams. Listen to discover practical approaches for IoT system design, remote team management, and using content creation to accelerate your data engineering career." 
+topics: +- data engineering +- remote work +- personal brand +- career growth +dateadded: 2023-08-28 + +duration: PT00H58M01S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=0 + endOffset: 69 +- name: Episode Overview & Guest Introduction + startOffset: 69 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=69 + endOffset: 110 +- name: 'Background: Spain to Norway and Career Transition' + startOffset: 110 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=110 + endOffset: 261 +- name: 'Relocation Story: Moving for Partner’s Job' + startOffset: 261 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=261 + endOffset: 301 +- name: 'Remote Work Routine: Two Focused Work Blocks' + startOffset: 301 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=301 + endOffset: 435 +- name: Morning Routine & Productivity Habits + startOffset: 435 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=435 + endOffset: 493 +- name: Remote-First Hiring Landscape in Norway + startOffset: 493 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=493 + endOffset: 599 +- name: 'Role Overview: IoT Platform Responsibilities' + startOffset: 599 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=599 + endOffset: 749 +- name: 'IoT Platform Architecture: "Operating System" for Sensors' + startOffset: 749 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=749 + endOffset: 797 +- name: 'Geographic Hiring Constraints: Oslo, Bergen, Trondheim' + startOffset: 797 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=797 + endOffset: 931 +- name: 'Remote Work Challenges: Loneliness & Isolation' + startOffset: 931 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=931 + endOffset: 1097 +- name: 'Workspace Boundaries: Separating Home and Work' + startOffset: 1097 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1097 + endOffset: 1231 +- name: 'IoT Data Challenges: Remote Diagnostics & Context' + startOffset: 1231 + url: 
https://www.youtube.com/watch?v=UX7UShEioKc&t=1231 + endOffset: 1444 +- name: 'Turning Raw Data into Business Value: Understand the Why' + startOffset: 1444 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1444 + endOffset: 1654 +- name: Data Exploration, ETL, and Building Data Pipelines + startOffset: 1654 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1654 + endOffset: 1821 +- name: 'Internal Stakeholders: Platform Consumers & Users' + startOffset: 1821 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1821 + endOffset: 1864 +- name: Sensor Onboarding Workflow & Real-Time Processing + startOffset: 1864 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1864 + endOffset: 1937 +- name: 'Newsletter Purpose: Explaining Data to Non-Technical Audiences' + startOffset: 1937 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1937 + endOffset: 2002 +- name: 'Newsletter Origin: Writing as Communication for Introverts' + startOffset: 2002 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2002 + endOffset: 2157 +- name: 'Content Strategy: Inspiration and Daily Cadence' + startOffset: 2157 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2157 + endOffset: 2290 +- name: 'Personal Branding: Newsletter as Opportunity Driver' + startOffset: 2290 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2290 + endOffset: 2359 +- name: 'Newsletter Production Tactics: Idea Slicing & Repetition' + startOffset: 2359 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2359 + endOffset: 2420 +- name: 'Burnout Coping: Nature, Exercise, and Routine' + startOffset: 2420 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2420 + endOffset: 2801 +- name: 'Job Search Resources: finn.no, LinkedIn, Upwork' + startOffset: 2801 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2801 + endOffset: 2916 +- name: 'Learning Data Engineering: Software Foundations & Projects' + startOffset: 2916 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2916 + endOffset: 3132 +- name: 
Legal & Tax Basics for Remote Work in Norway + startOffset: 3132 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3132 + endOffset: 3211 +- name: 'Benefits of Remote Work: Location Flexibility & Time Savings' + startOffset: 3211 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3211 + endOffset: 3319 +- name: 'Personal Mobility: Partner Contracts and Remote Advantages' + startOffset: 3319 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3319 + endOffset: 3432 +- name: 'Recommendation: Develop Soft Skills, Especially Communication' + startOffset: 3432 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3432 + endOffset: 3522 +- name: Episode Closing & Final Remarks + startOffset: 3522 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3522 + endOffset: 3481 + transcript: - header: Podcast Introduction - header: Episode Overview & Guest Introduction @@ -1003,148 +1136,6 @@ transcript: sec: 3550 time: '59:10' who: Alexey -description: 'Master IoT platforms and data pipelines for remote work: hiring & onboarding - tips, sensor architecture, and personal branding tactics to advance your career.' -intro: 'How do you build reliable IoT platforms and end-to-end data pipelines while - hiring, onboarding, and staying visible as a remote data engineer? In this episode - José María Sánchez Salas — a computer scientist focused on data engineering and - author of a well-read data engineering newsletter — walks through practical answers - from his move from Spain to Norway to running IoT platform work remotely.

- We cover remote work routines and wellbeing, Norway’s remote-first hiring landscape - and geographic constraints (Oslo, Bergen, Trondheim), and the core responsibilities - of an IoT platform engineer: treating the platform as an “operating system” for - sensors, sensor onboarding workflows, real-time processing, and common IoT data - challenges like remote diagnostics and adding contextual business value. José explains - data exploration, ETL and pipeline patterns, stakeholder-driven platform design, - and tactics for translating technical work to non-technical audiences via a newsletter - — a tool he uses for personal branding and opportunity generation. You’ll also get - practical job-search resources, learning paths for data engineering, and tips on - legal/tax basics and communication skills for remote roles. Listen to learn concrete - strategies for building IoT systems, hiring and onboarding remotely, and using content - to advance your career.' -dateadded: '2023-08-28' -duration: PT00H58M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=0 - endOffset: 69 -- name: Episode Overview & Guest Introduction - startOffset: 69 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=69 - endOffset: 110 -- name: 'Background: Spain to Norway and Career Transition' - startOffset: 110 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=110 - endOffset: 261 -- name: 'Relocation Story: Moving for Partner’s Job' - startOffset: 261 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=261 - endOffset: 301 -- name: 'Remote Work Routine: Two Focused Work Blocks' - startOffset: 301 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=301 - endOffset: 435 -- name: Morning Routine & Productivity Habits - startOffset: 435 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=435 - endOffset: 493 -- name: Remote-First Hiring Landscape in Norway - startOffset: 493 - url: 
https://www.youtube.com/watch?v=UX7UShEioKc&t=493 - endOffset: 599 -- name: 'Role Overview: IoT Platform Responsibilities' - startOffset: 599 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=599 - endOffset: 749 -- name: 'IoT Platform Architecture: "Operating System" for Sensors' - startOffset: 749 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=749 - endOffset: 797 -- name: 'Geographic Hiring Constraints: Oslo, Bergen, Trondheim' - startOffset: 797 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=797 - endOffset: 931 -- name: 'Remote Work Challenges: Loneliness & Isolation' - startOffset: 931 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=931 - endOffset: 1097 -- name: 'Workspace Boundaries: Separating Home and Work' - startOffset: 1097 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1097 - endOffset: 1231 -- name: 'IoT Data Challenges: Remote Diagnostics & Context' - startOffset: 1231 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1231 - endOffset: 1444 -- name: 'Turning Raw Data into Business Value: Understand the Why' - startOffset: 1444 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1444 - endOffset: 1654 -- name: Data Exploration, ETL, and Building Data Pipelines - startOffset: 1654 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1654 - endOffset: 1821 -- name: 'Internal Stakeholders: Platform Consumers & Users' - startOffset: 1821 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1821 - endOffset: 1864 -- name: Sensor Onboarding Workflow & Real-Time Processing - startOffset: 1864 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1864 - endOffset: 1937 -- name: 'Newsletter Purpose: Explaining Data to Non-Technical Audiences' - startOffset: 1937 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1937 - endOffset: 2002 -- name: 'Newsletter Origin: Writing as Communication for Introverts' - startOffset: 2002 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2002 - endOffset: 2157 -- name: 'Content Strategy: 
Inspiration and Daily Cadence' - startOffset: 2157 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2157 - endOffset: 2290 -- name: 'Personal Branding: Newsletter as Opportunity Driver' - startOffset: 2290 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2290 - endOffset: 2359 -- name: 'Newsletter Production Tactics: Idea Slicing & Repetition' - startOffset: 2359 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2359 - endOffset: 2420 -- name: 'Burnout Coping: Nature, Exercise, and Routine' - startOffset: 2420 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2420 - endOffset: 2801 -- name: 'Job Search Resources: finn.no, LinkedIn, Upwork' - startOffset: 2801 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2801 - endOffset: 2916 -- name: 'Learning Data Engineering: Software Foundations & Projects' - startOffset: 2916 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2916 - endOffset: 3132 -- name: Legal & Tax Basics for Remote Work in Norway - startOffset: 3132 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3132 - endOffset: 3211 -- name: 'Benefits of Remote Work: Location Flexibility & Time Savings' - startOffset: 3211 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3211 - endOffset: 3319 -- name: 'Personal Mobility: Partner Contracts and Remote Advantages' - startOffset: 3319 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3319 - endOffset: 3432 -- name: 'Recommendation: Develop Soft Skills, Especially Communication' - startOffset: 3432 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3432 - endOffset: 3522 -- name: Episode Closing & Final Remarks - startOffset: 3522 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3522 - endOffset: 3481 --- Links: diff --git a/_podcast/s05e05-researchers-vs-engineers.md b/_podcast/research-to-production-ml-systems-roadmap.md similarity index 98% rename from _podcast/s05e05-researchers-vs-engineers.md rename to _podcast/research-to-production-ml-systems-roadmap.md index 
7191b86b..b115e038 100644 --- a/_podcast/s05e05-researchers-vs-engineers.md +++ b/_podcast/research-to-production-ml-systems-roadmap.md @@ -1,12 +1,11 @@ --- -title: 'From Research to Production: Build Reproducible, Deployable Full-Stack ML - Systems' +title: 'From Research to Production: Build Reproducible, Deployable Full-Stack ML Systems' short: What Researchers and Engineers Can Learn from Each Other +season: 5 +episode: 5 guests: - mihaileric image: images/podcast/s05e05-researchers-vs-engineers.jpg -season: 5 -episode: 5 ids: youtube: d9xVXqKq3sU anchor: What-Researchers-and-Engineers-Can-Learn-from-Each-Other---Mihail-Eric-e1854bj @@ -15,6 +14,133 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/What-Researchers-and-Engineers-Can-Learn-from-Each-Other---Mihail-Eric-e1854bj spotify: https://open.spotify.com/episode/0cJJCjK7nX5p1PdeMvGrVL apple: https://podcasts.apple.com/us/podcast/what-researchers-and-engineers-can-learn-from-each/id1541710331?i=1000537258362 + +description: 'Learn to build reproducible, deployable full-stack ML systems: deploy models, bridge research-to-production, and master PyTorch, Docker & MLOps workflows.' +intro: How do you move ML work from research notebooks to reproducible, deployable full‑stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that exact challenge. We trace Mihail’s path from academic NLP to self‑driving and conversational AI, then into hybrid roles that blend hypothesis‑driven research with production engineering.

Key topics include research infrastructure for data collection and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, role fluidity, code reviews for researchers, and practical skills swaps so researchers learn reproducibility and engineers learn experimental rigor.

Listeners will get concrete guidance on building end‑to‑end ML systems, improving reproducibility and model deployment, and actionable career advice (internships, reading papers, reproducing models). Tune in to learn practical steps and tools to bridge research to production for real‑world ML systems. +topics: +- machine learning +- MLOps +- academia +- production +- career growth +dateadded: 2021-10-02 + +duration: PT01H01M36S + +quotableClips: +- name: Podcast Introduction + startOffset: 77 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=77 + endOffset: 112 +- name: 'Guest Overview: Mihail’s Roles and Work' + startOffset: 112 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=112 + endOffset: 120 +- name: 'Guest Background: Stanford NLP and Early Research' + startOffset: 120 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=120 + endOffset: 300 +- name: 'From NLP to Self-Driving: Shared Long-Tail Challenges' + startOffset: 300 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=300 + endOffset: 406 +- name: 'Transition to Industry: Building Engineering Foundations' + startOffset: 406 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=406 + endOffset: 514 +- name: 'Research Infrastructure: Data Collection and Prototyping' + startOffset: 514 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=514 + endOffset: 561 +- name: 'Hybrid Role at Amazon: Research Integrated with Production' + startOffset: 561 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=561 + endOffset: 652 +- name: 'Researcher Focus: Hypothesis-Driven Work and Benchmarks' + startOffset: 652 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=652 + endOffset: 770 +- name: 'Experimental Tooling: Notebooks, W&B, Fast Prototyping' + startOffset: 770 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=770 + endOffset: 885 +- name: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' + startOffset: 885 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=885 + endOffset:
1055 +- name: 'ML Engineer Focus: Full ML Lifecycle and Production Systems' + startOffset: 1055 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1055 + endOffset: 1073 +- name: 'Engineering Tooling: PyTorch, Docker, Cloud, and Web Frameworks' + startOffset: 1073 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1073 + endOffset: 1225 +- name: 'Data Science Evolution: From Data Science 1.0 to Data Science 2.0' + startOffset: 1225 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1225 + endOffset: 1412 +- name: 'Skills Swap — Researchers Learn: Engineering Rigor and Reproducibility' + startOffset: 1412 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1412 + endOffset: 1730 +- name: 'Skills Swap — Engineers Learn: Handling Uncertainty and Experimental Rigor' + startOffset: 1730 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1730 + endOffset: 1816 +- name: 'Bridging the Gap: Cultural and Organizational Challenges' + startOffset: 1816 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1816 + endOffset: 2060 +- name: 'Embedded Teams vs. 
Handoffs: Avoiding the "Throw-It-Over-the-Wall" Trap' + startOffset: 2060 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2060 + endOffset: 2217 +- name: 'Breaking Silos: Leadership, Sprints, and Active Collaboration' + startOffset: 2217 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2217 + endOffset: 2348 +- name: 'Role Fluidity: Flexible Responsibilities in High-Performing Teams' + startOffset: 2348 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2348 + endOffset: 2433 +- name: 'Full-Stack Data Scientist: From Model Development to Deployment' + startOffset: 2433 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2433 + endOffset: 2676 +- name: 'Advice for Researchers: Build End-to-End Systems and Deploy' + startOffset: 2676 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2676 + endOffset: 2817 +- name: 'Code Reviews for Researchers: Rapid Engineering Skill Development' + startOffset: 2817 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2817 + endOffset: 2871 +- name: 'Advice for Engineers: Read Papers, Reproduce Models, Run Experiments' + startOffset: 2871 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2871 + endOffset: 3088 +- name: 'Practical Paper Reading: Tutorials, Code, and Researcher Collaboration' + startOffset: 3088 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3088 + endOffset: 3331 +- name: 'Choosing a Path: Internships, Masters, PhD — Try Both Early' + startOffset: 3331 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3331 + endOffset: 3536 +- name: 'Confetti.ai: Career Preparation and Learning Resources for ML Roles' + startOffset: 3536 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3536 + endOffset: 3700 +- name: 'Contact & Resources: Twitter, LinkedIn, and Confetti.ai' + startOffset: 3700 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3700 + endOffset: 3756 +- name: Episode Wrap-Up and Key Takeaways + startOffset: 3756 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3756 + endOffset: 
3696 + transcript: - header: Podcast Introduction - line: This week, we'll talk about machine learning researchers and machine learning @@ -1124,139 +1250,6 @@ transcript: sec: 3773 time: '1:02:53' who: Alexey -description: 'Learn to build reproducible, deployable full-stack ML systems: deploy - models, bridge research-to-production, and master PyTorch, Docker & MLOps workflows.' -intro: How do you move ML work from research notebooks to reproducible, deployable - full‑stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation - and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS - and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that - exact challenge. We trace Mihail’s path from academic NLP to self‑driving and conversational - AI, then into hybrid roles that blend hypothesis‑driven research with production - engineering.

Key topics include research infrastructure for data collection - and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), - engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and - the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, - role fluidity, code reviews for researchers, and practical skills swaps so researchers - learn reproducibility and engineers learn experimental rigor.

Listeners - will get concrete guidance on building end‑to‑end ML systems, improving reproducibility - and model deployment, and actionable career advice (internships, reading papers, - reproducing models). Tune in to learn practical steps and tools to bridge research - to production for real‑world ML systems. -dateadded: '2021-10-02' -duration: PT01H01M36S -quotableClips: -- name: Podcast Introduction - startOffset: 77 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=77 - endOffset: 112 -- name: 'Guest Overview: Mihail’s Roles and Work' - startOffset: 112 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=112 - endOffset: 120 -- name: 'Guest Background: Stanford NLP and Early Research' - startOffset: 120 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=120 - endOffset: 300 -- name: 'From NLP to Self-Driving: Shared Long-Tail Challenges' - startOffset: 300 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=300 - endOffset: 406 -- name: 'Transition to Industry: Building Engineering Foundations' - startOffset: 406 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=406 - endOffset: 514 -- name: 'Research Infrastructure: Data Collection and Prototyping' - startOffset: 514 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=514 - endOffset: 561 -- name: 'Hybrid Role at Amazon: Research Integrated with Production' - startOffset: 561 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=561 - endOffset: 652 -- name: 'Researcher Focus: Hypothesis-Driven Work and Benchmarks' - startOffset: 652 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=652 - endOffset: 770 -- name: 'Experimental Tooling: Notebooks, W&B, Fast Prototyping' - startOffset: 770 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=770 - endOffset: 885 -- name: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' - startOffset: 885 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=885 - endOffset: 1055 -- name: 'ML Engineer Focus: Full ML Lifecycle and Production Systems' 
- startOffset: 1055 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1055 - endOffset: 1073 -- name: 'Engineering Tooling: PyTorch, Docker, Cloud, and Web Frameworks' - startOffset: 1073 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1073 - endOffset: 1225 -- name: 'Data Science Evolution: From Data Science 1.0 to Data Science 2.0' - startOffset: 1225 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1225 - endOffset: 1412 -- name: 'Skills Swap — Researchers Learn: Engineering Rigor and Reproducibility' - startOffset: 1412 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1412 - endOffset: 1730 -- name: 'Skills Swap — Engineers Learn: Handling Uncertainty and Experimental Rigor' - startOffset: 1730 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1730 - endOffset: 1816 -- name: 'Bridging the Gap: Cultural and Organizational Challenges' - startOffset: 1816 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1816 - endOffset: 2060 -- name: 'Embedded Teams vs. Handoffs: Avoiding the "Throw-It-Over-the-Wall" Trap' - startOffset: 2060 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2060 - endOffset: 2217 -- name: 'Breaking Silos: Leadership, Sprints, and Active Collaboration' - startOffset: 2217 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2217 - endOffset: 2348 -- name: 'Role Fluidity: Flexible Responsibilities in High-Performing Teams' - startOffset: 2348 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2348 - endOffset: 2433 -- name: 'Full-Stack Data Scientist: From Model Development to Deployment' - startOffset: 2433 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2433 - endOffset: 2676 -- name: 'Advice for Researchers: Build End-to-End Systems and Deploy' - startOffset: 2676 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2676 - endOffset: 2817 -- name: 'Code Reviews for Researchers: Rapid Engineering Skill Development' - startOffset: 2817 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2817 - endOffset: 2871 -- 
name: 'Advice for Engineers: Read Papers, Reproduce Models, Run Experiments' - startOffset: 2871 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2871 - endOffset: 3088 -- name: 'Practical Paper Reading: Tutorials, Code, and Researcher Collaboration' - startOffset: 3088 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3088 - endOffset: 3331 -- name: 'Choosing a Path: Internships, Masters, PhD — Try Both Early' - startOffset: 3331 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3331 - endOffset: 3536 -- name: 'Confetti.ai: Career Preparation and Learning Resources for ML Roles' - startOffset: 3536 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3536 - endOffset: 3700 -- name: 'Contact & Resources: Twitter, LinkedIn, and Confetti.ai' - startOffset: 3700 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3700 - endOffset: 3756 -- name: Episode Wrap-Up and Key Takeaways - startOffset: 3756 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3756 - endOffset: 3696 --- Links: diff --git a/_podcast/s10e09-responsible-and-explainable-ai.md b/_podcast/responsible-explainable-ai-bias-detection.md similarity index 97% rename from _podcast/s10e09-responsible-and-explainable-ai.md rename to _podcast/responsible-explainable-ai-bias-detection.md index 5b178f3b..c69be1a8 100644 --- a/_podcast/s10e09-responsible-and-explainable-ai.md +++ b/_podcast/responsible-explainable-ai-bias-detection.md @@ -1,20 +1,139 @@ --- +title: 'Responsible & Explainable AI: Practical Guide to Bias Detection, Fairness & Governance' +short: Responsible and Explainable AI +season: 10 episode: 9 guests: - supreetkaur +image: images/podcast/s10e09-responsible-and-explainable-ai.jpg ids: anchor: Responsible-and-Explainable-AI---Supreet-Kaur-e1o6mgj youtube: 8Eb5mG-pC3o -image: images/podcast/s10e09-responsible-and-explainable-ai.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Responsible-and-Explainable-AI---Supreet-Kaur-e1o6mgj apple: 
https://podcasts.apple.com/us/podcast/responsible-and-explainable-ai-supreet-kaur/id1541710331?i=1000581178150 spotify: https://open.spotify.com/episode/0xCSjSCG6tTiMSGfUJrMmO youtube: https://www.youtube.com/watch?v=8Eb5mG-pC3o -season: 10 -short: Responsible and Explainable AI -title: 'Responsible & Explainable AI: Practical Guide to Bias Detection, Fairness - & Governance' + +description: Discover Responsible AI & Explainable AI tactics for bias detection, fairness checks and governance, practical tools to build trustworthy, compliant ML models. +intro: How do you detect bias, enforce fairness, and govern AI systems in production without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia and Rutgers — walks through a practical roadmap for responsible AI and explainable AI grounded in real-world examples.

We define responsible AI and contrast it with post‑hoc explainability, then unpack a credit decision bias case to show disparate outcomes in practice. Supreet outlines glass‑box explainability techniques, data‑level fairness checks (skewness, missingness, coverage), and EDA methods for bias detection. She covers PII handling, feature necessity assessments with SMEs and compliance, and automating data quality and monitoring. You’ll hear tool recommendations — What‑If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local interpretability, drift and feedback‑loop detection, and trade‑offs between accuracy and interpretability.

Listeners will gain actionable guidance on bias detection, model interpretability, AI governance structures, and managing AutoML and regulated‑industry risks — practical steps to make AI systems more fair, transparent, and accountable. +topics: +- responsible AI +- explainable AI +- bias detection +- fairness +- governance +- tools +dateadded: 2022-10-02 + +duration: PT00H58M56S + +quotableClips: +- name: 'Episode Introduction: Responsible and Explainable AI' + startOffset: 0 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=0 + endOffset: 134 +- name: 'Career Journey: Master''s, Consulting, and Founding DataBuzz' + startOffset: 134 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=134 + endOffset: 234 +- name: 'Data Strategy Role: Building AI Products at Morgan Stanley' + startOffset: 234 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=234 + endOffset: 283 +- name: 'Responsible AI: Definition, Trust, and Stakeholder Collaboration' + startOffset: 283 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=283 + endOffset: 402 +- name: 'Credit Decision Bias Example: Explaining Disparate Outcomes' + startOffset: 402 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=402 + endOffset: 500 +- name: 'Explainable vs Responsible AI: Post‑mortem Tools vs Governance Mindset' + startOffset: 500 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=500 + endOffset: 630 +- name: 'Glass‑Box Approach: Explainable AI Techniques Overview' + startOffset: 630 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=630 + endOffset: 696 +- name: 'Data‑Level Fairness Checks: Skewness, Missingness, and Coverage' + startOffset: 696 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=696 + endOffset: 768 +- name: Exploratory Data Analysis for Bias Detection + startOffset: 768 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=768 + endOffset: 879 +- name: 'PII Handling: Age, Gender, Masking, and Use‑case Justification' + startOffset: 879 + url:
https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=879 + endOffset: 1040 +- name: 'Feature Necessity: Product, SME, and Compliance Decisioning' + startOffset: 1040 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1040 + endOffset: 1107 +- name: 'Automating Data Quality: DQ Tools, Alerts, and Monitoring' + startOffset: 1107 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1107 + endOffset: 1143 +- name: 'Model Explainability Tools: What‑If, Skater, and AI Explainability 360' + startOffset: 1143 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1143 + endOffset: 1404 +- name: 'Local Interpretability: LIME, SHAP, and Surrogate Models' + startOffset: 1404 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1404 + endOffset: 1462 +- name: 'Ethics vs Profitability: Balancing Fairness and Business Objectives' + startOffset: 1462 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1462 + endOffset: 1658 +- name: 'Cross‑Functional Governance: SMEs, Compliance, and Leadership Roles' + startOffset: 1658 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1658 + endOffset: 1949 +- name: 'Accuracy vs Interpretability: Managing Model Complexity Trade‑offs' + startOffset: 1949 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1949 + endOffset: 2128 +- name: 'Human‑in‑the‑Loop: Limits of Automation and Responsible Oversight' + startOffset: 2128 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2128 + endOffset: 2251 +- name: 'Detecting Drift & Feedback Loops: Demographics, Overfitting, and KS Tests' + startOffset: 2251 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2251 + endOffset: 2559 +- name: 'Regulated Industry Perspectives: Finance, Pharma, and Risk Sensitivity' + startOffset: 2559 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2559 + endOffset: 2647 +- name: 'Hiring Tool Case Study: Historical Bias and Remediation Lessons' + startOffset: 2647 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2647 + endOffset: 3017 +- name: 'AutoML Risks: 
Democratization, Oversight, and Responsible Usage' + startOffset: 3017 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3017 + endOffset: 3128 +- name: 'Community & Mentorship: DataBuzz Resources and Networking' + startOffset: 3128 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3128 + endOffset: 3230 +- name: 'Data Career Landscape: Analyst, MLOps, Consultant, and Strategist Roles' + startOffset: 3230 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3230 + endOffset: 3404 +- name: 'Ethics Training: Professional Responsibility for Data Practitioners' + startOffset: 3404 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3404 + endOffset: 3567 +- name: 'Closing Remarks: Follow‑up, Links, and Contact Information' + startOffset: 3567 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3567 + endOffset: 3536 + transcript: - header: 'Episode Introduction: Responsible and Explainable AI' - line: This week, we'll talk about responsible and Explainable AI. We have a special @@ -1163,131 +1282,6 @@ transcript: sec: 3630 time: '1:00:30' who: Alexey -description: Discover Responsible AI & Explainable AI tactics for bias detection, - fairness checks and governance, practical tools to build trustworthy, compliant - ML models. -intro: How do you detect bias, enforce fairness, and govern AI systems in production - without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan - Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia - and Rutgers — walks through a practical roadmap for responsible AI and explainable - AI grounded in real-world examples.

We define responsible AI and contrast - it with post‑hoc explainability, then unpack a credit decision bias case to show - disparate outcomes in practice. Supreet outlines glass‑box explainability techniques, - data‑level fairness checks (skewness, missingness, coverage), and EDA methods for - bias detection. She covers PII handling, feature necessity assessments with SMEs - and compliance, and automating data quality and monitoring. You’ll hear tool recommendations - — What‑If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local - interpretability, drift and feedback‑loop detection, and trade‑offs between accuracy - and interpretability.

Listeners will gain actionable guidance on bias detection, - model interpretability, AI governance structures, and managing AutoML and regulated‑industry - risks — practical steps to make AI systems more fair, transparent, and accountable. -dateadded: '2022-10-02' -duration: PT00H58M56S -quotableClips: -- name: 'Episode Introduction: Responsible and Explainable AI' - startOffset: 0 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=0 - endOffset: 134 -- name: 'Career Journey: Master''s, Consulting, and Founding DataBuzz' - startOffset: 134 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=134 - endOffset: 234 -- name: 'Data Strategy Role: Building AI Products at Morgan Stanley' - startOffset: 234 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=234 - endOffset: 283 -- name: 'Responsible AI: Definition, Trust, and Stakeholder Collaboration' - startOffset: 283 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=283 - endOffset: 402 -- name: 'Credit Decision Bias Example: Explaining Disparate Outcomes' - startOffset: 402 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=402 - endOffset: 500 -- name: 'Explainable vs Responsible AI: Post‑mortem Tools vs Governance Mindset' - startOffset: 500 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=500 - endOffset: 630 -- name: 'Glass‑Box Approach: Explainable AI Techniques Overview' - startOffset: 630 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=630 - endOffset: 696 -- name: 'Data‑Level Fairness Checks: Skewness, Missingness, and Coverage' - startOffset: 696 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=696 - endOffset: 768 -- name: Exploratory Data Analysis for Bias Detection - startOffset: 768 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=768 - endOffset: 879 -- name: 'PII Handling: Age, Gender, Masking, and Use‑case Justification' - startOffset: 879 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=879 - endOffset: 1040 -- name: 'Feature Necessity: Product, SME, and Compliance 
Decisioning' - startOffset: 1040 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1040 - endOffset: 1107 -- name: 'Automating Data Quality: DQ Tools, Alerts, and Monitoring' - startOffset: 1107 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1107 - endOffset: 1143 -- name: 'Model Explainability Tools: What‑If, Skater, and AI Explainability 360' - startOffset: 1143 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1143 - endOffset: 1404 -- name: 'Local Interpretability: LIME, SHAP, and Surrogate Models' - startOffset: 1404 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1404 - endOffset: 1462 -- name: 'Ethics vs Profitability: Balancing Fairness and Business Objectives' - startOffset: 1462 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1462 - endOffset: 1658 -- name: 'Cross‑Functional Governance: SMEs, Compliance, and Leadership Roles' - startOffset: 1658 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1658 - endOffset: 1949 -- name: 'Accuracy vs Interpretability: Managing Model Complexity Trade‑offs' - startOffset: 1949 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1949 - endOffset: 2128 -- name: 'Human‑in‑the‑Loop: Limits of Automation and Responsible Oversight' - startOffset: 2128 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2128 - endOffset: 2251 -- name: 'Detecting Drift & Feedback Loops: Demographics, Overfitting, and KS Tests' - startOffset: 2251 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2251 - endOffset: 2559 -- name: 'Regulated Industry Perspectives: Finance, Pharma, and Risk Sensitivity' - startOffset: 2559 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2559 - endOffset: 2647 -- name: 'Hiring Tool Case Study: Historical Bias and Remediation Lessons' - startOffset: 2647 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2647 - endOffset: 3017 -- name: 'AutoML Risks: Democratization, Oversight, and Responsible Usage' - startOffset: 3017 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3017 - 
endOffset: 3128 -- name: 'Community & Mentorship: DataBuzz Resources and Networking' - startOffset: 3128 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3128 - endOffset: 3230 -- name: 'Data Career Landscape: Analyst, MLOps, Consultant, and Strategist Roles' - startOffset: 3230 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3230 - endOffset: 3404 -- name: 'Ethics Training: Professional Responsibility for Data Practitioners' - startOffset: 3404 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3404 - endOffset: 3567 -- name: 'Closing Remarks: Follow‑up, Links, and Contact Information' - startOffset: 3567 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3567 - endOffset: 3536 --- Links: diff --git a/_podcast/s10e05-growing-data-engineering-team-in-scale-up.md b/_podcast/scale-data-engineering-teams-self-service-platforms.md similarity index 97% rename from _podcast/s10e05-growing-data-engineering-team-in-scale-up.md rename to _podcast/scale-data-engineering-teams-self-service-platforms.md index cf338f28..6b10ebe9 100644 --- a/_podcast/s10e05-growing-data-engineering-team-in-scale-up.md +++ b/_podcast/scale-data-engineering-teams-self-service-platforms.md @@ -1,20 +1,124 @@ --- +title: 'Scale Data Engineering Teams: Build Self‑Service Data Platforms, Hire Senior Engineers & Use Kafka' +short: Growing Data Engineering Team in a Scale-Up +season: 10 episode: 5 guests: - mehdiouazza +image: images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg ids: anchor: Growing-Data-Engineering-Team-in-a-Scale-Up---Mehdi-OUAZZA-e1mq8et youtube: acJ6sVqKOUk -image: images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Growing-Data-Engineering-Team-in-a-Scale-Up---Mehdi-OUAZZA-e1mq8et apple: https://podcasts.apple.com/us/podcast/growing-data-engineering-team-in-a-scale-up-mehdi-ouazza/id1541710331?i=1000577461365 spotify: 
https://open.spotify.com/episode/5DkuaYQpbJ13sU9bknFZnk?si=RtQnTHHYQb-ytMEw8J3e8g youtube: https://www.youtube.com/watch?v=acJ6sVqKOUk -season: 10 -short: Growing Data Engineering Team in a Scale-Up -title: 'Scale Data Engineering Teams: Build Self‑Service Data Platforms, Hire Senior - Engineers & Use Kafka' + +description: 'Master scaling data engineering teams: build self-service data platforms, hire senior engineers, deploy Kafka best practices to boost velocity, onboarding.' +intro: 'How do you scale data engineering teams during hypergrowth without sacrificing quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, orchestration, infrastructure and analytics — walks through practical approaches to scale data engineering teams, build self‑service data platforms, hire senior engineers and adopt Kafka-based event streaming.

We cover what “scale‑up” looks like in practice (rapid hiring, product launches, US expansion), the data platform’s role in enabling self‑service onboarding and scalability, and a platform anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi also digs into event streaming: Kafka, schema registries and data contracts, plus hiring-for-scale tactics — prioritizing senior experts and niche tech experience — and assessment strategies like reverse interviews. You’ll hear about balancing platform engineering and use‑case pipelines, cultivating culture shifts, creating junior learning paths, and growing toward senior roles through proactivity and cross‑team impact.

Listen for concrete guidance on building a self‑service data platform, practical Kafka practices, and hiring strategies that help teams move fast while staying reliable.' +dateadded: 2022-08-29 + +duration: PT01H01M25S + +quotableClips: +- name: 'Episode Introduction: Growing Data Engineering Team & Guest Mehdi' + startOffset: 117 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=117 + endOffset: 162 +- name: 'Guest background: BI, on‑prem Big Data to staff data engineer (career highlights)' + startOffset: 162 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=162 + endOffset: 341 +- name: 'Defining scale‑up: hypergrowth, funding, hiring surge, speed vs quality' + startOffset: 341 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=341 + endOffset: 621 +- name: 'Hypergrowth challenges: product launches, US expansion, operational strain' + startOffset: 621 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=621 + endOffset: 750 +- name: 'Data platform role: enabling self‑service, onboarding, and scalability' + startOffset: 750 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=750 + endOffset: 1042 +- name: 'Data platform anatomy: Airflow, conventions, playbooks, and best practices' + startOffset: 1042 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1042 + endOffset: 1213 +- name: 'Hiring for scale: prioritize senior experts and niche technology experience' + startOffset: 1213 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1213 + endOffset: 1406 +- name: 'Event streaming practices: Kafka, schemas, schema registry, and data contracts' + startOffset: 1406 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1406 + endOffset: 1625 +- name: 'Velocity vs growth: managing fast pace while ensuring personal growth' + startOffset: 1625 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1625 + endOffset: 1867 +- name: 'Culture shift: evolving processes and influencing company norms' + startOffset: 1867 + url: 
https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1867 + endOffset: 2105 +- name: 'Career trade‑offs: scale‑up vs enterprise vs FAANG' + startOffset: 2105 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2105 + endOffset: 2292 +- name: 'Assessment tactics: reverse interviews to evaluate team workload and culture' + startOffset: 2292 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2292 + endOffset: 2342 +- name: 'Junior opportunities: rapid learning, promotions, and exposure in scale‑ups' + startOffset: 2342 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2342 + endOffset: 2451 +- name: 'Talent sourcing: employer brand, community contributions, and open source' + startOffset: 2451 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2451 + endOffset: 2804 +- name: 'Technical content: writing, OSS contributions, and getting external feedback' + startOffset: 2804 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2804 + endOffset: 2946 +- name: 'Community engagement: reader outreach, calls, and mentorship benefits' + startOffset: 2946 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2946 + endOffset: 3017 +- name: 'Role evolution: generalist to specialist as teams and projects mature' + startOffset: 3017 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3017 + endOffset: 3175 +- name: 'Work balance: platform engineering vs use‑case pipelines (~50/50)' + startOffset: 3175 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3175 + endOffset: 3271 +- name: 'Path to senior: proactivity, broader impact, and cross‑team collaboration' + startOffset: 3271 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3271 + endOffset: 3394 +- name: 'Casual segment: light banter about music, caps, and hobbies' + startOffset: 3394 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3394 + endOffset: 3468 +- name: 'Creator spotlight: MehdiO DataTV, DataCreators.club, and content channels' + startOffset: 3468 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3468 + 
endOffset: 3612 +- name: 'Content production: time investment, process improvements, and persistence' + startOffset: 3612 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3612 + endOffset: 3713 +- name: 'Video editing tips: multi‑take filming, lighting consistency, and tricks' + startOffset: 3713 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3713 + endOffset: 3776 +- name: 'Episode close: key takeaways, resources, and links' + startOffset: 3776 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3776 + endOffset: 3685 + transcript: - header: 'Episode Introduction: Growing Data Engineering Team & Guest Mehdi' - line: This week, we will talk about growing a data engineering team in a scale-up. @@ -1084,124 +1188,6 @@ transcript: sec: 3802 time: '1:03:22' who: Mehdi -description: 'Master scaling data engineering teams: build self-service data platforms, - hire senior engineers, deploy Kafka best practices to boost velocity, onboarding.' -intro: 'How do you scale data engineering teams during hypergrowth without sacrificing - quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and - entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, - orchestration, infrastructure and analytics — walks through practical approaches - to scale data engineering teams, build self‑service data platforms, hire senior - engineers and adopt Kafka-based event streaming.

We cover what “scale‑up” - looks like in practice (rapid hiring, product launches, US expansion), the data - platform’s role in enabling self‑service onboarding and scalability, and a platform - anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi - also digs into event streaming: Kafka, schema registries and data contracts, plus - hiring-for-scale tactics — prioritizing senior experts and niche tech experience - — and assessment strategies like reverse interviews. You’ll hear about balancing - platform engineering and use‑case pipelines, cultivating culture shifts, creating - junior learning paths, and growing toward senior roles through proactivity and cross‑team - impact.

Listen for concrete guidance on building a self‑service data platform, - practical Kafka practices, and hiring strategies that help teams move fast while - staying reliable.' -dateadded: '2022-08-29' -duration: PT01H01M25S -quotableClips: -- name: 'Episode Introduction: Growing Data Engineering Team & Guest Mehdi' - startOffset: 117 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=117 - endOffset: 162 -- name: 'Guest background: BI, on‑prem Big Data to staff data engineer (career highlights)' - startOffset: 162 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=162 - endOffset: 341 -- name: 'Defining scale‑up: hypergrowth, funding, hiring surge, speed vs quality' - startOffset: 341 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=341 - endOffset: 621 -- name: 'Hypergrowth challenges: product launches, US expansion, operational strain' - startOffset: 621 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=621 - endOffset: 750 -- name: 'Data platform role: enabling self‑service, onboarding, and scalability' - startOffset: 750 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=750 - endOffset: 1042 -- name: 'Data platform anatomy: Airflow, conventions, playbooks, and best practices' - startOffset: 1042 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1042 - endOffset: 1213 -- name: 'Hiring for scale: prioritize senior experts and niche technology experience' - startOffset: 1213 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1213 - endOffset: 1406 -- name: 'Event streaming practices: Kafka, schemas, schema registry, and data contracts' - startOffset: 1406 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1406 - endOffset: 1625 -- name: 'Velocity vs growth: managing fast pace while ensuring personal growth' - startOffset: 1625 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1625 - endOffset: 1867 -- name: 'Culture shift: evolving processes and influencing company norms' - startOffset: 1867 - url: 
https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1867 - endOffset: 2105 -- name: 'Career trade‑offs: scale‑up vs enterprise vs FAANG' - startOffset: 2105 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2105 - endOffset: 2292 -- name: 'Assessment tactics: reverse interviews to evaluate team workload and culture' - startOffset: 2292 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2292 - endOffset: 2342 -- name: 'Junior opportunities: rapid learning, promotions, and exposure in scale‑ups' - startOffset: 2342 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2342 - endOffset: 2451 -- name: 'Talent sourcing: employer brand, community contributions, and open source' - startOffset: 2451 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2451 - endOffset: 2804 -- name: 'Technical content: writing, OSS contributions, and getting external feedback' - startOffset: 2804 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2804 - endOffset: 2946 -- name: 'Community engagement: reader outreach, calls, and mentorship benefits' - startOffset: 2946 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2946 - endOffset: 3017 -- name: 'Role evolution: generalist to specialist as teams and projects mature' - startOffset: 3017 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3017 - endOffset: 3175 -- name: 'Work balance: platform engineering vs use‑case pipelines (~50/50)' - startOffset: 3175 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3175 - endOffset: 3271 -- name: 'Path to senior: proactivity, broader impact, and cross‑team collaboration' - startOffset: 3271 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3271 - endOffset: 3394 -- name: 'Casual segment: light banter about music, caps, and hobbies' - startOffset: 3394 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3394 - endOffset: 3468 -- name: 'Creator spotlight: MehdiO DataTV, DataCreators.club, and content channels' - startOffset: 3468 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3468 - 
endOffset: 3612 -- name: 'Content production: time investment, process improvements, and persistence' - startOffset: 3612 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3612 - endOffset: 3713 -- name: 'Video editing tips: multi‑take filming, lighting consistency, and tricks' - startOffset: 3713 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3713 - endOffset: 3776 -- name: 'Episode close: key takeaways, resources, and links' - startOffset: 3776 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3776 - endOffset: 3685 --- Links: diff --git a/_podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.md b/_podcast/scale-enterprise-ai-mlops-data-first-strategy.md similarity index 97% rename from _podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.md rename to _podcast/scale-enterprise-ai-mlops-data-first-strategy.md index e2099c61..2c061955 100644 --- a/_podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.md +++ b/_podcast/scale-enterprise-ai-mlops-data-first-strategy.md @@ -1,20 +1,115 @@ --- +title: 'Scale Enterprise AI: Data-First Strategies, MLOps Best Practices & Realistic Experiments' +short: Lessons Learned About Data & AI at Enterprises +season: 10 episode: 4 guests: - alexanderhendorf +image: images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg ids: anchor: Lessons-Learned-About-Data--AI-at-Enterprises---Alexander-Hendorf-e1milm0/a-a8d08ua youtube: Vms29u9xC3k -image: images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Lessons-Learned-About-Data--AI-at-Enterprises---Alexander-Hendorf-e1milm0/a-a8d08ua apple: https://podcasts.apple.com/us/podcast/lessons-learned-about-data-ai-at-enterprises-alexander/id1541710331?i=1000576622709 spotify: https://open.spotify.com/episode/5t3SwzH17mFjxEoDUx9i5c?si=gaTfOoFnQ7muVkBiYuMxuA youtube: https://www.youtube.com/watch?v=Vms29u9xC3k -season: 10 -short: Lessons Learned About Data & AI at 
Enterprises -title: 'Scale Enterprise AI: Data-First Strategies, MLOps Best Practices & Realistic - Experiments' + +description: Discover data-first Enterprise AI strategies and MLOps best practices—learn realistic experiments, CI/CD, governance, and align ML to business impact. +intro: 'How do you move from proof-of-concept to scaled enterprise AI without over-investing in hype? In this episode, Alexander Hendorf — head of data and AI at KÖNIGSWEG, PyData chair and Python Software Foundation/EuroPython fellow — walks through pragmatic, data-first strategies for scaling AI across organizations.

We cover how to align AI initiatives with company goals, run realistic experiments (and why transparent evaluation matters), and set expectations about AI’s limits (the “Beethoven” example). Alexander breaks down a data-first architecture — data lake, BI vs. ML vs. deep learning splits — and explains productionization needs like retraining, feedback loops, and MLOps automation. He shares MLOps best practices: standardization, CI/CD, governance, reproducibility, and warnings about vendor lock-in and consultancy pitfalls. You’ll also hear advice on prioritization over perfection, timing innovation, and choosing platforms that fit long-term team maturity.

Listen for actionable guidance on experiment design, model evaluation, and building repeatable pipelines so you can scale enterprise AI responsibly and sustainably. Ideal for data leaders, ML engineers, and product owners implementing production ML and MLOps.' +dateadded: 2022-08-19 + +duration: PT01H01M25S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=0 + endOffset: 122 +- name: 'Guest Overview: Alexander Hendorf — Königsweg partner & PyData chair' + startOffset: 122 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=122 + endOffset: 199 +- name: 'Career Path: from law and DJing to programming and machine learning' + startOffset: 199 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=199 + endOffset: 307 +- name: 'Partner Role: team leadership, strategy, and client selection' + startOffset: 307 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=307 + endOffset: 576 +- name: 'Community Engagement: PyData, cross-domain learning, and meetups' + startOffset: 576 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=576 + endOffset: 693 +- name: 'Conference Organizing: becoming chair, scaling events, and organizer summit' + startOffset: 693 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=693 + endOffset: 991 +- name: 'Public Speaking: generating talk ideas and learning through presentations' + startOffset: 991 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=991 + endOffset: 1256 +- name: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' + startOffset: 1256 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1256 + endOffset: 1471 +- name: 'Communicating AI to Business: simplification, open source, and stakeholder + buy-in' + startOffset: 1471 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1471 + endOffset: 1878 +- name: 'Enterprise AI Strategy: aligning initiatives, experiments, and company goals' + startOffset: 1878 + url: 
https://www.youtube.com/watch?v=Vms29u9xC3k&t=1878 + endOffset: 2210 +- name: 'Experimentation Reality: evaluation, transparency, and avoiding hype-driven + bets' + startOffset: 2210 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2210 + endOffset: 2242 +- name: 'AI Limitations Illustrated: realistic expectations (Beethoven example)' + startOffset: 2242 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2242 + endOffset: 2568 +- name: 'Innovation Patience: retrospectives, avoiding over-engineering, and timing' + startOffset: 2568 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2568 + endOffset: 2763 +- name: 'Prioritization Over Perfection: "good enough" engineering and impact focus' + startOffset: 2763 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2763 + endOffset: 2950 +- name: 'Data-First Approach: data lake concept, BI vs. ML vs. deep learning split' + startOffset: 2950 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2950 + endOffset: 3132 +- name: 'Productionization Needs: retraining, feedback loops, and MLOps automation' + startOffset: 3132 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3132 + endOffset: 3214 +- name: 'MLOps Best Practices: standardization, CI/CD, governance, and reproducibility' + startOffset: 3214 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3214 + endOffset: 3335 +- name: 'MLOps Hype vs. 
Reality: buzzword caution and consultancy pitfalls' + startOffset: 3335 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3335 + endOffset: 3531 +- name: 'Platform Selection & Longevity: vendor lock-in, long-term planning, and team + maturity' + startOffset: 3531 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3531 + endOffset: 3667 +- name: 'How to Reach Alexander: LinkedIn, Twitter, and PyData events' + startOffset: 3667 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3667 + endOffset: 3754 +- name: Episode Wrap-Up and Upcoming Conferences + startOffset: 3754 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3754 + endOffset: 3685 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Alexander Hendorf — Königsweg partner & PyData chair' @@ -1029,113 +1124,6 @@ transcript: sec: 3807 time: '1:03:27' who: Alexander -description: Discover data-first Enterprise AI strategies and MLOps best practices—learn - realistic experiments, CI/CD, governance, and align ML to business impact. -intro: 'How do you move from proof-of-concept to scaled enterprise AI without over-investing - in hype? In this episode, Alexander Hendorf — head of data and AI at KÖNIGSWEG, PyData - chair and Python Software Foundation/EuroPython fellow — walks through pragmatic, - data-first strategies for scaling AI across organizations.

We cover how - to align AI initiatives with company goals, run realistic experiments (and why transparent - evaluation matters), and set expectations about AI’s limits (the “Beethoven” example). - Alexander breaks down a data-first architecture — data lake, BI vs. ML vs. deep - learning splits — and explains productionization needs like retraining, feedback - loops, and MLOps automation. He shares MLOps best practices: standardization, CI/CD, - governance, reproducibility, and warnings about vendor lock-in and consultancy pitfalls. - You’ll also hear advice on prioritization over perfection, timing innovation, and - choosing platforms that fit long-term team maturity.

Listen for actionable - guidance on experiment design, model evaluation, and building repeatable pipelines - so you can scale enterprise AI responsibly and sustainably. Ideal for data leaders, - ML engineers, and product owners implementing production ML and MLOps.' -dateadded: '2022-08-19' -duration: PT01H01M25S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=0 - endOffset: 122 -- name: 'Guest Overview: Alexander Hendorf — Königsweg partner & PyData chair' - startOffset: 122 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=122 - endOffset: 199 -- name: 'Career Path: from law and DJing to programming and machine learning' - startOffset: 199 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=199 - endOffset: 307 -- name: 'Partner Role: team leadership, strategy, and client selection' - startOffset: 307 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=307 - endOffset: 576 -- name: 'Community Engagement: PyData, cross-domain learning, and meetups' - startOffset: 576 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=576 - endOffset: 693 -- name: 'Conference Organizing: becoming chair, scaling events, and organizer summit' - startOffset: 693 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=693 - endOffset: 991 -- name: 'Public Speaking: generating talk ideas and learning through presentations' - startOffset: 991 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=991 - endOffset: 1256 -- name: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' - startOffset: 1256 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1256 - endOffset: 1471 -- name: 'Communicating AI to Business: simplification, open source, and stakeholder - buy-in' - startOffset: 1471 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1471 - endOffset: 1878 -- name: 'Enterprise AI Strategy: aligning initiatives, experiments, and company goals' - startOffset: 1878 - url: 
https://www.youtube.com/watch?v=Vms29u9xC3k&t=1878 - endOffset: 2210 -- name: 'Experimentation Reality: evaluation, transparency, and avoiding hype-driven - bets' - startOffset: 2210 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2210 - endOffset: 2242 -- name: 'AI Limitations Illustrated: realistic expectations (Beethoven example)' - startOffset: 2242 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2242 - endOffset: 2568 -- name: 'Innovation Patience: retrospectives, avoiding over-engineering, and timing' - startOffset: 2568 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2568 - endOffset: 2763 -- name: 'Prioritization Over Perfection: "good enough" engineering and impact focus' - startOffset: 2763 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2763 - endOffset: 2950 -- name: 'Data-First Approach: data lake concept, BI vs. ML vs. deep learning split' - startOffset: 2950 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2950 - endOffset: 3132 -- name: 'Productionization Needs: retraining, feedback loops, and MLOps automation' - startOffset: 3132 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3132 - endOffset: 3214 -- name: 'MLOps Best Practices: standardization, CI/CD, governance, and reproducibility' - startOffset: 3214 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3214 - endOffset: 3335 -- name: 'MLOps Hype vs. 
Reality: buzzword caution and consultancy pitfalls' - startOffset: 3335 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3335 - endOffset: 3531 -- name: 'Platform Selection & Longevity: vendor lock-in, long-term planning, and team - maturity' - startOffset: 3531 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3531 - endOffset: 3667 -- name: 'How to Reach Alexander: LinkedIn, Twitter, and PyData events' - startOffset: 3667 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3667 - endOffset: 3754 -- name: Episode Wrap-Up and Upcoming Conferences - startOffset: 3754 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3754 - endOffset: 3685 --- Links: diff --git a/_podcast/s13e05-se4ml-software-engineering-for-machine-learning.md b/_podcast/software-engineering-for-machine-learning.md similarity index 97% rename from _podcast/s13e05-se4ml-software-engineering-for-machine-learning.md rename to _podcast/software-engineering-for-machine-learning.md index 046cc645..5675d637 100644 --- a/_podcast/s13e05-se4ml-software-engineering-for-machine-learning.md +++ b/_podcast/software-engineering-for-machine-learning.md @@ -1,20 +1,130 @@ --- +title: 'Software Engineering for ML: Prevent Hidden Technical Debt with MLOps, Documentation & Team Alignment' +short: Software Engineering for ML +season: 13 episode: 5 guests: - nadianahar +image: images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/SE4ML---Software-Engineering-for-Machine-Learning---Nadia-Nahar-e20svmn youtube: 35Ch8xL2SA8 -image: images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/SE4ML---Software-Engineering-for-Machine-Learning---Nadia-Nahar-e20svmn apple: https://podcasts.apple.com/us/podcast/se4ml-software-engineering-for-machine-learning-nadia/id1541710331?i=1000605782433 spotify: 
https://open.spotify.com/episode/6ElyurOyGfRiCwLGUWOG7f?si=6k0i3XNUSPWd31vsZv4pfA youtube: https://www.youtube.com/watch?v=35Ch8xL2SA8 -season: 13 -short: SE4ML - Software Engineering for Machine Learning -title: 'Prevent Hidden Technical Debt in ML Systems: Software Engineering, MLOps, - Docs & Responsible AI' + +description: Learn how to prevent hidden technical debt in ML systems with MLOps, documentation and responsible AI— improve reliability, tests, and team alignment +intro: How do teams prevent hidden technical debt in ML systems before it derails production? In this episode, Nadia Nahar, a PhD student in Software Engineering at Carnegie Mellon University, walks through the software-engineering challenges unique to machine learning and practical steps to reduce long-term costs.

We cover defining software engineering for ML systems, differences from traditional software (uncertainty, data workflows, monitoring), and the “hidden technical debt” scope. Nadia describes an artifact analysis of open-source ML products (~300 repos), common failure modes (discontinuation, unmet requirements, poor data, deployment gaps), and research methods combining manual review with commit/code scripts. Key topics include requirements alignment, team structures and integration patterns, CRISP-DM vs Agile mismatches, MLOps and engineering support, plus documentation practices (Model Cards, Datasheets, factsheets, checklists). We also discuss responsible AI use cases—explainability needs in healthcare and education, including a classroom game predicting smoking risk—and governance approaches for product-centric fairness.

Listen to learn concrete remedies—workshops, shared vocabularies, documentation standards, and how to involve ML practitioners from requirements through testing—to prevent hidden technical debt in ML systems +topics: +- software engineering +- machine learning +- MLOps +dateadded: 2023-03-27 + +duration: PT00H58M25S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=0 + endOffset: 96 +- name: 'Guest Background: Nadia Nahar (PhD, software engineering)' + startOffset: 96 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=96 + endOffset: 254 +- name: Academia–Industry Collaboration in Software Engineering + startOffset: 254 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=254 + endOffset: 418 +- name: Defining Software Engineering for Machine Learning Systems + startOffset: 418 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=418 + endOffset: 462 +- name: 'ML vs Traditional Software: uncertainty, data workflows, monitoring' + startOffset: 462 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=462 + endOffset: 612 +- name: 'System-Centric Perspective: "Hidden Technical Debt" and scope' + startOffset: 612 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=612 + endOffset: 654 +- name: 'Industry Pain Points: requirements, unrealistic expectations, data access' + startOffset: 654 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=654 + endOffset: 832 +- name: 'Communication & Alignment: vocabulary, expectation setting, documentation' + startOffset: 832 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=832 + endOffset: 917 +- name: 'Artifact Analysis: building an open-source ML product dataset' + startOffset: 917 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=917 + endOffset: 1145 +- name: 'Open-Source ML Products: dataset size (~300 repos) and availability issues' + startOffset: 1145 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1145 + endOffset: 1314 +- name: 'Product 
Criteria: distinguishing ML products from models and APIs' + startOffset: 1314 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1314 + endOffset: 1443 +- name: 'Dataset Research Questions: development order, collaboration, testing, ops, + responsible AI' + startOffset: 1443 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1443 + endOffset: 1562 +- name: 'Analysis Approach: manual review augmented by scripts (commits & code)' + startOffset: 1562 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1562 + endOffset: 1782 +- name: 'Failure Modes: discontinuation, unmet requirements, poor data, deployment + gaps' + startOffset: 1782 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1782 + endOffset: 2062 +- name: 'Process Gap: CRISP-DM, Agile mismatch, and the need for integrated ML+SW + processes' + startOffset: 2062 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2062 + endOffset: 2188 +- name: 'Team Structures & Integration Patterns: siloing, APIs, all-in-one teams, + ML engineers' + startOffset: 2188 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2188 + endOffset: 2345 +- name: 'Practical Remedies: workshops, shared vocabularies, documentation, engineering + support (MLOps)' + startOffset: 2345 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2345 + endOffset: 2567 +- name: 'Documentation Practices: Model Cards, Datasheets, factsheets, and checklists' + startOffset: 2567 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2567 + endOffset: 2836 +- name: 'Responsible AI Research: explainability requirements in healthcare and education' + startOffset: 2836 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2836 + endOffset: 3003 +- name: 'Explainability Use Case: classroom game predicting smoking risk and stakeholder + needs' + startOffset: 3003 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3003 + endOffset: 3256 +- name: 'Responsible AI Governance: product-centric fairness and team accountability' + startOffset: 3256 + url: 
https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3256 + endOffset: 3415 +- name: 'Agile Integration: involving ML practitioners from requirements through testing' + startOffset: 3415 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3415 + endOffset: 3601 +- name: Closing Remarks & Resources + startOffset: 3601 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3601 + endOffset: 3505 + transcript: - header: Podcast Introduction - header: 'Guest Background: Nadia Nahar (PhD, software engineering)' @@ -1092,126 +1202,6 @@ transcript: sec: 3601 time: '1:00:01' who: Nadia -description: Learn how to prevent hidden technical debt in ML systems with MLOps, - documentation and responsible AI— improve reliability, tests, and team alignment. -intro: How do teams prevent hidden technical debt in ML systems before it derails - production? In this episode, Nadia Nahar, a PhD student in Software Engineering at - Carnegie Mellon University, walks through the software-engineering challenges unique - to machine learning and practical steps to reduce long-term costs.

We cover - defining software engineering for ML systems, differences from traditional software - (uncertainty, data workflows, monitoring), and the “hidden technical debt” scope. - Nadia describes an artifact analysis of open-source ML products (~300 repos), common - failure modes (discontinuation, unmet requirements, poor data, deployment gaps), - and research methods combining manual review with commit/code scripts. Key topics - include requirements alignment, team structures and integration patterns, CRISP-DM - vs Agile mismatches, MLOps and engineering support, plus documentation practices - (Model Cards, Datasheets, factsheets, checklists). We also discuss responsible AI - use cases—explainability needs in healthcare and education, including a classroom - game predicting smoking risk—and governance approaches for product-centric fairness. -

Listen to learn concrete remedies—workshops, shared vocabularies, documentation - standards, and how to involve ML practitioners from requirements through testing—to - prevent hidden technical debt in ML systems. -dateadded: '2023-03-27' -duration: PT00H58M25S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=0 - endOffset: 96 -- name: 'Guest Background: Nadia Nahar (PhD, software engineering)' - startOffset: 96 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=96 - endOffset: 254 -- name: Academia–Industry Collaboration in Software Engineering - startOffset: 254 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=254 - endOffset: 418 -- name: Defining Software Engineering for Machine Learning Systems - startOffset: 418 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=418 - endOffset: 462 -- name: 'ML vs Traditional Software: uncertainty, data workflows, monitoring' - startOffset: 462 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=462 - endOffset: 612 -- name: 'System-Centric Perspective: "Hidden Technical Debt" and scope' - startOffset: 612 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=612 - endOffset: 654 -- name: 'Industry Pain Points: requirements, unrealistic expectations, data access' - startOffset: 654 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=654 - endOffset: 832 -- name: 'Communication & Alignment: vocabulary, expectation setting, documentation' - startOffset: 832 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=832 - endOffset: 917 -- name: 'Artifact Analysis: building an open-source ML product dataset' - startOffset: 917 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=917 - endOffset: 1145 -- name: 'Open-Source ML Products: dataset size (~300 repos) and availability issues' - startOffset: 1145 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1145 - endOffset: 1314 -- name: 'Product Criteria: distinguishing ML products from models and APIs' - 
startOffset: 1314 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1314 - endOffset: 1443 -- name: 'Dataset Research Questions: development order, collaboration, testing, ops, - responsible AI' - startOffset: 1443 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1443 - endOffset: 1562 -- name: 'Analysis Approach: manual review augmented by scripts (commits & code)' - startOffset: 1562 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1562 - endOffset: 1782 -- name: 'Failure Modes: discontinuation, unmet requirements, poor data, deployment - gaps' - startOffset: 1782 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1782 - endOffset: 2062 -- name: 'Process Gap: CRISP-DM, Agile mismatch, and the need for integrated ML+SW - processes' - startOffset: 2062 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2062 - endOffset: 2188 -- name: 'Team Structures & Integration Patterns: siloing, APIs, all-in-one teams, - ML engineers' - startOffset: 2188 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2188 - endOffset: 2345 -- name: 'Practical Remedies: workshops, shared vocabularies, documentation, engineering - support (MLOps)' - startOffset: 2345 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2345 - endOffset: 2567 -- name: 'Documentation Practices: Model Cards, Datasheets, factsheets, and checklists' - startOffset: 2567 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2567 - endOffset: 2836 -- name: 'Responsible AI Research: explainability requirements in healthcare and education' - startOffset: 2836 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2836 - endOffset: 3003 -- name: 'Explainability Use Case: classroom game predicting smoking risk and stakeholder - needs' - startOffset: 3003 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3003 - endOffset: 3256 -- name: 'Responsible AI Governance: product-centric fairness and team accountability' - startOffset: 3256 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3256 - endOffset: 
3415 -- name: 'Agile Integration: involving ML practitioners from requirements through testing' - startOffset: 3415 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3415 - endOffset: 3601 -- name: Closing Remarks & Resources - startOffset: 3601 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3601 - endOffset: 3505 --- Links: diff --git a/_podcast/s05e04-introducing-data-science-in-startups.md b/_podcast/solo-data-scientist.md similarity index 97% rename from _podcast/s05e04-introducing-data-science-in-startups.md rename to _podcast/solo-data-scientist.md index c92d1e3e..320762d4 100644 --- a/_podcast/s05e04-introducing-data-science-in-startups.md +++ b/_podcast/solo-data-scientist.md @@ -1,11 +1,11 @@ --- title: 'Solo Data Scientist Playbook: 90-Day Roadmap, Pipelines, A/B Tests & Prioritization' short: Introducing Data Science in Startups +season: 5 +episode: 4 guests: - mariannadiachuk image: images/podcast/s05e04-introducing-data-science-in-startups.jpg -season: 5 -episode: 4 ids: youtube: KMSE9GkU2mE anchor: Introducing-Data-Science-in-Startups---Marianna-Diachuk-e17rc4i @@ -14,6 +14,130 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Introducing-Data-Science-in-Startups---Marianna-Diachuk-e17rc4i spotify: https://open.spotify.com/episode/0kGFYX12RgkmZC2lMml6S4 apple: https://podcasts.apple.com/us/podcast/introducing-data-science-in-startups-marianna-diachuk/id1541710331?i=1000536525162 + +description: 'Master the solo data scientist 90-day roadmap: prioritize projects, run A/B tests, align stakeholders and deploy models for fast business impact.' +intro: 'How can a solo data scientist deliver measurable impact in the first 90 days? In this episode, Marianna Diachuk — data scientist at Restream, former DataRobot engineer and fintech team lead, and Data Science Lead/mentor with Women Who Code — walks through a practical Solo Data Scientist playbook. 
You''ll hear a clear 90-day roadmap covering first-week stakeholder interviews and data exploration, first-month research and proofs-of-concept, and first-quarter priorities: building data pipelines, deployment, methodology, and A/B testing. Marianna breaks down company prerequisites (pipelines, engineers, analytics), the experience needed for end-to-end projects, and how to translate business problems into data science work through proactive outreach and prioritization by feasibility, impact, and stakeholder alignment. Topics include churn workflows, reuse and automation to speed iterations, metrics and KPIs for solution selection, experiment design and safe rollouts, plus communicating results through reports and tech talks. Listen for actionable guidance on transitioning from engineering, when to stop projects, hiring signals, an interview readiness checklist, and resources to learn faster and educate your organization.' +topics: +- data science +- startups +- career transition +- software engineering +- communication +- career growth +dateadded: 2021-09-25 + +duration: PT00H58M04S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=0 + endOffset: 2 +- name: Guest Background & Career Path in Data Science + startOffset: 2 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2 + endOffset: 222 +- name: 'Solo Data Scientist: Freedom, Influence & Responsibility' + startOffset: 222 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=222 + endOffset: 493 +- name: 'Company Prerequisites: Data Pipelines, Engineers & Analytics' + startOffset: 493 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=493 + endOffset: 653 +- name: 'Experience Required: Mid‑Senior, End‑to‑End Project Skills' + startOffset: 653 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=653 + endOffset: 753 +- name: 'Problem Discovery: Translating Business Needs to Data Science' + startOffset: 753 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=753 + 
endOffset: 865 +- name: Proactive Outreach & Building a Data Science Roadmap + startOffset: 865 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=865 + endOffset: 961 +- name: 'Prioritization: Feasibility, Impact & Stakeholder Alignment' + startOffset: 961 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=961 + endOffset: 1267 +- name: 'First Week: Stakeholder Interviews and Data Exploration' + startOffset: 1267 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1267 + endOffset: 1345 +- name: 'First Month: Early Research, Insights or Proof‑of‑Concept' + startOffset: 1345 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1345 + endOffset: 1447 +- name: 'First Quarter: Pipelines, Methodology, Deployment & A/B Testing' + startOffset: 1447 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1447 + endOffset: 1540 +- name: 'Managing Expectations: Data Science as Iterative Inquiry' + startOffset: 1540 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1540 + endOffset: 1687 +- name: 'Start Small: Exploratory Analysis, Dashboards vs. 
Machine Learning' + startOffset: 1687 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1687 + endOffset: 1811 +- name: 'Churn Workflows: Analysis to Model to Marketing Collaboration' + startOffset: 1811 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1811 + endOffset: 1974 +- name: 'Project Timelines: Reuse, Automation & Faster Iterations' + startOffset: 1974 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1974 + endOffset: 2063 +- name: 'Solution Selection: Define Metrics and Measure Outcomes' + startOffset: 2063 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2063 + endOffset: 2149 +- name: 'Evaluating Performance: KPIs, Experiments & Delivering Insights' + startOffset: 2149 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2149 + endOffset: 2365 +- name: 'When You Get Stuck: Networks, Communities & Learning Resources' + startOffset: 2365 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2365 + endOffset: 2459 +- name: 'Communicating Results: Reports, Visualizations & Tech Talks' + startOffset: 2459 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2459 + endOffset: 2636 +- name: 'Transitioning from Engineering: Mindset, Deployment & Monitoring' + startOffset: 2636 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2636 + endOffset: 2747 +- name: 'Scaling the Team: Signals to Hire More Data Scientists' + startOffset: 2747 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2747 + endOffset: 2882 +- name: 'Stopping Projects: Prioritize, Cut Losses & Reallocate Effort' + startOffset: 2882 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2882 + endOffset: 3017 +- name: 'Interview Checklist: Questions to Assess Company Readiness' + startOffset: 3017 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3017 + endOffset: 3255 +- name: 'Assessing Readiness: Pipelines, Analytics Dept. 
& Expectations' + startOffset: 3255 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3255 + endOffset: 3318 +- name: 'Research to Production: Silent Mode, A/B Tests & Safe Rollout' + startOffset: 3318 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3318 + endOffset: 3435 +- name: 'Closing Advice: Learn Fast and Educate Your Organization' + startOffset: 3435 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3435 + endOffset: 3472 +- name: Contact Info & Episode Wrap-up + startOffset: 3472 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3472 + endOffset: 3484 + transcript: - header: Podcast Introduction - header: Guest Background & Career Path in Data Science @@ -946,134 +1070,6 @@ transcript: sec: 3486 time: '58:06' who: Alexey -description: 'Master the solo data scientist 90-day roadmap: prioritize projects, - run A/B tests, align stakeholders and deploy models for fast business impact.' -intro: 'How can a solo data scientist deliver measurable impact in the first 90 days? - In this episode, Marianna Diachuk — data scientist at Restream, former DataRobot - engineer and fintech team lead, and Data Science Lead/mentor with Women Who Code - — walks through a practical Solo Data Scientist playbook. You''ll hear a clear 90-day - roadmap covering first-week stakeholder interviews and data exploration, first-month - research and proofs-of-concept, and first-quarter priorities: building data pipelines, - deployment, methodology, and A/B testing. Marianna breaks down company prerequisites - (pipelines, engineers, analytics), the experience needed for end-to-end projects, - and how to translate business problems into data science work through proactive - outreach and prioritization by feasibility, impact, and stakeholder alignment. Topics - include churn workflows, reuse and automation to speed iterations, metrics and KPIs - for solution selection, experiment design and safe rollouts, plus communicating - results through reports and tech talks. 
Listen for actionable guidance on transitioning - from engineering, when to stop projects, hiring signals, an interview readiness - checklist, and resources to learn faster and educate your organization.' -dateadded: '2021-09-25' -duration: PT00H58M04S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=0 - endOffset: 2 -- name: Guest Background & Career Path in Data Science - startOffset: 2 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2 - endOffset: 222 -- name: 'Solo Data Scientist: Freedom, Influence & Responsibility' - startOffset: 222 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=222 - endOffset: 493 -- name: 'Company Prerequisites: Data Pipelines, Engineers & Analytics' - startOffset: 493 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=493 - endOffset: 653 -- name: 'Experience Required: Mid‑Senior, End‑to‑End Project Skills' - startOffset: 653 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=653 - endOffset: 753 -- name: 'Problem Discovery: Translating Business Needs to Data Science' - startOffset: 753 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=753 - endOffset: 865 -- name: Proactive Outreach & Building a Data Science Roadmap - startOffset: 865 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=865 - endOffset: 961 -- name: 'Prioritization: Feasibility, Impact & Stakeholder Alignment' - startOffset: 961 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=961 - endOffset: 1267 -- name: 'First Week: Stakeholder Interviews and Data Exploration' - startOffset: 1267 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1267 - endOffset: 1345 -- name: 'First Month: Early Research, Insights or Proof‑of‑Concept' - startOffset: 1345 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1345 - endOffset: 1447 -- name: 'First Quarter: Pipelines, Methodology, Deployment & A/B Testing' - startOffset: 1447 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1447 - endOffset: 1540 -- name: 'Managing Expectations: Data 
Science as Iterative Inquiry' - startOffset: 1540 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1540 - endOffset: 1687 -- name: 'Start Small: Exploratory Analysis, Dashboards vs. Machine Learning' - startOffset: 1687 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1687 - endOffset: 1811 -- name: 'Churn Workflows: Analysis to Model to Marketing Collaboration' - startOffset: 1811 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1811 - endOffset: 1974 -- name: 'Project Timelines: Reuse, Automation & Faster Iterations' - startOffset: 1974 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1974 - endOffset: 2063 -- name: 'Solution Selection: Define Metrics and Measure Outcomes' - startOffset: 2063 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2063 - endOffset: 2149 -- name: 'Evaluating Performance: KPIs, Experiments & Delivering Insights' - startOffset: 2149 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2149 - endOffset: 2365 -- name: 'When You Get Stuck: Networks, Communities & Learning Resources' - startOffset: 2365 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2365 - endOffset: 2459 -- name: 'Communicating Results: Reports, Visualizations & Tech Talks' - startOffset: 2459 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2459 - endOffset: 2636 -- name: 'Transitioning from Engineering: Mindset, Deployment & Monitoring' - startOffset: 2636 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2636 - endOffset: 2747 -- name: 'Scaling the Team: Signals to Hire More Data Scientists' - startOffset: 2747 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2747 - endOffset: 2882 -- name: 'Stopping Projects: Prioritize, Cut Losses & Reallocate Effort' - startOffset: 2882 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2882 - endOffset: 3017 -- name: 'Interview Checklist: Questions to Assess Company Readiness' - startOffset: 3017 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3017 - endOffset: 3255 -- name: 'Assessing Readiness: Pipelines, Analytics Dept. 
& Expectations' - startOffset: 3255 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3255 - endOffset: 3318 -- name: 'Research to Production: Silent Mode, A/B Tests & Safe Rollout' - startOffset: 3318 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3318 - endOffset: 3435 -- name: 'Closing Advice: Learn Fast and Educate Your Organization' - startOffset: 3435 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3435 - endOffset: 3472 -- name: Contact Info & Episode Wrap-up - startOffset: 3472 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3472 - endOffset: 3484 --- Links: diff --git a/_podcast/s06e01-solopreneur.md b/_podcast/solopreneurship-for-developers-and-data-professionals.md similarity index 98% rename from _podcast/s06e01-solopreneur.md rename to _podcast/solopreneurship-for-developers-and-data-professionals.md index c3a8006f..869089d5 100644 --- a/_podcast/s06e01-solopreneur.md +++ b/_podcast/solopreneurship-for-developers-and-data-professionals.md @@ -1,14 +1,11 @@ --- title: 'Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs' short: 'Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs' +season: 6 +episode: 1 guests: - noahgift -description: Discover solopreneur tactics to build a side-gig tunnel, diversify income - mix with courses, teaching and consulting, and quit corporate on your terms. 
image: images/podcast/s06e01-solopreneur.jpg -date: 2025-11-07 -season: 6 -episode: 1 ids: youtube: gCLUY37HGtw anchor: Becoming-a-Solopreneur-in-Data---Noah-Gift-e19gqbr @@ -17,6 +14,118 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Solopreneur-in-Data---Noah-Gift-e19gqbr spotify: https://open.spotify.com/episode/264kr8rkSV71NwlU3kphHm apple: https://podcasts.apple.com/us/podcast/becoming-a-solopreneur-in-data-noah-gift/id1541710331?i=1000540908616 + +description: Discover solopreneur tactics to build a side-gig tunnel, diversify income mix with courses, teaching and consulting, and quit corporate on your terms +intro: How do you build a sustainable solopreneur business that doesn't rely on VC funding—while diversifying income across courses, consulting, books, and side-gigs? In this episode, Noah Gift, founder of Pragmatic AI Labs and a lecturer on machine learning and data science at Northwestern, Duke MIDS, UC Berkeley, UC Davis, and UNC Charlotte, walks through his transition to solo work (since 2017) and a repeatable income mix for intentional small-business ownership.

We cover defining solopreneurship, the practical income mix formula (online courses, university teaching, selective consulting, book publishing, apps, real estate, and investments) plus how to build a side-gig tunnel while employed. Noah shares work allocation strategies (exponential projects vs. consulting), publishing trade-offs, a book workflow (outline -> projects -> write), daily routines, time-and-cost tactics, and signals for financial readiness to quit full-time work.

If you're planning to diversify income streams with online courses, consulting, or writing, this episode gives actionable steps, publishing considerations, and networking advice to help you transition deliberately and scale revenue without sacrificing control +topics: +- solopreneurship +- entrepreneurship +- career growth +- career transition +- consulting +dateadded: 2021-11-06 +date: 2025-11-07 + +duration: PT00H58M56S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=0 + endOffset: 74 +- name: 'Episode Topic & Guest Overview: Becoming a Solopreneur with Noah Gift' + startOffset: 74 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=74 + endOffset: 157 +- name: 'Early Career: TV, Caltech, Disney and Film Pipeline Experience' + startOffset: 157 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=157 + endOffset: 376 +- name: 'Transition to Independent Work: Solopreneur Since 2017' + startOffset: 376 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=376 + endOffset: 402 +- name: 'Defining Solopreneurship: Intentional Smallness and Revenue Diversification' + startOffset: 402 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=402 + endOffset: 585 +- name: 'Small-Business Philosophy: Alternatives to Venture-Backed Growth' + startOffset: 585 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=585 + endOffset: 821 +- name: 'Daily Routine: Exercise, Prioritization, and Task Queues' + startOffset: 821 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=821 + endOffset: 987 +- name: 'Work Allocation Strategy: Exponential Projects vs. 
Consulting' + startOffset: 987 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=987 + endOffset: 1301 +- name: 'Long-Term Escape Plan: Incremental Transition Out of Corporate Roles' + startOffset: 1301 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1301 + endOffset: 1505 +- name: 'Income Mix Formula: Courses, University Teaching, and Select Consulting' + startOffset: 1505 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1505 + endOffset: 1672 +- name: 'Goal Criteria: Scale, Ethics, and Asynchronous Work Preferences' + startOffset: 1672 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1672 + endOffset: 1860 +- name: 'Maintaining Motivation: Deadlines, Enjoyment, and Accepting Imperfection' + startOffset: 1860 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1860 + endOffset: 1991 +- name: 'Pressure Management: Demand as a Signal of Success' + startOffset: 1991 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1991 + endOffset: 2144 +- name: 'Publishing Options: Tradeoffs Between Traditional and Self-Publishing' + startOffset: 2144 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2144 + endOffset: 2288 +- name: 'Book Workflow: Outlines First, Build Projects, Then Write' + startOffset: 2288 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2288 + endOffset: 2494 +- name: 'Writing Discipline: Treating Book Projects Like Marathons' + startOffset: 2494 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2494 + endOffset: 2576 +- name: 'Distributed Income: Combining Books, Apps, Real Estate, and Investments' + startOffset: 2576 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2576 + endOffset: 2787 +- name: 'Side-Gig Strategy: Building the “Tunnel” While Employed' + startOffset: 2787 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2787 + endOffset: 2952 +- name: 'Time & Cost Tactics: Reduce Commute, Lower Expenses, and Save Cash' + startOffset: 2952 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2952 + endOffset: 3229 +- 
name: 'Financial Readiness: When to Quit Full-Time Employment' + startOffset: 3229 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3229 + endOffset: 3306 +- name: 'Networking for Independence: Deep Skill, Visibility, and Avoiding Management' + startOffset: 3306 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3306 + endOffset: 3504 +- name: 'University Teaching Path: Leverage Expertise, Professors, and Written Work' + startOffset: 3504 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3504 + endOffset: 3600 +- name: 'Contact & Resources: NoahGift.com and LinkedIn' + startOffset: 3600 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3600 + endOffset: 3610 +- name: 'Closing Remarks: Final Advice — Be Excellent and Bet on Yourself' + startOffset: 3610 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3610 + endOffset: 3536 + transcript: - header: Podcast Introduction - header: 'Episode Topic & Guest Overview: Becoming a Solopreneur with Noah Gift' @@ -1692,118 +1801,4 @@ transcript: sec: 3610 time: '1:00:10' who: Alexey -intro: How do you build a sustainable solopreneur business that doesn't rely on VC - funding—while diversifying income across courses, consulting, books, and side-gigs? - In this episode, Noah Gift, founder of Pragmatic AI Labs and a lecturer on machine - learning and data science at Northwestern, Duke MIDS, UC Berkeley, UC Davis, and - UNC Charlotte, walks through his transition to solo work (since 2017) and a repeatable - income mix for intentional small-business ownership.

We cover defining - solopreneurship, the practical income mix formula (online courses, university teaching, - selective consulting, book publishing, apps, real estate, and investments) plus - how to build a side-gig tunnel while employed. Noah shares work allocation strategies - (exponential projects vs. consulting), publishing trade-offs, a book workflow (outline - -> projects -> write), daily routines, time-and-cost tactics, and signals for financial - readiness to quit full-time work.

If you're planning to diversify income - streams with online courses, consulting, or writing, this episode gives actionable - steps, publishing considerations, and networking advice to help you transition deliberately - and scale revenue without sacrificing control. -dateadded: '2021-11-06' -duration: PT00H58M56S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=0 - endOffset: 74 -- name: 'Episode Topic & Guest Overview: Becoming a Solopreneur with Noah Gift' - startOffset: 74 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=74 - endOffset: 157 -- name: 'Early Career: TV, Caltech, Disney and Film Pipeline Experience' - startOffset: 157 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=157 - endOffset: 376 -- name: 'Transition to Independent Work: Solopreneur Since 2017' - startOffset: 376 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=376 - endOffset: 402 -- name: 'Defining Solopreneurship: Intentional Smallness and Revenue Diversification' - startOffset: 402 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=402 - endOffset: 585 -- name: 'Small-Business Philosophy: Alternatives to Venture-Backed Growth' - startOffset: 585 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=585 - endOffset: 821 -- name: 'Daily Routine: Exercise, Prioritization, and Task Queues' - startOffset: 821 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=821 - endOffset: 987 -- name: 'Work Allocation Strategy: Exponential Projects vs. 
Consulting' - startOffset: 987 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=987 - endOffset: 1301 -- name: 'Long-Term Escape Plan: Incremental Transition Out of Corporate Roles' - startOffset: 1301 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1301 - endOffset: 1505 -- name: 'Income Mix Formula: Courses, University Teaching, and Select Consulting' - startOffset: 1505 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1505 - endOffset: 1672 -- name: 'Goal Criteria: Scale, Ethics, and Asynchronous Work Preferences' - startOffset: 1672 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1672 - endOffset: 1860 -- name: 'Maintaining Motivation: Deadlines, Enjoyment, and Accepting Imperfection' - startOffset: 1860 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1860 - endOffset: 1991 -- name: 'Pressure Management: Demand as a Signal of Success' - startOffset: 1991 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1991 - endOffset: 2144 -- name: 'Publishing Options: Tradeoffs Between Traditional and Self-Publishing' - startOffset: 2144 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2144 - endOffset: 2288 -- name: 'Book Workflow: Outlines First, Build Projects, Then Write' - startOffset: 2288 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2288 - endOffset: 2494 -- name: 'Writing Discipline: Treating Book Projects Like Marathons' - startOffset: 2494 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2494 - endOffset: 2576 -- name: 'Distributed Income: Combining Books, Apps, Real Estate, and Investments' - startOffset: 2576 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2576 - endOffset: 2787 -- name: 'Side-Gig Strategy: Building the “Tunnel” While Employed' - startOffset: 2787 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2787 - endOffset: 2952 -- name: 'Time & Cost Tactics: Reduce Commute, Lower Expenses, and Save Cash' - startOffset: 2952 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2952 - endOffset: 3229 -- 
name: 'Financial Readiness: When to Quit Full-Time Employment' - startOffset: 3229 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3229 - endOffset: 3306 -- name: 'Networking for Independence: Deep Skill, Visibility, and Avoiding Management' - startOffset: 3306 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3306 - endOffset: 3504 -- name: 'University Teaching Path: Leverage Expertise, Professors, and Written Work' - startOffset: 3504 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3504 - endOffset: 3600 -- name: 'Contact & Resources: NoahGift.com and LinkedIn' - startOffset: 3600 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3600 - endOffset: 3610 -- name: 'Closing Remarks: Final Advice — Be Excellent and Bet on Yourself' - startOffset: 3610 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3610 - endOffset: 3536 --- diff --git a/_podcast/s11e09-teaching-and-mentoring-in-data-analytics.md b/_podcast/teaching-mentoring-data-analytics-fintech.md similarity index 97% rename from _podcast/s11e09-teaching-and-mentoring-in-data-analytics.md rename to _podcast/teaching-mentoring-data-analytics-fintech.md index 3ecf924e..3b66e941 100644 --- a/_podcast/s11e09-teaching-and-mentoring-in-data-analytics.md +++ b/_podcast/teaching-mentoring-data-analytics-fintech.md @@ -1,20 +1,144 @@ --- +title: 'Designing FinTech Data Analytics Curriculum: Fraud Detection, BigQuery Labs & Mentoring' +short: Teaching and Mentoring in Data Analytics +season: 11 episode: 9 guests: - irinabrudaru +image: images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg ids: anchor: Teaching-and-Mentoring-in-Data-Analytics---Irina-Brudaru-e1rihm1 youtube: saaRRzgHsmE -image: images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Teaching-and-Mentoring-in-Data-Analytics---Irina-Brudaru-e1rihm1 apple: 
https://podcasts.apple.com/us/podcast/teaching-and-mentoring-in-data-analytics-irina-brudaru/id1541710331?i=1000588551445 spotify: https://open.spotify.com/episode/0ES2N4yIu61bUB3dY9oxgQ?si=_KFHPXOUQVap8oSBp6AJgA youtube: https://www.youtube.com/watch?v=saaRRzgHsmE -season: 11 -short: Teaching and Mentoring in Data Analytics -title: 'Designing FinTech Data Analytics Curriculum: Fraud Detection, BigQuery Labs - & Mentoring' + +description: 'Discover FinTech data analytics curriculum: fraud detection, BigQuery labs & mentoring—gain hands-on cloud skills, chargeback modeling, SQL and career guidance.' +intro: 'How do you design a FinTech data analytics curriculum that teaches fraud detection, chargeback modeling, and real-world cloud skills while also mentoring diverse learners? In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google data leader, and long-time mentor and teacher — walks through building practical FinTech courses informed by industry experience across Berlin, Amsterdam and the Bay Area.

We cover curriculum components you can reuse: rule‑based vs neural approaches to fraud detection, chargeback modeling, deploying ML in production, and essential business skills for analysts. Irina explains hands‑on BigQuery labs, student cloud access strategies, and how to demystify Google Cloud for analysts. She shares mentoring methods (visual explanations, learner‑centered teaching), instructor sourcing and storytelling for classroom impact, cohort analysis for product metrics, recruiting women to zoomcamps, and securing technical reviewers.

Listen to gain actionable guidance on structuring FinTech analytics training, designing cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring practices that help career changers and underrepresented learners succeed in data analytics.' +topics: +- data analytics +- fintech +- mentoring +- teaching +- data science +dateadded: 2022-12-03 + +duration: PT01H46S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=0 + endOffset: 68 +- name: 'Guest Overview: Irina Brudaru — teacher, curriculum developer, mentor in + data analytics' + startOffset: 68 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=68 + endOffset: 133 +- name: 'Career Origins: early computing, Romania education, Max Planck research' + startOffset: 133 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=133 + endOffset: 221 +- name: 'Industry Transition: data consulting, BI, Google and product analytics experience' + startOffset: 221 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=221 + endOffset: 376 +- name: 'International Roles & Management: San Francisco, Netherlands, Berlin; leading + data teams' + startOffset: 376 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=376 + endOffset: 537 +- name: 'Early Mentoring Wins: mentoring family, interns, and career pivot stories' + startOffset: 537 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=537 + endOffset: 574 +- name: 'Mentoring Methods: visual explanations, learner-centered teaching techniques' + startOffset: 574 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=574 + endOffset: 597 +- name: 'Community Teaching: NGOs, bootcamps, and FrauenLoop volunteer work' + startOffset: 597 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=597 + endOffset: 798 +- name: 'Curriculum Design for FinTech: AI Guild program planning and certification + integration' + startOffset: 798 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=798 + endOffset: 896 
+- name: 'Curriculum Components: fraud, chargeback, ML in production, and business + skills' + startOffset: 896 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=896 + endOffset: 1107 +- name: 'Instructor Sourcing & Storytelling: finding teachers and teaching data storytelling' + startOffset: 1107 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1107 + endOffset: 1334 +- name: 'Fraud Detection & Chargeback Modeling: rule-based vs neural approaches in + FinTech' + startOffset: 1334 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1334 + endOffset: 1543 +- name: 'Hands-on Cloud Teaching: BigQuery labs, student cloud access, demystifying + Google Cloud' + startOffset: 1543 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1543 + endOffset: 1734 +- name: 'Overcoming Cloud Reluctance: focusing on essential cloud skills for analysts' + startOffset: 1734 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1734 + endOffset: 1791 +- name: 'Managerial Scope: balancing analytics, data engineering, and technical credibility' + startOffset: 1791 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1791 + endOffset: 1910 +- name: 'Cohort Analysis Explained: retention metrics, product analytics visualization' + startOffset: 1910 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1910 + endOffset: 2134 +- name: 'Path to Formal Teaching: outreach, invitations, and joining teaching programs' + startOffset: 2134 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2134 + endOffset: 2329 +- name: 'Gender Diversity Research: plans to analyze company data for inclusion insights' + startOffset: 2329 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2329 + endOffset: 2476 +- name: 'Recruiting Women to Zoomcamps: targeted outreach, partnerships, and scheduling + considerations' + startOffset: 2476 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2476 + endOffset: 2724 +- name: 'Securing Technical Feedback: finding reviewers, advocating for code review + processes' + 
startOffset: 2724 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2724 + endOffset: 2979 +- name: 'Learning Antipatterns: ML hype, overengineering, and tool-centric approaches' + startOffset: 2979 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2979 + endOffset: 3286 +- name: 'Career Transition Advice: moving into data science from non-technical roles' + startOffset: 3286 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3286 + endOffset: 3488 +- name: 'Core Analyst Fundamentals: SQL, data visualization, soft skills, and product + tracking' + startOffset: 3488 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3488 + endOffset: 3632 +- name: 'Community Partnerships: collaborating with Women in Tech groups and volunteer + orgs' + startOffset: 3632 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3632 + endOffset: 3666 +- name: Episode Wrap-up and Contact Info + startOffset: 3666 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3666 + endOffset: 3646 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Irina Brudaru — teacher, curriculum developer, mentor in @@ -1445,138 +1569,6 @@ transcript: sec: 3714 time: '1:01:54' who: Irina -description: 'Discover FinTech data analytics curriculum: fraud detection, BigQuery - labs & mentoring—gain hands-on cloud skills, chargeback modeling, SQL and career - guidance.' -intro: 'How do you design a FinTech data analytics curriculum that teaches fraud detection, - chargeback modeling, and real-world cloud skills while also mentoring diverse learners? - In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google - data leader, and long-time mentor and teacher — walks through building practical - FinTech courses informed by industry experience across Berlin, Amsterdam and the - Bay Area.

We cover curriculum components you can reuse: rule‑based vs neural - approaches to fraud detection, chargeback modeling, deploying ML in production, - and essential business skills for analysts. Irina explains hands‑on BigQuery labs, - student cloud access strategies, and how to demystify Google Cloud for analysts. - She shares mentoring methods (visual explanations, learner‑centered teaching), instructor - sourcing and storytelling for classroom impact, cohort analysis for product metrics, - recruiting women to zoomcamps, and securing technical reviewers.

Listen - to gain actionable guidance on structuring FinTech analytics training, designing - cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring - practices that help career changers and underrepresented learners succeed in data - analytics.' -dateadded: '2022-12-03' -duration: PT01H46S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=0 - endOffset: 68 -- name: 'Guest Overview: Irina Brudaru — teacher, curriculum developer, mentor in - data analytics' - startOffset: 68 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=68 - endOffset: 133 -- name: 'Career Origins: early computing, Romania education, Max Planck research' - startOffset: 133 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=133 - endOffset: 221 -- name: 'Industry Transition: data consulting, BI, Google and product analytics experience' - startOffset: 221 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=221 - endOffset: 376 -- name: 'International Roles & Management: San Francisco, Netherlands, Berlin; leading - data teams' - startOffset: 376 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=376 - endOffset: 537 -- name: 'Early Mentoring Wins: mentoring family, interns, and career pivot stories' - startOffset: 537 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=537 - endOffset: 574 -- name: 'Mentoring Methods: visual explanations, learner-centered teaching techniques' - startOffset: 574 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=574 - endOffset: 597 -- name: 'Community Teaching: NGOs, bootcamps, and FrauenLoop volunteer work' - startOffset: 597 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=597 - endOffset: 798 -- name: 'Curriculum Design for FinTech: AI Guild program planning and certification - integration' - startOffset: 798 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=798 - endOffset: 896 -- name: 'Curriculum Components: fraud, chargeback, ML in production, 
and business - skills' - startOffset: 896 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=896 - endOffset: 1107 -- name: 'Instructor Sourcing & Storytelling: finding teachers and teaching data storytelling' - startOffset: 1107 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1107 - endOffset: 1334 -- name: 'Fraud Detection & Chargeback Modeling: rule-based vs neural approaches in - FinTech' - startOffset: 1334 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1334 - endOffset: 1543 -- name: 'Hands-on Cloud Teaching: BigQuery labs, student cloud access, demystifying - Google Cloud' - startOffset: 1543 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1543 - endOffset: 1734 -- name: 'Overcoming Cloud Reluctance: focusing on essential cloud skills for analysts' - startOffset: 1734 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1734 - endOffset: 1791 -- name: 'Managerial Scope: balancing analytics, data engineering, and technical credibility' - startOffset: 1791 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1791 - endOffset: 1910 -- name: 'Cohort Analysis Explained: retention metrics, product analytics visualization' - startOffset: 1910 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1910 - endOffset: 2134 -- name: 'Path to Formal Teaching: outreach, invitations, and joining teaching programs' - startOffset: 2134 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2134 - endOffset: 2329 -- name: 'Gender Diversity Research: plans to analyze company data for inclusion insights' - startOffset: 2329 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2329 - endOffset: 2476 -- name: 'Recruiting Women to Zoomcamps: targeted outreach, partnerships, and scheduling - considerations' - startOffset: 2476 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2476 - endOffset: 2724 -- name: 'Securing Technical Feedback: finding reviewers, advocating for code review - processes' - startOffset: 2724 - url: 
https://www.youtube.com/watch?v=saaRRzgHsmE&t=2724 - endOffset: 2979 -- name: 'Learning Antipatterns: ML hype, overengineering, and tool-centric approaches' - startOffset: 2979 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2979 - endOffset: 3286 -- name: 'Career Transition Advice: moving into data science from non-technical roles' - startOffset: 3286 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3286 - endOffset: 3488 -- name: 'Core Analyst Fundamentals: SQL, data visualization, soft skills, and product - tracking' - startOffset: 3488 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3488 - endOffset: 3632 -- name: 'Community Partnerships: collaborating with Women in Tech groups and volunteer - orgs' - startOffset: 3632 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3632 - endOffset: 3666 -- name: Episode Wrap-up and Contact Info - startOffset: 3666 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3666 - endOffset: 3646 --- Links: diff --git a/_podcast/s12e04-doing-software-engineering-in-academia.md b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md similarity index 97% rename from _podcast/s12e04-doing-software-engineering-in-academia.md rename to _podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md index 395287b2..b7b9722d 100644 --- a/_podcast/s12e04-doing-software-engineering-in-academia.md +++ b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md @@ -1,20 +1,153 @@ --- +title: 'Teaching Open Science & Reproducible Research: Research Software Engineering Practices for Academia' +short: Teaching Open Science & Reproducible Research +season: 12 episode: 4 guests: - johannabayer +image: images/podcast/s12e04-doing-software-engineering-in-academia.jpg ids: anchor: Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb youtube: K0PdQITQzVQ -image: images/podcast/s12e04-doing-software-engineering-in-academia.jpg 
links: anchor: https://anchor.fm/datatalksclub/episodes/Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb apple: https://podcasts.apple.com/us/podcast/doing-software-engineering-in-academia-johanna-bayer/id1541710331?i=1000594351759 spotify: https://open.spotify.com/episode/3ol91Xt0A6VBbPgFxGh5N6?si=QDcjMCJ7SOG6eJjjYbyEcg youtube: https://www.youtube.com/watch?v=K0PdQITQzVQ -season: 12 -short: Doing Software Engineering in Academia -title: 'Teach Reproducible Research: RSE Practices for Neuroimaging, Packaging, MLflow - & Data Sharing' + +description: 'Master reproducible research for neuroimaging: packaging, MLflow & data sharing to publish reproducible manuscripts, boost citations and career visibility.' +intro: 'How do you teach reproducible research and practical research software engineering (RSE) skills to neuroimaging students and researchers? In this episode, Johanna Bayer — a psychologist-turned-computational neuroscientist completing a PhD in machine learning for clinical neuroimaging at the University of Melbourne and an open science advocate — walks through concrete approaches for teaching reproducible research. We cover course design (Carpentries-style curricula, Git introductions, and reproducible manuscripts with embedded code), guided onboarding to open source (small repos, pull requests, cookiecutter templates), and core coding practices to teach: packaging, environments, formatting, testing, branching and versioning. Johanna also discusses experiment tracking with MLflow, treating software as a research output (DOIs and toolboxes), data sharing realities and sensitive-data practices, and strategies for culture change in labs via hackathons and grassroots efforts. 
Listeners will gain practical teaching tactics, tooling recommendations, and considerations for infrastructure and academic-industry tensions — plus pointers to resources like The Turing Way, The Carpentries, and the ML Solutions Handbook to help implement reproducible research and RSE practices in neuroimaging projects.' +topics: +- open science +- software engineering +- academia +- teaching +dateadded: 2023-01-14 + +duration: PT00H58M10S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=0 + endOffset: 68 +- name: 'Guest Background: Johanna Bayer — Psychology to Machine Learning in Neuroimaging' + startOffset: 68 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=68 + endOffset: 144 +- name: 'Academic Journey: Studies in Germany, Zurich and Move to Melbourne' + startOffset: 144 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=144 + endOffset: 327 +- name: 'Teaching Open Science: Intro to Git, Homework Support and Course Structure' + startOffset: 327 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=327 + endOffset: 459 +- name: Carpentries & Structured Beginner Curriculum for Reproducible Research + startOffset: 459 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=459 + endOffset: 510 +- name: 'Open Science Curriculum: Reproducible Manuscripts with Embedded Code' + startOffset: 510 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=510 + endOffset: 652 +- name: 'Guided Onboarding to Open Source: Small Repos, Pull Requests & Turing Book' + startOffset: 652 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=652 + endOffset: 730 +- name: 'What RSE Means: Software-Focused Research Outputs and Practices' + startOffset: 730 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=730 + endOffset: 850 +- name: 'Academic RSE Roles: PhD Students, Methods Papers and Toolboxes' + startOffset: 850 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=850 + endOffset: 996 +- name: 'Software as 
Research Output: DOIs, Toolboxes and Publishing Code' + startOffset: 996 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=996 + endOffset: 1030 +- name: 'Culture Change in Labs: Convincing Supervisors & Grassroots Hackathons' + startOffset: 1030 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1030 + endOffset: 1205 +- name: 'Industry Lessons for Academia: Programming Expectations & Tool Adoption' + startOffset: 1205 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1205 + endOffset: 1332 +- name: 'Experiment Tracking in Research: MLflow and Reproducibility Tools' + startOffset: 1332 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1332 + endOffset: 1336 +- name: 'Barriers to Teaching Software Skills: Time, Expertise and Fear of Scrutiny' + startOffset: 1336 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1336 + endOffset: 1434 +- name: 'Infrastructure Gaps: Hosting Interactive Reproducible Papers and Costs' + startOffset: 1434 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1434 + endOffset: 1658 +- name: 'Core Coding Practices to Teach: Packaging, Environments, Formatting & Tests' + startOffset: 1658 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1658 + endOffset: 1698 +- name: 'Learning by Doing: Brainhack, Hackathons, Community Contributions' + startOffset: 1698 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1698 + endOffset: 1844 +- name: 'Formal Courses vs Self-Learning: Structure, Discipline and Freelancing' + startOffset: 1844 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1844 + endOffset: 1984 +- name: 'Collaboration & Code Review: Working Alone vs Community Feedback' + startOffset: 1984 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1984 + endOffset: 2165 +- name: 'Benefits of Open Code: Citations, Collaboration and Career Visibility' + startOffset: 2165 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2165 + endOffset: 2221 +- name: 'Data Sharing Reality: "Data Upon Request", Access Controls and 
Consortia' + startOffset: 2221 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2221 + endOffset: 2330 +- name: 'Project Case Study: Normative Brain Model — Folder Structure & Cookiecutter' + startOffset: 2330 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2330 + endOffset: 2367 +- name: 'Applied Engineering Practices: Branching, Formatting, Versioning & MLflow' + startOffset: 2367 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2367 + endOffset: 2542 +- name: 'Sensitive Data Practices: De-identification and Controlled Access' + startOffset: 2542 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2542 + endOffset: 2724 +- name: Balancing Open Source, Hackathons and Full-Time Research Commitments + startOffset: 2724 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2724 + endOffset: 2862 +- name: 'Discovering Projects: GitHub Trending, Social Media & Community Platforms' + startOffset: 2862 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2862 + endOffset: 2986 +- name: 'Contributing to Repositories: Readme, Contributing Guides, Issues & Communication' + startOffset: 2986 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2986 + endOffset: 3142 +- name: 'Open Publishing vs Industry IP: Academic Openness and Commercial Concerns' + startOffset: 3142 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3142 + endOffset: 3312 +- name: 'Recommended Resources: The Turing Way, The Carpentries & ML Solutions Handbook' + startOffset: 3312 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3312 + endOffset: 3483 +- name: Episode Conclusion and Closing Remarks + startOffset: 3483 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3483 + endOffset: 3490 + transcript: - header: Podcast Introduction - header: 'Guest Background: Johanna Bayer — Psychology to Machine Learning in Neuroimaging' @@ -1153,147 +1286,6 @@ transcript: sec: 3558 time: '59:18' who: Alexey -description: 'Master reproducible research for neuroimaging: packaging, MLflow & data - 
sharing to publish reproducible manuscripts, boost citations and career visibility.' -intro: 'How do you teach reproducible research and practical research software engineering - (RSE) skills to neuroimaging students and researchers? In this episode, Johanna Bayer - — a psychologist-turned-computational neuroscientist completing a PhD in machine - learning for clinical neuroimaging at the University of Melbourne and an open science - advocate — walks through concrete approaches for teaching reproducible research. - We cover course design (Carpentries-style curricula, Git introductions, and reproducible - manuscripts with embedded code), guided onboarding to open source (small repos, - pull requests, cookiecutter templates), and core coding practices to teach: packaging, - environments, formatting, testing, branching and versioning. Johanna also discusses - experiment tracking with MLflow, treating software as a research output (DOIs and - toolboxes), data sharing realities and sensitive-data practices, and strategies - for culture change in labs via hackathons and grassroots efforts. Listeners will - gain practical teaching tactics, tooling recommendations, and considerations for - infrastructure and academic-industry tensions — plus pointers to resources like - The Turing Way, The Carpentries, and the ML Solutions Handbook to help implement - reproducible research and RSE practices in neuroimaging projects.' 
-dateadded: '2023-01-14' -duration: PT00H58M10S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=0 - endOffset: 68 -- name: 'Guest Background: Johanna Bayer — Psychology to Machine Learning in Neuroimaging' - startOffset: 68 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=68 - endOffset: 144 -- name: 'Academic Journey: Studies in Germany, Zurich and Move to Melbourne' - startOffset: 144 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=144 - endOffset: 327 -- name: 'Teaching Open Science: Intro to Git, Homework Support and Course Structure' - startOffset: 327 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=327 - endOffset: 459 -- name: Carpentries & Structured Beginner Curriculum for Reproducible Research - startOffset: 459 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=459 - endOffset: 510 -- name: 'Open Science Curriculum: Reproducible Manuscripts with Embedded Code' - startOffset: 510 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=510 - endOffset: 652 -- name: 'Guided Onboarding to Open Source: Small Repos, Pull Requests & Turing Book' - startOffset: 652 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=652 - endOffset: 730 -- name: 'What RSE Means: Software-Focused Research Outputs and Practices' - startOffset: 730 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=730 - endOffset: 850 -- name: 'Academic RSE Roles: PhD Students, Methods Papers and Toolboxes' - startOffset: 850 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=850 - endOffset: 996 -- name: 'Software as Research Output: DOIs, Toolboxes and Publishing Code' - startOffset: 996 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=996 - endOffset: 1030 -- name: 'Culture Change in Labs: Convincing Supervisors & Grassroots Hackathons' - startOffset: 1030 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1030 - endOffset: 1205 -- name: 'Industry Lessons for Academia: Programming Expectations & Tool 
Adoption' - startOffset: 1205 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1205 - endOffset: 1332 -- name: 'Experiment Tracking in Research: MLflow and Reproducibility Tools' - startOffset: 1332 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1332 - endOffset: 1336 -- name: 'Barriers to Teaching Software Skills: Time, Expertise and Fear of Scrutiny' - startOffset: 1336 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1336 - endOffset: 1434 -- name: 'Infrastructure Gaps: Hosting Interactive Reproducible Papers and Costs' - startOffset: 1434 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1434 - endOffset: 1658 -- name: 'Core Coding Practices to Teach: Packaging, Environments, Formatting & Tests' - startOffset: 1658 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1658 - endOffset: 1698 -- name: 'Learning by Doing: Brainhack, Hackathons, Community Contributions' - startOffset: 1698 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1698 - endOffset: 1844 -- name: 'Formal Courses vs Self-Learning: Structure, Discipline and Freelancing' - startOffset: 1844 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1844 - endOffset: 1984 -- name: 'Collaboration & Code Review: Working Alone vs Community Feedback' - startOffset: 1984 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1984 - endOffset: 2165 -- name: 'Benefits of Open Code: Citations, Collaboration and Career Visibility' - startOffset: 2165 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2165 - endOffset: 2221 -- name: 'Data Sharing Reality: "Data Upon Request", Access Controls and Consortia' - startOffset: 2221 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2221 - endOffset: 2330 -- name: 'Project Case Study: Normative Brain Model — Folder Structure & Cookiecutter' - startOffset: 2330 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2330 - endOffset: 2367 -- name: 'Applied Engineering Practices: Branching, Formatting, Versioning & MLflow' - startOffset: 2367 - url: 
https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2367 - endOffset: 2542 -- name: 'Sensitive Data Practices: De-identification and Controlled Access' - startOffset: 2542 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2542 - endOffset: 2724 -- name: Balancing Open Source, Hackathons and Full-Time Research Commitments - startOffset: 2724 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2724 - endOffset: 2862 -- name: 'Discovering Projects: GitHub Trending, Social Media & Community Platforms' - startOffset: 2862 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2862 - endOffset: 2986 -- name: 'Contributing to Repositories: Readme, Contributing Guides, Issues & Communication' - startOffset: 2986 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2986 - endOffset: 3142 -- name: 'Open Publishing vs Industry IP: Academic Openness and Commercial Concerns' - startOffset: 3142 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3142 - endOffset: 3312 -- name: 'Recommended Resources: The Turing Way, The Carpentries & ML Solutions Handbook' - startOffset: 3312 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3312 - endOffset: 3483 -- name: Episode Conclusion and Closing Remarks - startOffset: 3483 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3483 - endOffset: 3490 --- Links: diff --git a/_podcast/s01e05-mentoring.md b/_podcast/tech-mentoring-how-to-find-and-become-a-mentor.md.md similarity index 93% rename from _podcast/s01e05-mentoring.md rename to _podcast/tech-mentoring-how-to-find-and-become-a-mentor.md.md index 0d697aa9..7b860f51 100644 --- a/_podcast/s01e05-mentoring.md +++ b/_podcast/tech-mentoring-how-to-find-and-become-a-mentor.md.md @@ -1,16 +1,11 @@ --- title: 'How to Find a Mentor and Become One: Mentoring Strategies for Tech Careers' short: Mentoring +season: 1 +episode: 5 guests: - rahuljain image: images/podcast/s01e05-mentoring.jpg -description: 'Discover practical mentoring strategies for tech careers: find mentors, - master cold 
outreach, run effective sessions, start paid mentorship & boost leadership.' -keywords: mentoring, career development, tech mentorship, finding a mentor, becoming - a mentor, imposter syndrome, tech leadership, career advice, professional development, - data engineering -season: 1 -episode: 5 ids: youtube: LQvwTNQbPg4 anchor: Mentoring---Rahul-Jain-eo7cmu @@ -19,23 +14,12 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Mentoring---Rahul-Jain-eo7cmu spotify: TODO apple: TODO -intro: 'Struggling to find a mentor — or wondering how to become one — in a fast-moving - tech career? In this episode, Rahul Jain, a senior solutions engineer and data/AI - leader with 15+ years driving enterprise data transformations and a career arc from - mining engineering to data engineering and leadership, walks through practical mentoring - strategies for tech professionals. We define mentoring (purpose, scope, types), - explore early models like Thoughtworks’ sponsorship, and show how to find mentors - through networks, platforms, and cold outreach — with concrete outreach best practices: - specificity, background, and follow‑up. Rahul covers preparing mentoring sessions - (goals, agendas), mentoring formats (one‑off advice vs long‑term relationships), - and how to start as a mentor using simple first steps and platforms. Topics include - benefits of mentoring, transferable workplace guidance, developing people skills - (empathy, listening), balancing technical work and leadership, tackling imposter - syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining - development plans. Listen to gain actionable steps, templates, and mindset shifts - to both secure meaningful mentorship and build a sustainable mentoring practice - in your tech career.' -dateadded: '2021-02-23' + +description: 'Discover practical mentoring strategies for tech careers: find mentors, master cold outreach, run effective sessions, start paid mentorship & boost leadership.' 
+intro: 'Struggling to find a mentor — or wondering how to become one — in a fast-moving tech career? In this episode, Rahul Jain, a senior solutions engineer and data/AI leader with 15+ years driving enterprise data transformations and a career arc from mining engineering to data engineering and leadership, walks through practical mentoring strategies for tech professionals. We define mentoring (purpose, scope, types), explore early models like Thoughtworks’ sponsorship, and show how to find mentors through networks, platforms, and cold outreach — with concrete outreach best practices: specificity, background, and follow‑up. Rahul covers preparing mentoring sessions (goals, agendas), mentoring formats (one‑off advice vs long‑term relationships), and how to start as a mentor using simple first steps and platforms. Topics include benefits of mentoring, transferable workplace guidance, developing people skills (empathy, listening), balancing technical work and leadership, tackling imposter syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining development plans. Listen to gain actionable steps, templates, and mindset shifts to both secure meaningful mentorship and build a sustainable mentoring practice in your tech career.' +dateadded: 2021-02-23 + + quotableClips: - name: Episode Introduction startOffset: 0 @@ -125,6 +109,8 @@ quotableClips: startOffset: 3480 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=3480 endOffset: 3480 + +keywords: mentoring, career development, tech mentorship, finding a mentor, becoming a mentor, imposter syndrome, tech leadership, career advice, professional development, data engineering --- Today we're discussing mentoring with [Rahul Jain](/people/rahuljain.html), a technical leader with about 20 years of experience building and running software products. 
He currently leads the Business Intelligence and Data Engineering units at Omio, a ticket-booking company, and mentors engineers and managers through The Mentoring Club. diff --git a/_podcast/s02e01-writing.md b/_podcast/technical-writing-for-data-scientists.md similarity index 91% rename from _podcast/s02e01-writing.md rename to _podcast/technical-writing-for-data-scientists.md index 2457edf0..e3534c41 100644 --- a/_podcast/s02e01-writing.md +++ b/_podcast/technical-writing-for-data-scientists.md @@ -1,17 +1,11 @@ --- title: 'Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career' short: 'Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career' +season: 2 +episode: 1 guests: - eugeneyan image: images/podcast/s02e01-writing.jpg -description: 'Master technical writing for data science with a practical 7-day workflow: - outline-first cadence, portfolio tips, docs & distribution to accelerate your career.' -keywords: technical writing, data science career, ML engineer writing, documentation - skills, technical communication, data science blog, career growth, writing process, - Amazon data scientist, Eugene Yan, technical documentation, data science portfolio, - ML career advice, technical writing tips, data science writing -season: 2 -episode: 1 ids: youtube: vXWGd7olv3c anchor: The-Importance-of-Writing-in-a-Tech-Career---Eugene-Yan-ep17du @@ -20,23 +14,12 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-Importance-of-Writing-in-a-Tech-Career---Eugene-Yan-ep17du spotify: TODO apple: TODO -intro: How can technical writing accelerate your data science career in just one week? - In this episode, Eugene Yan — an Applied Scientist at Amazon who previously led data - science teams at Lazada and uCare.ai and writes about ML in production and career - growth — walks through a practical, repeatable 7-day workflow for technical writing - tailored to data scientists.

We cover Eugene’s career transition and first - public writing, motivations for sharing work, and how to target readers (peers, - future teammates, and hiring managers). He frames writing as a product with a weekly - shipping cadence, explains the outline-first method for filtering ideas, and outlines - a realistic time budget and editing limits. You’ll get concrete guidance on idea - sourcing, title crafting, article length, blogging tools (Medium, Substack, WordPress, - Jekyll), writing habits, distribution via Twitter and LinkedIn, and writing at work - (press releases, design docs, decision logs). Practical portfolio advice — clear - README, quick start, repo tour — and tips to iterate outlines and ship weekly round - out the episode.

Listen to learn a concrete 7-day workflow, documentation - and portfolio best practices, and distribution tactics to boost your technical writing - and advance your data science career. -dateadded: '2021-02-23' + +description: 'Master technical writing for data science with a practical 7-day workflow: outline-first cadence, portfolio tips, docs & distribution to accelerate your career.' +intro: How can technical writing accelerate your data science career in just one week? In this episode, Eugene Yan — an Applied Scientist at Amazon who previously led data science teams at Lazada and uCare.ai and writes about ML in production and career growth — walks through a practical, repeatable 7-day workflow for technical writing tailored to data scientists.

We cover Eugene’s career transition and first public writing, motivations for sharing work, and how to target readers (peers, future teammates, and hiring managers). He frames writing as a product with a weekly shipping cadence, explains the outline-first method for filtering ideas, and outlines a realistic time budget and editing limits. You’ll get concrete guidance on idea sourcing, title crafting, article length, blogging tools (Medium, Substack, WordPress, Jekyll), writing habits, distribution via Twitter and LinkedIn, and writing at work (press releases, design docs, decision logs). Practical portfolio advice — clear README, quick start, repo tour — and tips to iterate outlines and ship weekly round out the episode.

Listen to learn a concrete 7-day workflow, documentation and portfolio best practices, and distribution tactics to boost your technical writing and advance your data science career. +dateadded: 2021-02-23 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -118,6 +101,8 @@ quotableClips: startOffset: 3630 url: https://www.youtube.com/watch?v=vXWGd7olv3c&t=3630 endOffset: 3630 + +keywords: technical writing, data science career, ML engineer writing, documentation skills, technical communication, data science blog, career growth, writing process, Amazon data scientist, Eugene Yan, technical documentation, data science portfolio, ML career advice, technical writing tips, data science writing --- Today we're discussing technical writing, logging, documentation, and more. Our special guest is [Eugene Yan](/people/eugeneyan). Eugene works at the intersection of machine learning and product, building pragmatic ML systems while writing and speaking about effective data science, ML in production, and career growth. diff --git a/_podcast/s16e01-datatalks-club-anniversary-interview.md b/_podcast/to-update/s16e01-datatalks-club-anniversary-interview.md similarity index 97% rename from _podcast/s16e01-datatalks-club-anniversary-interview.md rename to _podcast/to-update/s16e01-datatalks-club-anniversary-interview.md index 726d768f..8846b08e 100644 --- a/_podcast/s16e01-datatalks-club-anniversary-interview.md +++ b/_podcast/to-update/s16e01-datatalks-club-anniversary-interview.md @@ -1,36 +1,112 @@ --- +title: "Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth and Evolution" +short: DataTalks.Club Anniversary Interview +season: 16 episode: 1 guests: - alexeygrigorev - johannabayer -intro: How do you build and sustain a data community that helps people switch into - machine learning careers while adapting to rapid AI change? 
In this anniversary - episode of DataTalks.Club, contributors who transitioned from roles like Java development - into machine learning and Python—and who now work full‑time on community and engineering - efforts—reflect on that exact challenge. They cover practical topics including sustainability - and monetization strategies, the roles of community and marketing leads, and decisions - around building courses (LLM/AI content versus volatile material).

You’ll - hear a detailed discussion of GPTs and LLMs and their effects on data workflows, - hiring and take‑home tests; community programs like Project of the Week, competitions, - and portfolio building; moderation and safety practices; and the evolution from - a Slack community to instructor‑led Zoomcamps and a Machine Learning Bookcamp. The - episode also examines outcomes—career switches, internships, and student success—plus - metrics that matter (newsletter performance, active users, sponsors). Listen to - learn concrete ideas for running a community‑driven learning program, designing - resilient courses in an AI era, and measuring long‑term impact. +image: images/podcast/s16e01-datatalks-club-anniversary-interview.jpg ids: anchor: atatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo youtube: nCqwZT9zA0M -image: images/podcast/s16e01-datatalks-club-anniversary-interview.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo apple: https://podcasts.apple.com/us/podcast/datatalks-club-anniversary-interview-alexey-grigorev/id1541710331?i=1000631114088 spotify: https://open.spotify.com/episode/0j1eKj9NbK3oAXHXHyaNae?si=M7rw9WixTvWw-BfKPXPwVg youtube: https://www.youtube.com/watch?v=nCqwZT9zA0M -season: 16 -short: DataTalks.Club Anniversary Interview -title: How DataTalks.Club Built a Thriving Data Community, ML/LLM Courses & Career - Switches + +intro: How do you build and sustain a data community that helps people switch into machine learning careers while adapting to rapid AI change? In this anniversary episode of DataTalks.Club, contributors who transitioned from roles like Java development into machine learning and Python—and who now work full‑time on community and engineering efforts—reflect on that exact challenge. 
They cover practical topics including sustainability and monetization strategies, the roles of community and marketing leads, and decisions around building courses (LLM/AI content versus volatile material).

You’ll hear a detailed discussion of GPTs and LLMs and their effects on data workflows, hiring and take‑home tests; community programs like Project of the Week, competitions, and portfolio building; moderation and safety practices; and the evolution from a Slack community to instructor‑led Zoomcamps and a Machine Learning Bookcamp. The episode also examines outcomes—career switches, internships, and student success—plus metrics that matter (newsletter performance, active users, sponsors). Listen to learn concrete ideas for running a community‑driven learning program, designing resilient courses in an AI era, and measuring long‑term impact. +dateadded: 2023-10-16 + +duration: PT01H02M57S + +quotableClips: +- name: Episode Opening & DataTalks.Club 3rd Anniversary + startOffset: 0 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=0 + endOffset: 77 +- name: 'Career Shift: From Java Developer to Machine Learning & Python' + startOffset: 77 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=77 + endOffset: 251 +- name: 'Transition: Full‑time on DataTalks.Club; engineering-heavy roles' + startOffset: 251 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=251 + endOffset: 355 +- name: 'Sustainability Strategy: Monetization and Sponsorship Focus' + startOffset: 355 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=355 + endOffset: 363 +- name: 'Team & Operations: Roles of Francis (community) and Valeria (marketing)' + startOffset: 363 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=363 + endOffset: 442 +- name: 'Course Planning: Considering LLM/AI Courses vs. 
Rapidly Changing Content' + startOffset: 442 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=442 + endOffset: 670 +- name: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take‑home Tests' + startOffset: 670 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=670 + endOffset: 896 +- name: 'Community Participation: Slack Engagement, TAs, and Webinar Contributions' + startOffset: 896 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=896 + endOffset: 1223 +- name: 'Community Programs: Project of the Week, Competitions, and Portfolios' + startOffset: 1223 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1223 + endOffset: 1398 +- name: 'Community Lessons: Finding a Niche and Moderation Challenges' + startOffset: 1398 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1398 + endOffset: 1895 +- name: 'Origin Story: Launching the Slack Community and Early Organic Growth' + startOffset: 1895 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1895 + endOffset: 2026 +- name: 'Course Model: Creating Zoomcamps Inspired by Community‑Driven Courses' + startOffset: 2026 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2026 + endOffset: 2131 +- name: 'Book & Course Synergy: Developing the Machine Learning Bookcamp' + startOffset: 2131 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2131 + endOffset: 2224 +- name: 'COVID Effect: Timing, Online Momentum, and Community Persistence' + startOffset: 2224 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2224 + endOffset: 2351 +- name: 'Typical Outcomes: Career Switches, Internships, and Student Successes' + startOffset: 2351 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2351 + endOffset: 2572 +- name: 'Content Strategy: Choosing Guests and Avoiding Hype‑Chasing' + startOffset: 2572 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2572 + endOffset: 2748 +- name: 'Safety & Moderation: Handling Unsolicited Messages and Community Safety' + startOffset: 2748 + url: 
https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2748 + endOffset: 2908 +- name: 'Publishing Journey: Writing, Reviewing, and Working with Publishers' + startOffset: 2908 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2908 + endOffset: 3328 +- name: 'Backburner Projects: Hackathons, Event Recommender Ideas, and Event Tools' + startOffset: 3328 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3328 + endOffset: 3559 +- name: 'Success Metrics: Newsletter Performance, Active Users, and Sponsors' + startOffset: 3559 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3559 + endOffset: 3726 +- name: 'Closing Remarks: Next Steps and Continuing Community Growth' + startOffset: 3726 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3726 + endOffset: 3777 + transcript: - header: Episode Opening & DataTalks.Club 3rd Anniversary - line: Welcome everyone. My name is Johanna and Welcome to DataTalks.Club. DataTalks.Club @@ -1126,91 +1202,4 @@ transcript: sec: 3777 time: '1:02:57' who: Johanna -dateadded: '2023-10-16' -duration: PT01H02M57S -quotableClips: -- name: Episode Opening & DataTalks.Club 3rd Anniversary - startOffset: 0 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=0 - endOffset: 77 -- name: 'Career Shift: From Java Developer to Machine Learning & Python' - startOffset: 77 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=77 - endOffset: 251 -- name: 'Transition: Full‑time on DataTalks.Club; engineering-heavy roles' - startOffset: 251 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=251 - endOffset: 355 -- name: 'Sustainability Strategy: Monetization and Sponsorship Focus' - startOffset: 355 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=355 - endOffset: 363 -- name: 'Team & Operations: Roles of Francis (community) and Valeria (marketing)' - startOffset: 363 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=363 - endOffset: 442 -- name: 'Course Planning: Considering LLM/AI Courses vs. 
Rapidly Changing Content' - startOffset: 442 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=442 - endOffset: 670 -- name: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take‑home Tests' - startOffset: 670 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=670 - endOffset: 896 -- name: 'Community Participation: Slack Engagement, TAs, and Webinar Contributions' - startOffset: 896 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=896 - endOffset: 1223 -- name: 'Community Programs: Project of the Week, Competitions, and Portfolios' - startOffset: 1223 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1223 - endOffset: 1398 -- name: 'Community Lessons: Finding a Niche and Moderation Challenges' - startOffset: 1398 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1398 - endOffset: 1895 -- name: 'Origin Story: Launching the Slack Community and Early Organic Growth' - startOffset: 1895 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1895 - endOffset: 2026 -- name: 'Course Model: Creating Zoomcamps Inspired by Community‑Driven Courses' - startOffset: 2026 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2026 - endOffset: 2131 -- name: 'Book & Course Synergy: Developing the Machine Learning Bookcamp' - startOffset: 2131 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2131 - endOffset: 2224 -- name: 'COVID Effect: Timing, Online Momentum, and Community Persistence' - startOffset: 2224 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2224 - endOffset: 2351 -- name: 'Typical Outcomes: Career Switches, Internships, and Student Successes' - startOffset: 2351 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2351 - endOffset: 2572 -- name: 'Content Strategy: Choosing Guests and Avoiding Hype‑Chasing' - startOffset: 2572 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2572 - endOffset: 2748 -- name: 'Safety & Moderation: Handling Unsolicited Messages and Community Safety' - startOffset: 2748 - url: 
https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2748 - endOffset: 2908 -- name: 'Publishing Journey: Writing, Reviewing, and Working with Publishers' - startOffset: 2908 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2908 - endOffset: 3328 -- name: 'Backburner Projects: Hackathons, Event Recommender Ideas, and Event Tools' - startOffset: 3328 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3328 - endOffset: 3559 -- name: 'Success Metrics: Newsletter Performance, Active Users, and Sponsors' - startOffset: 3559 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3559 - endOffset: 3726 -- name: 'Closing Remarks: Next Steps and Continuing Community Growth' - startOffset: 3726 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3726 - endOffset: 3777 --- diff --git a/_podcast/s16e02-bridging-data-science-and-healthcare.md b/_podcast/to-update/s16e02-bridging-data-science-and-healthcare.md similarity index 96% rename from _podcast/s16e02-bridging-data-science-and-healthcare.md rename to _podcast/to-update/s16e02-bridging-data-science-and-healthcare.md index 300cc2f7..f49f5089 100644 --- a/_podcast/s16e02-bridging-data-science-and-healthcare.md +++ b/_podcast/to-update/s16e02-bridging-data-science-and-healthcare.md @@ -1,19 +1,154 @@ --- +title: "Context: A conversation with ML researcher Elena Stamatelou covering her path into healthcare data science, technical projects (from C‑arm imaging and cell sorting to ballistography and home‑pregnancy monitoring), data collection and annotation, low‑resource pediatric solutions, clinical use cases like sepsis prediction, and the practical constraints of validation, regulation, deployment, infrastructure, and clinician adoption. 
+ +Core (single unifying theme): Building meaningful healthcare ML is not primarily about technical novelty but about purposefully bridging technical innovation with clinical reality—designing explainable, validated, and infrastructure‑aware systems through iterative, multidisciplinary collaboration and rigorous data collection so that models safely augment clinicians and improve patient outcomes (including equitable solutions for low‑resource settings)." +short: Bridging Data Science and Healthcare +season: 16 episode: 2 guests: - elenistamatelou +image: images/podcast/s16e02-bridging-data-science-and-healthcare.jpg ids: anchor: datatalksclub/episodes/Bridging-Data-Science-and-Healthcare---Eleni-Stamatelou-e2aegvc youtube: pDOwlulDh0c -image: images/podcast/s16e02-bridging-data-science-and-healthcare.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Bridging-Data-Science-and-Healthcare---Eleni-Stamatelou-e2aegvc apple: https://podcasts.apple.com/us/podcast/bridging-data-science-and-healthcare-eleni-stamatelou/id1541710331?i=1000632040444 spotify: https://open.spotify.com/episode/5W6lfZVhjIKEmVzBuexfzE?si=0nUHr66eQa6oPVJDb3d0rw youtube: https://www.youtube.com/watch?v=pDOwlulDh0c -season: 16 -short: Bridging Data Science and Healthcare -title: 'ML in Healthcare: Low-Resource Monitoring, Sepsis Prediction & Clinical Translation' + +description: Learn ML tactics for sepsis prediction and low-resource monitoring with clinical translation tips—deployment, validation, clinician adoption to accelerate impact. +intro: 'How do you move machine learning from promising models to reliable tools that work in low-resource hospitals — and what does it take to predict conditions like sepsis from routinely collected vitals? 
In this episode, we speak with Eleni Stamatelou, a machine learning researcher focused on healthcare whose path spans the University of Patras, Erasmus exchanges, work at VUB/ULB, a Philips Healthcare internship and a doctorate in data science. Eleni’s work ranges from C‑arm 3D reconstruction and white blood cell image classification to home pregnancy monitoring and a vital‑sign system deployed for pediatric care in Malawi.

We dig into practical topics: designing sensors and linking them to lab outcomes, ballistography signal denoising and U‑Net heart‑rate estimation, the tradeoffs between signal‑processing and deep learning approaches, and a sepsis prediction use case built from vitals and clinical data. We also cover clinical translation challenges — annotation scarcity, explainability, validation timelines, population generalization, and on‑device versus cloud deployment constraints. Listen to understand the technical and clinical steps needed to build, validate and deploy ML in healthcare and how to navigate a career in healthcare data science.' +dateadded: 2023-10-23 + +duration: PT00H59M01S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=0 + endOffset: 44 +- name: 'Guest Overview: Elena Stamatelou — ML researcher focused on healthcare' + startOffset: 44 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=44 + endOffset: 105 +- name: 'Education & Early Career: University of Patras, Erasmus, VUB/ULB' + startOffset: 105 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=105 + endOffset: 165 +- name: 'Moving to the Netherlands: Philips Healthcare internship and doctorate in + data science' + startOffset: 165 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=165 + endOffset: 283 +- name: 'Philips Healthcare Projects: C‑arm imaging and pregnancy monitoring' + startOffset: 283 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=283 + endOffset: 408 +- name: 'Low‑Resource Pediatric Monitoring: Vital‑sign system design for Malawi' + startOffset: 408 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=408 + endOffset: 454 +- name: 'Data Collection for Clinical Outcomes: linking sensors to lab results' + startOffset: 454 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=454 + endOffset: 574 +- name: 'Motivation for Healthcare: choosing impact over other engineering paths' + startOffset: 574 + url: 
https://www.youtube.com/watch?v=pDOwlulDh0c&t=574 + endOffset: 663 +- name: 'Master’s Thesis: white blood cell image classification for a cell sorter + (IMEC)' + startOffset: 663 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=663 + endOffset: 793 +- name: '3D Reconstruction Work: multi‑view geometry from C‑arm images' + startOffset: 793 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=793 + endOffset: 943 +- name: 'Home Monitoring for Pregnancy: smartwatches, weight tracking, and midwife + dashboards' + startOffset: 943 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=943 + endOffset: 1085 +- name: 'Research Orientation: novelty with clinical translation' + startOffset: 1085 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1085 + endOffset: 1168 +- name: 'Ballistography Signal Research: denoising and U‑Net for infant heart rate + estimation' + startOffset: 1168 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1168 + endOffset: 1309 +- name: 'Signal Processing vs Deep Learning: filters, Fourier methods, and when to + use ML' + startOffset: 1309 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1309 + endOffset: 1483 +- name: 'Patient Acuity Scoring: vitals‑based scoring poster' + startOffset: 1483 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1483 + endOffset: 1523 +- name: Regulatory & Explainable AI Challenges; annotation scarcity and data gaps + startOffset: 1523 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1523 + endOffset: 1692 +- name: 'Clinical Use Case: sepsis prediction from vitals and clinical data' + startOffset: 1692 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1692 + endOffset: 1870 +- name: 'Clinical Validation & Adoption: engaging clinicians and long approval timelines' + startOffset: 1870 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1870 + endOffset: 2034 +- name: 'Healthcare vs E‑commerce Data: offline events, timestamps, and higher risk' + startOffset: 2034 + url: 
https://www.youtube.com/watch?v=pDOwlulDh0c&t=2034 + endOffset: 2145 +- name: 'Population Differences & Generalization: Europe vs Africa considerations' + startOffset: 2145 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2145 + endOffset: 2377 +- name: 'Automation Impact: job displacement concerns and augmentation potential' + startOffset: 2377 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2377 + endOffset: 2590 +- name: 'Data Infrastructure Variability: digitization, interoperability, and compact + discs' + startOffset: 2590 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2590 + endOffset: 2792 +- name: 'Incremental Adoption Strategy: visualization, feedback loops, and trust building' + startOffset: 2792 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2792 + endOffset: 3050 +- name: 'ML Deployment Constraints: on‑device vs cloud for low‑resource settings' + startOffset: 3050 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3050 + endOffset: 3165 +- name: 'Sabbatical & Personal Projects: reflection and next steps' + startOffset: 3165 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3165 + endOffset: 3211 +- name: 'Transitioning into Healthcare Data Science: pathways and role types' + startOffset: 3211 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3211 + endOffset: 3346 +- name: 'Skills Transferability: technical skills are sufficient; learn clinical context + on the job' + startOffset: 3346 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3346 + endOffset: 3410 +- name: 'Job Market & Funding: demand for healthcare data scientists and research + funding' + startOffset: 3410 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3410 + endOffset: 3539 +- name: Closing Remarks and Resources (publications, GitHub, LinkedIn) + startOffset: 3539 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3539 + endOffset: 3541 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Elena Stamatelou — ML researcher focused 
on healthcare' @@ -1002,152 +1137,6 @@ transcript: sec: 3585 time: '59:45' who: Alexey -description: Learn ML tactics for sepsis prediction and low-resource monitoring with - clinical translation tips—deployment, validation, clinician adoption to accelerate - impact. -intro: 'How do you move machine learning from promising models to reliable tools that - work in low-resource hospitals — and what does it take to predict conditions like - sepsis from routinely collected vitals? In this episode, we speak with Eleni Stamatelou, - a machine learning researcher focused on healthcare whose path spans the University - of Patras, Erasmus exchanges, work at VUB/ULB, a Philips Healthcare internship and - a doctorate in data science. Eleni’s work ranges from C‑arm 3D reconstruction and - white blood cell image classification to home pregnancy monitoring and a vital‑sign - system deployed for pediatric care in Malawi.

We dig into practical topics: - designing sensors and linking them to lab outcomes, ballistography signal denoising - and U‑Net heart‑rate estimation, the tradeoffs between signal‑processing and deep - learning approaches, and a sepsis prediction use case built from vitals and clinical - data. We also cover clinical translation challenges — annotation scarcity, explainability, - validation timelines, population generalization, and on‑device versus cloud deployment - constraints. Listen to understand the technical and clinical steps needed to build, - validate and deploy ML in healthcare and how to navigate a career in healthcare - data science.' -dateadded: '2023-10-23' -duration: PT00H59M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=0 - endOffset: 44 -- name: 'Guest Overview: Elena Stamatelou — ML researcher focused on healthcare' - startOffset: 44 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=44 - endOffset: 105 -- name: 'Education & Early Career: University of Patras, Erasmus, VUB/ULB' - startOffset: 105 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=105 - endOffset: 165 -- name: 'Moving to the Netherlands: Philips Healthcare internship and doctorate in - data science' - startOffset: 165 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=165 - endOffset: 283 -- name: 'Philips Healthcare Projects: C‑arm imaging and pregnancy monitoring' - startOffset: 283 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=283 - endOffset: 408 -- name: 'Low‑Resource Pediatric Monitoring: Vital‑sign system design for Malawi' - startOffset: 408 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=408 - endOffset: 454 -- name: 'Data Collection for Clinical Outcomes: linking sensors to lab results' - startOffset: 454 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=454 - endOffset: 574 -- name: 'Motivation for Healthcare: choosing impact over other engineering paths' - startOffset: 574 - 
url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=574 - endOffset: 663 -- name: 'Master’s Thesis: white blood cell image classification for a cell sorter - (IMEC)' - startOffset: 663 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=663 - endOffset: 793 -- name: '3D Reconstruction Work: multi‑view geometry from C‑arm images' - startOffset: 793 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=793 - endOffset: 943 -- name: 'Home Monitoring for Pregnancy: smartwatches, weight tracking, and midwife - dashboards' - startOffset: 943 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=943 - endOffset: 1085 -- name: 'Research Orientation: novelty with clinical translation' - startOffset: 1085 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1085 - endOffset: 1168 -- name: 'Ballistography Signal Research: denoising and U‑Net for infant heart rate - estimation' - startOffset: 1168 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1168 - endOffset: 1309 -- name: 'Signal Processing vs Deep Learning: filters, Fourier methods, and when to - use ML' - startOffset: 1309 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1309 - endOffset: 1483 -- name: 'Patient Acuity Scoring: vitals‑based scoring poster' - startOffset: 1483 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1483 - endOffset: 1523 -- name: Regulatory & Explainable AI Challenges; annotation scarcity and data gaps - startOffset: 1523 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1523 - endOffset: 1692 -- name: 'Clinical Use Case: sepsis prediction from vitals and clinical data' - startOffset: 1692 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1692 - endOffset: 1870 -- name: 'Clinical Validation & Adoption: engaging clinicians and long approval timelines' - startOffset: 1870 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1870 - endOffset: 2034 -- name: 'Healthcare vs E‑commerce Data: offline events, timestamps, and higher risk' - startOffset: 2034 - url: 
https://www.youtube.com/watch?v=pDOwlulDh0c&t=2034 - endOffset: 2145 -- name: 'Population Differences & Generalization: Europe vs Africa considerations' - startOffset: 2145 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2145 - endOffset: 2377 -- name: 'Automation Impact: job displacement concerns and augmentation potential' - startOffset: 2377 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2377 - endOffset: 2590 -- name: 'Data Infrastructure Variability: digitization, interoperability, and compact - discs' - startOffset: 2590 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2590 - endOffset: 2792 -- name: 'Incremental Adoption Strategy: visualization, feedback loops, and trust building' - startOffset: 2792 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2792 - endOffset: 3050 -- name: 'ML Deployment Constraints: on‑device vs cloud for low‑resource settings' - startOffset: 3050 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3050 - endOffset: 3165 -- name: 'Sabbatical & Personal Projects: reflection and next steps' - startOffset: 3165 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3165 - endOffset: 3211 -- name: 'Transitioning into Healthcare Data Science: pathways and role types' - startOffset: 3211 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3211 - endOffset: 3346 -- name: 'Skills Transferability: technical skills are sufficient; learn clinical context - on the job' - startOffset: 3346 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3346 - endOffset: 3410 -- name: 'Job Market & Funding: demand for healthcare data scientists and research - funding' - startOffset: 3410 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3410 - endOffset: 3539 -- name: Closing Remarks and Resources (publications, GitHub, LinkedIn) - startOffset: 3539 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3539 - endOffset: 3541 --- Links: diff --git a/_podcast/s16e03-collaborative-data-science-in-business.md 
b/_podcast/to-update/s16e03-collaborative-data-science-in-business.md similarity index 97% rename from _podcast/s16e03-collaborative-data-science-in-business.md rename to _podcast/to-update/s16e03-collaborative-data-science-in-business.md index 0342a444..21a0e063 100644 --- a/_podcast/s16e03-collaborative-data-science-in-business.md +++ b/_podcast/to-update/s16e03-collaborative-data-science-in-business.md @@ -1,20 +1,137 @@ --- +title: "Context: A conversation with EasyJet lead data scientist Ioannis Mesionis that covers career roots, team structure, project intake and prioritization, the end‑to‑end data‑product lifecycle (EDA, modeling, pilots, A/B tests, rollout), stakeholder engagement, estimation and cadence, communication and soft skills, and pragmatic MLOps/monitoring choices. + +Core theme: Deliver measurable business impact by treating data science as a product — combine domain knowledge and stakeholder partnership with clear intake/DoD processes, iterative MVP experimentation, and lightweight engineering and monitoring practices so models move quickly, safely, and transparently from idea to production." 
+short: Collaborative Data Science in Business +season: 16 episode: 3 guests: - ioannismesionis +image: images/podcast/s16e03-collaborative-data-science-in-business.jpg ids: anchor: atatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c youtube: 1pExOVuCF8Q -image: images/podcast/s16e03-collaborative-data-science-in-business.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c apple: https://podcasts.apple.com/us/podcast/collaborative-data-science-in-business-ioannis-mesionis/id1541710331?i=1000632860980 spotify: https://open.spotify.com/episode/46DN6rAlufvvXaqdOomoTe?si=OMPDN8m5QZWsc5kJY8IcAA youtube: https://www.youtube.com/watch?v=1pExOVuCF8Q -season: 16 -short: Collaborative Data Science in Business -title: 'MLOps & Data Product Operating Model: Prioritization, A/B Testing & Model - Monitoring' + +description: Discover MLOps tactics to prioritize data products, run A/B testing and enable model monitoring for faster validation, reliable rollouts and stakeholder buy-in +intro: How do you prioritize data product work, validate models in production, and keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a practical data product operating model for tackling those challenges.

Drawing on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains a four‑phase funnel with a "single front door" intake, a Definition of Done template with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR feasibility. He breaks down when to treat work as analytics vs. research, how R&D sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies for production rollout as MLOps capabilities evolve. Technical tooling and monitoring get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection — plus pragmatic dashboarding and alerting patterns. Listeners will come away with actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder engagement, and the estimation and cadence practices that make ML teams productive +dateadded: 2023-10-29 + +duration: PT01H14S + +quotableClips: +- name: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) + startOffset: 100 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=100 + endOffset: 154 +- name: Career origin & early projects (mathematics degree, master's, internship model) + startOffset: 154 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=154 + endOffset: 443 +- name: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' + startOffset: 443 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=443 + endOffset: 512 +- name: 'Stakeholder collaboration: weekly embedded meetings and observation' + startOffset: 512 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=512 + endOffset: 675 +- name: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' + startOffset: 675 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=675 + endOffset: 840 +- name: 'Operating model for data products: four-phase funnel and accountability' + startOffset: 840 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=840 + endOffset: 923 +- name: 
'Project intake & prioritization: "single front door" and cross-functional + kickoff' + startOffset: 923 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=923 + endOffset: 1057 +- name: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' + startOffset: 1057 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1057 + endOffset: 1254 +- name: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' + startOffset: 1254 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1254 + endOffset: 1272 +- name: 'Data science vs analytics: choosing technical approach and leads' + startOffset: 1272 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1272 + endOffset: 1368 +- name: 'Research & development: modeling work, sprint planning and Kanban usage' + startOffset: 1368 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1368 + endOffset: 1517 +- name: 'Pilot & A/B testing: validating models against baseline KPIs and feedback + loops' + startOffset: 1517 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1517 + endOffset: 1645 +- name: 'Production rollout: spectrum of production and evolving MLOps capabilities' + startOffset: 1645 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1645 + endOffset: 1698 +- name: 'Organizational structure: domain-focused lead data scientists (scheduling, + ops, pricing)' + startOffset: 1698 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1698 + endOffset: 1821 +- name: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' + startOffset: 1821 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1821 + endOffset: 2138 +- name: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' + startOffset: 2138 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2138 + endOffset: 2297 +- name: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' + startOffset: 2297 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2297 + 
endOffset: 2449 +- name: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' + startOffset: 2449 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2449 + endOffset: 2493 +- name: 'Communicating technical results: simplifying concepts for non‑technical audiences' + startOffset: 2493 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2493 + endOffset: 2710 +- name: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a helper' + startOffset: 2710 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2710 + endOffset: 2918 +- name: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' + startOffset: 2918 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2918 + endOffset: 2950 +- name: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' + startOffset: 2950 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2950 + endOffset: 3213 +- name: 'Model monitoring with Evidently: drift detection and integration plans' + startOffset: 3213 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3213 + endOffset: 3311 +- name: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' + startOffset: 3311 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3311 + endOffset: 3429 +- name: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' + startOffset: 3429 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3429 + endOffset: 3660 +- name: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' + startOffset: 3660 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3660 + endOffset: 3614 + transcript: - header: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) - line: This week, we'll talk about collaborative data science in business. 
We have @@ -1278,133 +1395,6 @@ transcript: sec: 3714 time: '1:01:54' who: Alexey -description: Discover MLOps tactics to prioritize data products, run A/B testing and - enable model monitoring for faster validation, reliable rollouts and stakeholder - buy-in. -intro: How do you prioritize data product work, validate models in production, and - keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, - Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a - practical data product operating model for tackling those challenges.

Drawing - on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains - a four‑phase funnel with a "single front door" intake, a Definition of Done template - with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR - feasibility. He breaks down when to treat work as analytics vs. research, how R&D - sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies - for production rollout as MLOps capabilities evolve. Technical tooling and monitoring - get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection - — plus pragmatic dashboarding and alerting patterns. Listeners will come away with - actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder - engagement, and the estimation and cadence practices that make ML teams productive. -dateadded: '2023-10-29' -duration: PT01H14S -quotableClips: -- name: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) - startOffset: 100 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=100 - endOffset: 154 -- name: Career origin & early projects (mathematics degree, master's, internship model) - startOffset: 154 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=154 - endOffset: 443 -- name: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' - startOffset: 443 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=443 - endOffset: 512 -- name: 'Stakeholder collaboration: weekly embedded meetings and observation' - startOffset: 512 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=512 - endOffset: 675 -- name: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' - startOffset: 675 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=675 - endOffset: 840 -- name: 'Operating model for data products: four-phase funnel and accountability' - startOffset: 840 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=840 - endOffset: 
923 -- name: 'Project intake & prioritization: "single front door" and cross-functional - kickoff' - startOffset: 923 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=923 - endOffset: 1057 -- name: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' - startOffset: 1057 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1057 - endOffset: 1254 -- name: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' - startOffset: 1254 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1254 - endOffset: 1272 -- name: 'Data science vs analytics: choosing technical approach and leads' - startOffset: 1272 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1272 - endOffset: 1368 -- name: 'Research & development: modeling work, sprint planning and Kanban usage' - startOffset: 1368 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1368 - endOffset: 1517 -- name: 'Pilot & A/B testing: validating models against baseline KPIs and feedback - loops' - startOffset: 1517 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1517 - endOffset: 1645 -- name: 'Production rollout: spectrum of production and evolving MLOps capabilities' - startOffset: 1645 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1645 - endOffset: 1698 -- name: 'Organizational structure: domain-focused lead data scientists (scheduling, - ops, pricing)' - startOffset: 1698 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1698 - endOffset: 1821 -- name: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' - startOffset: 1821 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1821 - endOffset: 2138 -- name: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' - startOffset: 2138 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2138 - endOffset: 2297 -- name: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' - startOffset: 2297 - url: 
https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2297 - endOffset: 2449 -- name: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' - startOffset: 2449 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2449 - endOffset: 2493 -- name: 'Communicating technical results: simplifying concepts for non‑technical audiences' - startOffset: 2493 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2493 - endOffset: 2710 -- name: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a helper' - startOffset: 2710 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2710 - endOffset: 2918 -- name: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' - startOffset: 2918 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2918 - endOffset: 2950 -- name: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' - startOffset: 2950 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2950 - endOffset: 3213 -- name: 'Model monitoring with Evidently: drift detection and integration plans' - startOffset: 3213 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3213 - endOffset: 3311 -- name: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' - startOffset: 3311 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3311 - endOffset: 3429 -- name: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' - startOffset: 3429 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3429 - endOffset: 3660 -- name: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' - startOffset: 3660 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3660 - endOffset: 3614 --- Links: diff --git a/_podcast/s16e04-from-marketing-to-product-owner-in-search.md b/_podcast/to-update/s16e04-from-marketing-to-product-owner-in-search.md similarity index 95% rename from _podcast/s16e04-from-marketing-to-product-owner-in-search.md rename to 
_podcast/to-update/s16e04-from-marketing-to-product-owner-in-search.md index 900c5c52..22669225 100644 --- a/_podcast/s16e04-from-marketing-to-product-owner-in-search.md +++ b/_podcast/to-update/s16e04-from-marketing-to-product-owner-in-search.md @@ -1,20 +1,105 @@ --- +title: "Context: A marketer-turned-product owner describes moving from performance marketing into product roles at AUTODOC, relocating to Germany, learning product ownership on the job, forming a dedicated e-commerce search team, choosing processes (Scrum vs Kanban), structuring one-on-ones, recruiting remotely, upskilling in search and NLP, and leveraging marketing strengths for user insight, internal influence, and roadmapping—plus recommended resources and closing advice on breaking barriers and continual learning. + +Core narrative: Adaptive translational leadership—using marketing-honed user empathy, communication, and persuasion as the bridge to technical product impact: learning rapidly on the job, shaping pragmatic team structures and processes, recruiting and upskilling to close technical gaps (search/NLP), and continuously experimenting to deliver user-centered e‑commerce solutions." 
+short: From Marketing to Product Owner in Search +season: 16 episode: 4 guests: - lerakaimashnikova +image: images/podcast/s16e04-from-marketing-to-product-owner-in-search.jpg ids: anchor: atatalksclub/episodes/From-Marketing-to-Product-Owner-in-Search---Lera-Kaimashnkova-e2b33qt youtube: -HbQQ_bVdfE -image: images/podcast/s16e04-from-marketing-to-product-owner-in-search.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Marketing-to-Product-Owner-in-Search---Lera-Kaimashnkova-e2b33qt apple: https://podcasts.apple.com/us/podcast/from-marketing-to-product-owner-in-search-lera-kaimashn%D1%96kova/id1541710331?i=1000633617858 spotify: https://open.spotify.com/episode/540Mzul8eaulfqettzAHJH?si=OJWEa8NqSIaviV3zMyzL6Q youtube: https://www.youtube.com/watch?v=-HbQQ_bVdfE -season: 16 -short: From Marketing to Product Owner in Search -title: 'From Marketing to Product Owner: Build E-commerce Search with Elasticsearch - & NLP' + +description: Discover e-commerce search with Elasticsearch & NLP, plus product owner tactics, roadmap prioritization, team-building and hiring tips to boost conversions +intro: 'How do you move from performance marketing into a product role and build scalable e-commerce search with Elasticsearch and NLP? In this episode, Lera Kaimashnіkova — an e-commerce Product Owner focused on site search optimization, analytics, and conversion — walks through that exact journey. She explains transitioning from B2B marketing to product ownership, landing a PO role at AUTODOC, and relocating to Germany while learning the craft on the job.

You''ll hear practical approaches to structuring one‑on‑ones, owning monitoring, roadmaps, and prioritization, and choosing Scrum for deliveries vs. Kanban for investigations. Lera covers forming a dedicated e‑commerce search team and the technical side of relevance engineering: Elasticsearch, autocomplete, search filters, vehicle selector and part fitment flows, plus NLP and information retrieval learnings from Haystack and ChatGPT experimentation. She also discusses recruiting remote developers, why marketing backgrounds are valuable for product roles, and resources for communication, strategy, and experimentation.

If you’re responsible for site search, product discovery, or making the leap into product ownership, this episode delivers concrete tactics for improving relevance, boosting conversion rates, and growing your technical and team capabilities.' +dateadded: 2023-11-05 + +duration: PT01H02M19S + +quotableClips: +- name: Podcast Introduction & Guest Welcome + startOffset: 0 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=0 + endOffset: 111 +- name: 'Background: Transition from performance marketing to product roles' + startOffset: 111 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=111 + endOffset: 134 +- name: 'Marketing Experience: B2B e‑commerce, lead acquisition, branding' + startOffset: 134 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=134 + endOffset: 566 +- name: Landing Product Owner Role at AUTODOC Despite Non‑traditional Fit + startOffset: 566 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=566 + endOffset: 623 +- name: 'Relocation: Moving from Ukraine to Germany during 2020' + startOffset: 623 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=623 + endOffset: 704 +- name: 'Transition Challenges: Learning product ownership on the job' + startOffset: 704 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=704 + endOffset: 996 +- name: 'One‑on‑Ones: Structuring meetings to align with engineers and QA' + startOffset: 996 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=996 + endOffset: 1347 +- name: 'Product Owner Scope: Monitoring, roadmap, prioritization, team operations' + startOffset: 1347 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1347 + endOffset: 1727 +- name: 'Process Choices: Scrum for deliveries vs Kanban for investigations' + startOffset: 1727 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1727 + endOffset: 1790 +- name: 'Team Building: Forming a dedicated e‑commerce search team' + startOffset: 1790 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1790 + endOffset: 2072 +- name: 'Search Expertise: 
Relevant Search book, Elasticsearch, and relevance as business + context' + startOffset: 2072 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2072 + endOffset: 2348 +- name: 'User Journey: Vehicle selector, part fitment, and contextual search flows' + startOffset: 2348 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2348 + endOffset: 2571 +- name: 'Technical Upskilling: NLP, information retrieval, Haystack conference, and + ChatGPT' + startOffset: 2571 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2571 + endOffset: 2802 +- name: 'Recruiting: Remote roles, office hubs, and open developer positions' + startOffset: 2802 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2802 + endOffset: 2909 +- name: 'Hiring Criteria: Why marketing backgrounds are valued for product roles' + startOffset: 2909 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2909 + endOffset: 3136 +- name: 'Marketing Strengths: User understanding, internal PR, and pitching roadmaps' + startOffset: 3136 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3136 + endOffset: 3486 +- name: 'Recommended Resources: Communication, Professional Product Owner, strategy, + experimentation' + startOffset: 3486 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3486 + endOffset: 3775 +- name: 'Closing Advice: Breaking mental barriers and committing to continuous learning' + startOffset: 3775 + url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3775 + endOffset: 3739 + transcript: - header: Podcast Introduction & Guest Welcome - line: This week, we'll talk about transitioning from marketing to being a product @@ -973,102 +1058,6 @@ transcript: sec: 3818 time: '1:03:38' who: Alexey -description: Discover e-commerce search with Elasticsearch & NLP, plus product owner - tactics, roadmap prioritization, team-building and hiring tips to boost conversions. -intro: 'How do you move from performance marketing into a product role and build scalable - e-commerce search with Elasticsearch and NLP? 
In this episode, Lera Kaimashnіkova - — an e-commerce Product Owner focused on site search optimization, analytics, and - conversion — walks through that exact journey. She explains transitioning from B2B - marketing to product ownership, landing a PO role at AUTODOC, and relocating to - Germany while learning the craft on the job.

You''ll hear practical approaches - to structuring one‑on‑ones, owning monitoring, roadmaps, and prioritization, and - choosing Scrum for deliveries vs. Kanban for investigations. Lera covers forming - a dedicated e‑commerce search team and the technical side of relevance engineering: - Elasticsearch, autocomplete, search filters, vehicle selector and part fitment flows, - plus NLP and information retrieval learnings from Haystack and ChatGPT experimentation. - She also discusses recruiting remote developers, why marketing backgrounds are valuable - for product roles, and resources for communication, strategy, and experimentation. -

If you’re responsible for site search, product discovery, or making the - leap into product ownership, this episode delivers concrete tactics for improving - relevance, boosting conversion rates, and growing your technical and team capabilities.' -dateadded: '2023-11-05' -duration: PT01H02M19S -quotableClips: -- name: Podcast Introduction & Guest Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=0 - endOffset: 111 -- name: 'Background: Transition from performance marketing to product roles' - startOffset: 111 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=111 - endOffset: 134 -- name: 'Marketing Experience: B2B e‑commerce, lead acquisition, branding' - startOffset: 134 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=134 - endOffset: 566 -- name: Landing Product Owner Role at AUTODOC Despite Non‑traditional Fit - startOffset: 566 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=566 - endOffset: 623 -- name: 'Relocation: Moving from Ukraine to Germany during 2020' - startOffset: 623 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=623 - endOffset: 704 -- name: 'Transition Challenges: Learning product ownership on the job' - startOffset: 704 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=704 - endOffset: 996 -- name: 'One‑on‑Ones: Structuring meetings to align with engineers and QA' - startOffset: 996 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=996 - endOffset: 1347 -- name: 'Product Owner Scope: Monitoring, roadmap, prioritization, team operations' - startOffset: 1347 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1347 - endOffset: 1727 -- name: 'Process Choices: Scrum for deliveries vs Kanban for investigations' - startOffset: 1727 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1727 - endOffset: 1790 -- name: 'Team Building: Forming a dedicated e‑commerce search team' - startOffset: 1790 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1790 - endOffset: 2072 -- name: 'Search Expertise: 
Relevant Search book, Elasticsearch, and relevance as business - context' - startOffset: 2072 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2072 - endOffset: 2348 -- name: 'User Journey: Vehicle selector, part fitment, and contextual search flows' - startOffset: 2348 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2348 - endOffset: 2571 -- name: 'Technical Upskilling: NLP, information retrieval, Haystack conference, and - ChatGPT' - startOffset: 2571 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2571 - endOffset: 2802 -- name: 'Recruiting: Remote roles, office hubs, and open developer positions' - startOffset: 2802 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2802 - endOffset: 2909 -- name: 'Hiring Criteria: Why marketing backgrounds are valued for product roles' - startOffset: 2909 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2909 - endOffset: 3136 -- name: 'Marketing Strengths: User understanding, internal PR, and pitching roadmaps' - startOffset: 3136 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3136 - endOffset: 3486 -- name: 'Recommended Resources: Communication, Professional Product Owner, strategy, - experimentation' - startOffset: 3486 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3486 - endOffset: 3775 -- name: 'Closing Advice: Breaking mental barriers and committing to continuous learning' - startOffset: 3775 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3775 - endOffset: 3739 --- Links: diff --git a/_podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md b/_podcast/to-update/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md similarity index 96% rename from _podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md rename to _podcast/to-update/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md index b9930a32..64d5f520 100644 --- 
a/_podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md +++ b/_podcast/to-update/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md @@ -1,21 +1,143 @@ --- +title: "Context: The episode follows a journey from academic foundations in economics, Chinese, and statistics through industry research and platform data roles to independent generative-AI consultancy, touching on technical approaches (SOTA models, model‑in‑the‑loop annotation, evaluation), product and business priorities, client acquisition and pitching, entrepreneurship realities, and community support initiatives. + +Core: The unifying idea is translating deep technical expertise into pragmatic, production‑oriented generative-AI solutions that deliver measurable business impact—anchored in rigorous evaluation, stakeholder ownership, evidence-based communication (workshops, decks, case studies), continual learning, and a commitment to accessibility and mentorship while managing the practicalities of running a sustainable freelance practice." 
+short: From a Research Scientist at Amazon to a Machine learning/AI Consultant +season: 16 episode: 5 guests: - verenaweber -date: 2025-11-07 +image: images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg ids: anchor: atatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr youtube: 4RargY8iOaE -image: images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr apple: https://podcasts.apple.com/us/podcast/from-a-research-scientist-at-amazon-to-a/id1541710331?i=1000634411188 spotify: https://open.spotify.com/episode/7gJI3ds3k1vXd3m3W9iRj9?si=oG6A7BuTSjaEoH6FhvEVug youtube: https://www.youtube.com/watch?v=4RargY8iOaE -season: 16 -short: From a Research Scientist at Amazon to a Machine learning/AI Consultant -title: 'Launch a Generative AI Freelance Business: NLP, Model-in-the-Loop Annotation - & Client Pitch' + +description: 'Discover how to launch a generative AI freelance business: NLP services, model-in-the-loop annotation, pitch-deck strategies, client leads & scalable workshops.' +intro: 'How do you turn NLP research experience into a viable generative AI freelance business — and how do you actually win clients? In this episode, Verena Weber, a former Research Scientist at Alexa AI with 7+ years in machine learning and a background in statistics, walks through that transition and the practical work that sells. We cover launching a freelance generative AI business, designing and running model-in-the-loop annotation studies (why they save time and improve consistency), model evaluation strategies for stabilizing high‑traffic utterances, and how to package offerings like generative AI workshops and use‑case discovery.

Verena also breaks down the nuts-and-bolts of client acquisition: crafting pitch decks (long and short formats), positioning, evidence and rates, LinkedIn visibility, network referrals, events and mentorship. She doesn’t skip the realities of self-employment — taxes, health insurance and admin — or content strategies to showcase expertise (technical posts, papers, side projects). Listen to learn concrete steps for becoming an NLP consultant, running annotation experiments that scale, and pitching value to SMEs and product teams.' +dateadded: 2023-11-12 +date: 2025-11-07 + +duration: PT00H59M53S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=0 + endOffset: 106 +- name: 'Early Education: From Economics & Chinese to Statistics' + startOffset: 106 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=106 + endOffset: 189 +- name: Discovering Data Science During Master’s Studies + startOffset: 189 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=189 + endOffset: 219 +- name: 'Career Progression: Consulting, In‑house Roles, and Platform Data' + startOffset: 219 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=219 + endOffset: 397 +- name: 'Freelance Transition: Becoming a Generative AI Consultant' + startOffset: 397 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=397 + endOffset: 416 +- name: 'Landing a Research Role Without a PhD: Hiring Dynamics' + startOffset: 416 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=416 + endOffset: 641 +- name: 'Amazon Research: Customer‑Focused, Production‑Oriented Work' + startOffset: 641 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=641 + endOffset: 702 +- name: Using State‑of‑the‑Art Models and Publishing in Industry Tracks + startOffset: 702 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=702 + endOffset: 986 +- name: Prioritizing Business Impact Over Publication Counts + startOffset: 986 + url: 
https://www.youtube.com/watch?v=4RargY8iOaE&t=986 + endOffset: 1069 +- name: 'Research Output: Project Cadence and Paper Frequency' + startOffset: 1069 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1069 + endOffset: 1147 +- name: 'Project Leadership: Ownership, Stakeholders, and Delivery' + startOffset: 1147 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1147 + endOffset: 1391 +- name: 'Model‑in‑the‑Loop Annotation Study: Design and Rationale' + startOffset: 1391 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1391 + endOffset: 1520 +- name: 'Annotation Outcomes: Time Savings and Improved Consistency' + startOffset: 1520 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1520 + endOffset: 1667 +- name: Model Evaluation Strategy and Stabilizing High‑Traffic Utterances + startOffset: 1667 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1667 + endOffset: 1903 +- name: 'Why Freelance: Impact on SMEs, Flexibility, and Entrepreneurship' + startOffset: 1903 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1903 + endOffset: 1927 +- name: 'Service Offerings: Generative AI Workshops and Use‑Case Discovery' + startOffset: 1927 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1927 + endOffset: 2135 +- name: 'Supporting Women in AI: Goals and Planned Initiatives' + startOffset: 2135 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2135 + endOffset: 2275 +- name: 'Self‑Employment Realities: Taxes, Health Insurance, and Admin' + startOffset: 2275 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2275 + endOffset: 2343 +- name: 'Crafting a Pitch Deck: Positioning, Evidence, and Rates' + startOffset: 2343 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2343 + endOffset: 2519 +- name: 'Finding Clients: Network Conversations, Mentorship, and Events' + startOffset: 2519 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2519 + endOffset: 2871 +- name: 'Choosing Generative AI: NLP Passion and Market Opportunity' + startOffset: 2871 + url: 
https://www.youtube.com/watch?v=4RargY8iOaE&t=2871 + endOffset: 2948 +- name: 'Tailoring the Deck: Long Format, Short Versions, and Website' + startOffset: 2948 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2948 + endOffset: 3102 +- name: 'Early Leads: LinkedIn Visibility and Network Referrals' + startOffset: 3102 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3102 + endOffset: 3154 +- name: 'Content Strategy: Technical Posts, Papers, and Personal Growth' + startOffset: 3154 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3154 + endOffset: 3213 +- name: 'Side Projects & Wellbeing: Sound Baths and Creative Outlets' + startOffset: 3213 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3213 + endOffset: 3306 +- name: 'Educational Foundation: Statistics, Probability, and Reading Papers' + startOffset: 3306 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3306 + endOffset: 3473 +- name: 'Recommended Resources: Books and Podcasts' + startOffset: 3473 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3473 + endOffset: 3639 +- name: Closing Remarks and Contact Information + startOffset: 3639 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3639 + endOffset: 3593 + transcript: - header: Episode Introduction - line: This week, we'll talk about being a research scientist at Amazon, and transitioning @@ -1232,138 +1354,6 @@ transcript: sec: 3656 time: '1:00:56' who: Alexey -intro: 'How do you turn NLP research experience into a viable generative AI freelance - business — and how do you actually win clients? In this episode, Verena Weber, a - former Research Scientist at Alexa AI with 7+ years in machine learning and a background - in statistics, walks through that transition and the practical work that sells. 
- We cover launching a freelance generative AI business, designing and running model-in-the-loop - annotation studies (why they save time and improve consistency), model evaluation - strategies for stabilizing high‑traffic utterances, and how to package offerings - like generative AI workshops and use‑case discovery.

Verena also breaks - down the nuts-and-bolts of client acquisition: crafting pitch decks (long and short - formats), positioning, evidence and rates, LinkedIn visibility, network referrals, - events and mentorship. She doesn’t skip the realities of self-employment — taxes, - health insurance and admin — or content strategies to showcase expertise (technical - posts, papers, side projects). Listen to learn concrete steps for becoming an NLP - consultant, running annotation experiments that scale, and pitching value to SMEs - and product teams.' -description: 'Discover how to launch a generative AI freelance business: NLP services, - model-in-the-loop annotation, pitch-deck strategies, client leads & scalable workshops.' -dateadded: '2023-11-12' -duration: PT00H59M53S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=0 - endOffset: 106 -- name: 'Early Education: From Economics & Chinese to Statistics' - startOffset: 106 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=106 - endOffset: 189 -- name: Discovering Data Science During Master’s Studies - startOffset: 189 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=189 - endOffset: 219 -- name: 'Career Progression: Consulting, In‑house Roles, and Platform Data' - startOffset: 219 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=219 - endOffset: 397 -- name: 'Freelance Transition: Becoming a Generative AI Consultant' - startOffset: 397 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=397 - endOffset: 416 -- name: 'Landing a Research Role Without a PhD: Hiring Dynamics' - startOffset: 416 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=416 - endOffset: 641 -- name: 'Amazon Research: Customer‑Focused, Production‑Oriented Work' - startOffset: 641 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=641 - endOffset: 702 -- name: Using State‑of‑the‑Art Models and Publishing in Industry Tracks - startOffset: 702 - url: 
https://www.youtube.com/watch?v=4RargY8iOaE&t=702 - endOffset: 986 -- name: Prioritizing Business Impact Over Publication Counts - startOffset: 986 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=986 - endOffset: 1069 -- name: 'Research Output: Project Cadence and Paper Frequency' - startOffset: 1069 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1069 - endOffset: 1147 -- name: 'Project Leadership: Ownership, Stakeholders, and Delivery' - startOffset: 1147 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1147 - endOffset: 1391 -- name: 'Model‑in‑the‑Loop Annotation Study: Design and Rationale' - startOffset: 1391 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1391 - endOffset: 1520 -- name: 'Annotation Outcomes: Time Savings and Improved Consistency' - startOffset: 1520 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1520 - endOffset: 1667 -- name: Model Evaluation Strategy and Stabilizing High‑Traffic Utterances - startOffset: 1667 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1667 - endOffset: 1903 -- name: 'Why Freelance: Impact on SMEs, Flexibility, and Entrepreneurship' - startOffset: 1903 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1903 - endOffset: 1927 -- name: 'Service Offerings: Generative AI Workshops and Use‑Case Discovery' - startOffset: 1927 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1927 - endOffset: 2135 -- name: 'Supporting Women in AI: Goals and Planned Initiatives' - startOffset: 2135 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2135 - endOffset: 2275 -- name: 'Self‑Employment Realities: Taxes, Health Insurance, and Admin' - startOffset: 2275 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2275 - endOffset: 2343 -- name: 'Crafting a Pitch Deck: Positioning, Evidence, and Rates' - startOffset: 2343 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2343 - endOffset: 2519 -- name: 'Finding Clients: Network Conversations, Mentorship, and Events' - startOffset: 2519 - url: 
https://www.youtube.com/watch?v=4RargY8iOaE&t=2519 - endOffset: 2871 -- name: 'Choosing Generative AI: NLP Passion and Market Opportunity' - startOffset: 2871 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2871 - endOffset: 2948 -- name: 'Tailoring the Deck: Long Format, Short Versions, and Website' - startOffset: 2948 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2948 - endOffset: 3102 -- name: 'Early Leads: LinkedIn Visibility and Network Referrals' - startOffset: 3102 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3102 - endOffset: 3154 -- name: 'Content Strategy: Technical Posts, Papers, and Personal Growth' - startOffset: 3154 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3154 - endOffset: 3213 -- name: 'Side Projects & Wellbeing: Sound Baths and Creative Outlets' - startOffset: 3213 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3213 - endOffset: 3306 -- name: 'Educational Foundation: Statistics, Probability, and Reading Papers' - startOffset: 3306 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3306 - endOffset: 3473 -- name: 'Recommended Resources: Books and Podcasts' - startOffset: 3473 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3473 - endOffset: 3639 -- name: Closing Remarks and Contact Information - startOffset: 3639 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3639 - endOffset: 3593 --- Links: diff --git a/_podcast/s16e06-unwritten-rules-for-success-in-machine-learning.md b/_podcast/to-update/s16e06-unwritten-rules-for-success-in-machine-learning.md similarity index 96% rename from _podcast/s16e06-unwritten-rules-for-success-in-machine-learning.md rename to _podcast/to-update/s16e06-unwritten-rules-for-success-in-machine-learning.md index 4b785f47..abfab51d 100644 --- a/_podcast/s16e06-unwritten-rules-for-success-in-machine-learning.md +++ b/_podcast/to-update/s16e06-unwritten-rules-for-success-in-machine-learning.md @@ -1,19 +1,126 @@ --- +title: "Context — A career arc from software engineer to VP 
of ML frames concrete stories about promotion, informal leadership, stakeholder selling, demo-driven buy‑in, rapid prototyping, baseline-first experiments, domain immersion, and building full‑stack production capabilities. + +Core narrative — Success in applied machine learning is not primarily about squeezing marginal accuracy from models but about bridging technical craft and business impact: become a product‑focused, full‑stack practitioner and leader who rapidly validates hypotheses with simple baselines and demos, speaks the language of stakeholders, builds trust and reputation, communicates trade‑offs clearly, and embeds ML into real user workflows so technical work directly drives measurable outcomes." +short: The Unwritten Rules for Success in Machine Learning +season: 16 episode: 6 guests: - jackblandin +image: images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg ids: anchor: atatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk youtube: su2M058m3Lw -image: images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk apple: https://podcasts.apple.com/us/podcast/the-unwritten-rules-for-success-in-machine-learning/id1541710331?i=1000635206953 spotify: https://open.spotify.com/episode/2c8E0hZ02osih7ljEB6I6f?si=lSPp07r4TgmpGQey0cUjsA youtube: https://www.youtube.com/watch?v=su2M058m3Lw -season: 16 -short: The Unwritten Rules for Success in Machine Learning -title: 'From Engineer to VP of ML: How to Lead, Sell, and Ship Actionable ML Products' + +description: 'Discover how to lead and ship actionable ML products: master stakeholder communication, rapid POCs, demo design, and full‑stack ML to deliver business impact.' 
+intro: 'How do you move from software engineer to VP of Machine Learning while learning to lead, sell, and ship ML products that actually change outcomes? In this episode Jack Blandin—now VP of Data Science & Machine Learning at Fi, who transitioned from full‑stack engineering to data science and has managed teams of 2–15—walks through that exact journey.

We cover Jack’s career pivot and early leadership lessons, practical approaches to problem framing and reputation management, and how to speak the language of stakeholders (CAC, KPIs) to win buy‑in for ML projects. You’ll hear concrete tactics for selling ML: fast POCs and user‑centric demos (Gradio, Streamlit), starting with baseline heuristics and manual processes, running quick hypothesis validation experiments, and communicating model trade‑offs without obsessing over raw accuracy. Jack also explains the importance of domain immersion, full‑stack engineering for production ML, and prioritizing actionability over accuracy—illustrated by a churn model lesson.

If you lead or ship ML products, this episode delivers actionable guidance on machine learning leadership, rapid prototyping, demo design, and stakeholder communication to move models from prototype to product.' +dateadded: 2023-11-20 + +duration: PT00H53M23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=0 + endOffset: 13 +- name: 'Guest Overview: Jack’s career arc from software engineer to VP of ML' + startOffset: 13 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=13 + endOffset: 64 +- name: 'Career Pivot: Transition from full‑stack engineering to data science' + startOffset: 64 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=64 + endOffset: 161 +- name: 'Early Leadership: Informal management and promotion at GoHealth' + startOffset: 161 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=161 + endOffset: 287 +- name: 'Rapid Advancement: Reflections on moving from IC to manager' + startOffset: 287 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=287 + endOffset: 413 +- name: 'Leadership Learning: Trial‑and‑error development of soft skills' + startOffset: 413 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=413 + endOffset: 541 +- name: 'Problem Framing: Technical context and product‑level understanding' + startOffset: 541 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=541 + endOffset: 693 +- name: 'Reputation Management: Building respect, trust, and influence' + startOffset: 693 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=693 + endOffset: 925 +- name: 'Stakeholder Communication: Speaking marketing language (CAC, KPIs)' + startOffset: 925 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=925 + endOffset: 1042 +- name: 'ML Project Complexity: Resource needs and cross‑functional buy‑in' + startOffset: 1042 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1042 + endOffset: 1248 +- name: 'Selling ML: Fast POCs and demos to generate stakeholder support' + 
startOffset: 1248 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1248 + endOffset: 1398 +- name: 'Demo Design: Visualizations and user‑centric proof‑of‑concepts' + startOffset: 1398 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1398 + endOffset: 1575 +- name: 'Risk Communication: Explaining model trade‑offs without raw accuracy' + startOffset: 1575 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1575 + endOffset: 1697 +- name: 'Rapid Prototyping Tools: Gradio, Streamlit, and lightweight demos' + startOffset: 1697 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1697 + endOffset: 1726 +- name: 'Baseline First: Start with heuristics and manual processes before ML' + startOffset: 1726 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1726 + endOffset: 1863 +- name: 'Hypothesis Validation: Quick experiments to test product assumptions' + startOffset: 1863 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1863 + endOffset: 2049 +- name: 'Actionability Over Accuracy: Churn model lesson on usable insights' + startOffset: 2049 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2049 + endOffset: 2204 +- name: 'Outcome Focus: Avoiding technical tunnel vision on ML tuning' + startOffset: 2204 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2204 + endOffset: 2254 +- name: 'Data Generative Process: Treating data as a shadow of reality' + startOffset: 2254 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2254 + endOffset: 2437 +- name: 'Domain Immersion: Customer empathy through product usage' + startOffset: 2437 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2437 + endOffset: 2675 +- name: 'Full‑Stack ML: Importance of software engineering for production ML' + startOffset: 2675 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2675 + endOffset: 2878 +- name: 'Content & Community: Daily LinkedIn posts and where to follow Jack' + startOffset: 2878 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2878 + endOffset: 3037 +- name: 
'New Venture: Reimagining hiring and recruiting for ML/data roles' + startOffset: 3037 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3037 + endOffset: 3182 +- name: Episode Wrap‑Up and Final Remarks + startOffset: 3182 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3182 + endOffset: 3203 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Jack’s career arc from software engineer to VP of ML' @@ -1134,122 +1241,6 @@ transcript: sec: 3216 time: '53:36' who: Jack -description: 'Discover how to lead and ship actionable ML products: master stakeholder - communication, rapid POCs, demo design, and full‑stack ML to deliver business impact.' -intro: 'How do you move from software engineer to VP of Machine Learning while learning - to lead, sell, and ship ML products that actually change outcomes? In this episode - Jack Blandin—now VP of Data Science & Machine Learning at Fi, who transitioned from - full‑stack engineering to data science and has managed teams of 2–15—walks through - that exact journey.

We cover Jack’s career pivot and early leadership lessons, - practical approaches to problem framing and reputation management, and how to speak - the language of stakeholders (CAC, KPIs) to win buy‑in for ML projects. You’ll hear - concrete tactics for selling ML: fast POCs and user‑centric demos (Gradio, Streamlit), - starting with baseline heuristics and manual processes, running quick hypothesis - validation experiments, and communicating model trade‑offs without obsessing over - raw accuracy. Jack also explains the importance of domain immersion, full‑stack - engineering for production ML, and prioritizing actionability over accuracy—illustrated - by a churn model lesson.

If you lead or ship ML products, this episode - delivers actionable guidance on machine learning leadership, rapid prototyping, - demo design, and stakeholder communication to move models from prototype to product.' -dateadded: '2023-11-20' -duration: PT00H53M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=0 - endOffset: 13 -- name: 'Guest Overview: Jack’s career arc from software engineer to VP of ML' - startOffset: 13 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=13 - endOffset: 64 -- name: 'Career Pivot: Transition from full‑stack engineering to data science' - startOffset: 64 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=64 - endOffset: 161 -- name: 'Early Leadership: Informal management and promotion at GoHealth' - startOffset: 161 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=161 - endOffset: 287 -- name: 'Rapid Advancement: Reflections on moving from IC to manager' - startOffset: 287 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=287 - endOffset: 413 -- name: 'Leadership Learning: Trial‑and‑error development of soft skills' - startOffset: 413 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=413 - endOffset: 541 -- name: 'Problem Framing: Technical context and product‑level understanding' - startOffset: 541 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=541 - endOffset: 693 -- name: 'Reputation Management: Building respect, trust, and influence' - startOffset: 693 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=693 - endOffset: 925 -- name: 'Stakeholder Communication: Speaking marketing language (CAC, KPIs)' - startOffset: 925 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=925 - endOffset: 1042 -- name: 'ML Project Complexity: Resource needs and cross‑functional buy‑in' - startOffset: 1042 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1042 - endOffset: 1248 -- name: 'Selling ML: Fast POCs and demos to generate stakeholder support' - 
startOffset: 1248 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1248 - endOffset: 1398 -- name: 'Demo Design: Visualizations and user‑centric proof‑of‑concepts' - startOffset: 1398 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1398 - endOffset: 1575 -- name: 'Risk Communication: Explaining model trade‑offs without raw accuracy' - startOffset: 1575 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1575 - endOffset: 1697 -- name: 'Rapid Prototyping Tools: Gradio, Streamlit, and lightweight demos' - startOffset: 1697 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1697 - endOffset: 1726 -- name: 'Baseline First: Start with heuristics and manual processes before ML' - startOffset: 1726 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1726 - endOffset: 1863 -- name: 'Hypothesis Validation: Quick experiments to test product assumptions' - startOffset: 1863 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1863 - endOffset: 2049 -- name: 'Actionability Over Accuracy: Churn model lesson on usable insights' - startOffset: 2049 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2049 - endOffset: 2204 -- name: 'Outcome Focus: Avoiding technical tunnel vision on ML tuning' - startOffset: 2204 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2204 - endOffset: 2254 -- name: 'Data Generative Process: Treating data as a shadow of reality' - startOffset: 2254 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2254 - endOffset: 2437 -- name: 'Domain Immersion: Customer empathy through product usage' - startOffset: 2437 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2437 - endOffset: 2675 -- name: 'Full‑Stack ML: Importance of software engineering for production ML' - startOffset: 2675 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2675 - endOffset: 2878 -- name: 'Content & Community: Daily LinkedIn posts and where to follow Jack' - startOffset: 2878 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2878 - endOffset: 3037 -- name: 
'New Venture: Reimagining hiring and recruiting for ML/data roles' - startOffset: 3037 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3037 - endOffset: 3182 -- name: Episode Wrap‑Up and Final Remarks - startOffset: 3182 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3182 - endOffset: 3203 --- Links: diff --git a/_podcast/s16e07-cracking-code-machine-learning-made-understandable.md b/_podcast/to-update/s16e07-cracking-code-machine-learning-made-understandable.md similarity index 96% rename from _podcast/s16e07-cracking-code-machine-learning-made-understandable.md rename to _podcast/to-update/s16e07-cracking-code-machine-learning-made-understandable.md index b13537c7..62e85035 100644 --- a/_podcast/s16e07-cracking-code-machine-learning-made-understandable.md +++ b/_podcast/to-update/s16e07-cracking-code-machine-learning-made-understandable.md @@ -1,20 +1,131 @@ --- +title: "Context: Christoph Molnar’s journey from statistician and Kaggle competitor to full‑time technical author frames a consistent practice: hands‑on modeling, careful documentation, and public, iterative teaching about interpretable machine learning techniques (SHAP, conformal prediction, etc.), plus the practical mechanics of publishing and staying current. + +Core narrative: At the episode’s center is the idea that trustworthy, useful machine learning emerges not from opaque accuracy chasing but from a disciplined loop of hands‑on experimentation, clear interpretation, and open communication — using interpretable methods and calibrated uncertainty to debug and understand models, keeping meticulous logs and competitions to stay sharp, and publishing incrementally (with feedback and transparency) to teach others while refining your own understanding. This unified through‑line ties together the technical tools, the writing and publishing choices, and the everyday workflows that make complex ML accessible, reproducible, and actionable." 
+short: 'Cracking the Code: Machine Learning Made Understandable' +season: 16 episode: 7 guests: - christophmolnar +image: images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg ids: anchor: atatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 youtube: LBuGzyOkx7c -image: images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 apple: https://podcasts.apple.com/us/podcast/cracking-the-code-machine-learning-made/id1541710331?i=1000636448000 spotify: https://open.spotify.com/episode/3SjDB0E2of9IS9TXn2Fof3?si=FwWH99FGTgmL1OGI3-sLAg youtube: https://www.youtube.com/watch?v=LBuGzyOkx7c -season: 16 -short: 'Cracking the Code: Machine Learning Made Understandable' -title: 'Interpretable ML & Technical Writing: SHAP, Conformal Prediction, Python & - Self-Publishing' + +description: Discover Interpretable ML, SHAP and Conformal Prediction with Python examples and self-publishing tips, debug models, calibrate uncertainty, and publish +intro: How can we make machine learning interpretable in practice — and how do you turn that expertise into clear, usable technical writing? In this episode, Christoph Molnar, statistician, machine learner, and author of Interpretable ML, walks through the tools and workflows he uses to answer that question.

Christoph traces his path from statistics and Kaggle competitions to becoming a full‑time technical writer, and drills into core topics like SHAP for debugging models, conformal prediction for calibrated uncertainty and prediction sets, and practical Python examples. We also cover interpretability vs. accuracy, terminology around explainable AI, and keeping skills sharp through competitions and an Obsidian logbook.

On the writing side, Christoph explains his chapter‑by‑chapter “publishing in public” workflow, self‑publishing choices (Leanpub, Amazon KDP, print‑on‑demand), feedback strategies with beta readers, and advice for aspiring technical writers. Listen for actionable guidance on applying interpretable machine learning techniques and concrete steps for turning technical work into publishable, useful content +dateadded: 2023-11-27 + +duration: PT00H56M20S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=0 + endOffset: 42 +- name: 'Guest Intro: Christoph Molnar, Interpretable ML Author' + startOffset: 42 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=42 + endOffset: 92 +- name: 'Career Journey: From Statistics to Tech Writing' + startOffset: 92 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=92 + endOffset: 225 +- name: Becoming a Full‑Time Technical Writer + startOffset: 225 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=225 + endOffset: 397 +- name: 'Kaggle Beginnings: Linear Models to Practical ML' + startOffset: 397 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=397 + endOffset: 470 +- name: 'Origin Story: Interest in Interpretable Machine Learning' + startOffset: 470 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=470 + endOffset: 567 +- name: 'Interpretability vs Accuracy: Debugging Models with SHAP' + startOffset: 567 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=567 + endOffset: 719 +- name: 'Active Competition: River Flow Forecasting Project' + startOffset: 719 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=719 + endOffset: 837 +- name: 'Choosing Book Topics: Audience Data and Personal Curiosity' + startOffset: 837 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=837 + endOffset: 955 +- name: 'Publishing in Public: Chapter‑by‑Chapter Workflow' + startOffset: 955 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=955 + endOffset: 1027 +- name: 
'Self‑Publishing vs Publishers: Control, Editors, Royalties' + startOffset: 1027 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1027 + endOffset: 1138 +- name: 'Book Overview: Interpretable ML; Modeling Mindsets; Conformal Prediction; + SHAP' + startOffset: 1138 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1138 + endOffset: 1227 +- name: 'Conformal Prediction: Calibrated Uncertainty and Prediction Sets' + startOffset: 1227 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1227 + endOffset: 1424 +- name: 'SHAP Deep Dive: Practical Guide and Python Examples' + startOffset: 1424 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1424 + endOffset: 1577 +- name: 'Terminology: Explainable AI vs Interpretable Machine Learning' + startOffset: 1577 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1577 + endOffset: 1800 +- name: 'Work Style: Solo Writing, Collaboration, and Co‑authoring' + startOffset: 1800 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1800 + endOffset: 1987 +- name: 'Staying Hands‑On: Competitions to Maintain Practical Skills' + startOffset: 1987 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1987 + endOffset: 2181 +- name: 'Logbook Practice: Obsidian Notes for Experiments and Reflection' + startOffset: 2181 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2181 + endOffset: 2541 +- name: 'Writing Expertise: Teaching to Learn vs Being a Beginner' + startOffset: 2541 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2541 + endOffset: 2691 +- name: 'Feedback Strategy: Open Drafts, Beta Readers, and Iteration' + startOffset: 2691 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2691 + endOffset: 2916 +- name: 'Advice for Aspiring Technical Writers: Start Small and Publish' + startOffset: 2916 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2916 + endOffset: 3000 +- name: 'Becoming a Full‑Time Author: Timeframe, Income, and Workload' + startOffset: 3000 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3000 
+ endOffset: 3229 +- name: 'Publishing Logistics: Leanpub, Amazon KDP, and Print‑on‑Demand' + startOffset: 3229 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3229 + endOffset: 3376 +- name: 'Where to Find Christoph: Website, Newsletter, and Socials' + startOffset: 3376 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3376 + endOffset: 3413 +- name: Closing Remarks and Episode Wrap‑Up + startOffset: 3413 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3413 + endOffset: 3380 + transcript: - header: Podcast Introduction - header: 'Guest Intro: Christoph Molnar, Interpretable ML Author' @@ -1206,126 +1317,6 @@ transcript: sec: 3422 time: '57:02' who: Alexey -description: Discover Interpretable ML, SHAP and Conformal Prediction with Python - examples and self-publishing tips, debug models, calibrate uncertainty, and publish. -intro: How can we make machine learning interpretable in practice — and how do you - turn that expertise into clear, usable technical writing? In this episode, Christoph - Molnar, statistician, machine learner, and author of Interpretable ML, walks through - the tools and workflows he uses to answer that question.

Christoph traces - his path from statistics and Kaggle competitions to becoming a full‑time technical - writer, and drills into core topics like SHAP for debugging models, conformal prediction - for calibrated uncertainty and prediction sets, and practical Python examples. We - also cover interpretability vs. accuracy, terminology around explainable AI, and - keeping skills sharp through competitions and an Obsidian logbook.

On the - writing side, Christoph explains his chapter‑by‑chapter “publishing in public” workflow, - self‑publishing choices (Leanpub, Amazon KDP, print‑on‑demand), feedback strategies - with beta readers, and advice for aspiring technical writers. Listen for actionable - guidance on applying interpretable machine learning techniques and concrete steps - for turning technical work into publishable, useful content. -dateadded: '2023-11-27' -duration: PT00H56M20S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=0 - endOffset: 42 -- name: 'Guest Intro: Christoph Molnar, Interpretable ML Author' - startOffset: 42 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=42 - endOffset: 92 -- name: 'Career Journey: From Statistics to Tech Writing' - startOffset: 92 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=92 - endOffset: 225 -- name: Becoming a Full‑Time Technical Writer - startOffset: 225 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=225 - endOffset: 397 -- name: 'Kaggle Beginnings: Linear Models to Practical ML' - startOffset: 397 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=397 - endOffset: 470 -- name: 'Origin Story: Interest in Interpretable Machine Learning' - startOffset: 470 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=470 - endOffset: 567 -- name: 'Interpretability vs Accuracy: Debugging Models with SHAP' - startOffset: 567 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=567 - endOffset: 719 -- name: 'Active Competition: River Flow Forecasting Project' - startOffset: 719 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=719 - endOffset: 837 -- name: 'Choosing Book Topics: Audience Data and Personal Curiosity' - startOffset: 837 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=837 - endOffset: 955 -- name: 'Publishing in Public: Chapter‑by‑Chapter Workflow' - startOffset: 955 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=955 - endOffset: 1027 -- 
name: 'Self‑Publishing vs Publishers: Control, Editors, Royalties' - startOffset: 1027 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1027 - endOffset: 1138 -- name: 'Book Overview: Interpretable ML; Modeling Mindsets; Conformal Prediction; - SHAP' - startOffset: 1138 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1138 - endOffset: 1227 -- name: 'Conformal Prediction: Calibrated Uncertainty and Prediction Sets' - startOffset: 1227 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1227 - endOffset: 1424 -- name: 'SHAP Deep Dive: Practical Guide and Python Examples' - startOffset: 1424 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1424 - endOffset: 1577 -- name: 'Terminology: Explainable AI vs Interpretable Machine Learning' - startOffset: 1577 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1577 - endOffset: 1800 -- name: 'Work Style: Solo Writing, Collaboration, and Co‑authoring' - startOffset: 1800 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1800 - endOffset: 1987 -- name: 'Staying Hands‑On: Competitions to Maintain Practical Skills' - startOffset: 1987 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1987 - endOffset: 2181 -- name: 'Logbook Practice: Obsidian Notes for Experiments and Reflection' - startOffset: 2181 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2181 - endOffset: 2541 -- name: 'Writing Expertise: Teaching to Learn vs Being a Beginner' - startOffset: 2541 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2541 - endOffset: 2691 -- name: 'Feedback Strategy: Open Drafts, Beta Readers, and Iteration' - startOffset: 2691 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2691 - endOffset: 2916 -- name: 'Advice for Aspiring Technical Writers: Start Small and Publish' - startOffset: 2916 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2916 - endOffset: 3000 -- name: 'Becoming a Full‑Time Author: Timeframe, Income, and Workload' - startOffset: 3000 - url: 
https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3000 - endOffset: 3229 -- name: 'Publishing Logistics: Leanpub, Amazon KDP, and Print‑on‑Demand' - startOffset: 3229 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3229 - endOffset: 3376 -- name: 'Where to Find Christoph: Website, Newsletter, and Socials' - startOffset: 3376 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3376 - endOffset: 3413 -- name: Closing Remarks and Episode Wrap‑Up - startOffset: 3413 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3413 - endOffset: 3380 --- Links: diff --git a/_podcast/s16e08-ai-for-digital-health.md b/_podcast/to-update/s16e08-ai-for-digital-health.md similarity index 95% rename from _podcast/s16e08-ai-for-digital-health.md rename to _podcast/to-update/s16e08-ai-for-digital-health.md index 564dbd07..61d3700e 100644 --- a/_podcast/s16e08-ai-for-digital-health.md +++ b/_podcast/to-update/s16e08-ai-for-digital-health.md @@ -1,19 +1,142 @@ --- +title: "Context: The episode traces a founder’s shift from engineering to healthcare entrepreneurship, driven by the opportunity to digitize fragmented medical systems. It covers pragmatic founder tactics (immersion, rapid MVPs, cold outreach), an unusual AR MVP to collect engagement data, and a discovery that everyday lifestyle interactions reveal skin‑health signals. The conversation ties product experimentation and iterative pivots to building a digital clinic flow (diagnosis → prescription → telemedicine), while confronting legacy infrastructure, rural access gaps, ethical UX, and regional go‑to‑market limits. Growth topics — community‑first data strategies, personalization, retention, hiring, fundraising, and monetization via SaaS/partnerships — are framed alongside human considerations like leadership choices and work‑life integration. 
+ +Core theme: Building an ethical, product‑first digital healthcare startup by using rapid experimentation and community‑driven engagement to bootstrap meaningful clinical data and align AI capabilities with real patient workflows and viable business models—solving legacy access and workflow problems regionally, iterating from MVP to product‑market fit, and scaling sustainably while keeping human needs and ethics central." +short: AI for Digital Health +season: 16 episode: 8 guests: - mariabruckert +image: images/podcast/s16e08-ai-for-digital-health.jpg ids: anchor: atatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc youtube: whpkDmVVGUE -image: images/podcast/s16e08-ai-for-digital-health.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc apple: https://podcasts.apple.com/us/podcast/ai-for-digital-health-maria-bruckert/id1541710331?i=1000637212773 spotify: https://open.spotify.com/episode/2NE0vbiYwXxOuqychHIqBR?si=QdRyuJvSRE2V3bLwHaEv-Q youtube: https://www.youtube.com/watch?v=whpkDmVVGUE -season: 16 -short: AI for Digital Health -title: 'Build & Scale a Digital Clinic: AI Skin Health, Telemedicine & AR MVP' + +description: Discover how to build a digital clinic with AI skin health and telemedicine—learn go-to-market, data strategy, monetization, and hiring wins +intro: How do you build and scale a digital clinic that blends AI-driven skin health, telemedicine, and an AR MVP? In this episode, Maria-Liisa Bruckert, Co‑Founder and Co‑CEO of SQIN and recipient of the Google Female Founder Immersion 2020 and Google Play Best of 2020, walks through the practical steps she took to turn an engineering mindset into a digital health business.

We cover industry immersion and MVP development, why healthcare digitization matters, and real operational challenges like data gaps, rural access, and legacy workflows. Maria explains the AR lipstick try-on as a data collection and engagement tactic, how to surface skin health signals from everyday interactions, and aligning AI capabilities with clear business cases. You’ll also hear about building a digital clinic flow from diagnosis to prescription, telemedicine’s role in remote follow-up and efficiency, ethics and inclusive UX, regional go-to-market tactics, data strategy for bootstrapping datasets, and early hiring, fundraising, and monetization approaches.

Listeners interested in digital clinic design, AI skin health, telemedicine implementation, or launching an AR MVP will find actionable tactics and lessons to apply to product-market fit, data strategy, and go-to-market execution +dateadded: 2023-12-03 + +duration: PT00H52M27S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=0 + endOffset: 49 +- name: 'Career Journey: From Electrical Engineering to Founding SQIN' + startOffset: 49 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=49 + endOffset: 125 +- name: 'Founder Approach: Industry Immersion, MVP Development' + startOffset: 125 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=125 + endOffset: 250 +- name: 'Why Healthcare: Digitization Opportunity in Medical Systems' + startOffset: 250 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=250 + endOffset: 307 +- name: 'Healthcare Challenges: Data Gaps, Rural Access, and Legacy Workflows' + startOffset: 307 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=307 + endOffset: 371 +- name: 'Legacy Infrastructure: Fax, Fragmentation, and Slow Adoption' + startOffset: 371 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=371 + endOffset: 493 +- name: 'Regional Perspective: Access Issues in Southern Brandenburg' + startOffset: 493 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=493 + endOffset: 740 +- name: 'Market Research Tactics: Cold Outreach, Accelerators, Clinical Meetings' + startOffset: 740 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=740 + endOffset: 775 +- name: 'AR MVP: Lipstick Try-On as a Data Collection & Engagement Tool' + startOffset: 775 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=775 + endOffset: 949 +- name: 'Discovery: Skin Health Signals Hidden in Lifestyle Interactions' + startOffset: 949 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=949 + endOffset: 1107 +- name: 'Founder Lessons: Experimentation, Pivoting, and Plan B Flexibility' + 
startOffset: 1107 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1107 + endOffset: 1292 +- name: 'Product-Market Fit: Aligning AI Capabilities with Business Cases' + startOffset: 1292 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1292 + endOffset: 1420 +- name: 'SQIN Product: Digital Clinic Flow from Diagnosis to Prescription' + startOffset: 1420 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1420 + endOffset: 1448 +- name: 'Ethics & UX: Sensitive AI Messaging and Inclusive Design' + startOffset: 1448 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1448 + endOffset: 1665 +- name: 'Go-to-Market Strategy: Regional Focus, Limitations, and Fallbacks' + startOffset: 1665 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1665 + endOffset: 1783 +- name: 'Data Strategy: Leveraging Community Reach to Bootstrap Datasets' + startOffset: 1783 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1783 + endOffset: 1844 +- name: 'Community Productization: Daily Lifestyle Integration & Retention' + startOffset: 1844 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1844 + endOffset: 1960 +- name: 'Audience Expansion: Reaching Multiple Genders and Demographics' + startOffset: 1960 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1960 + endOffset: 2157 +- name: 'Telemedicine Impact: Remote Follow-Up, Prescriptions, and Efficiency' + startOffset: 2157 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2157 + endOffset: 2285 +- name: 'Feedback Loops: Support Channels and User Bug Reporting' + startOffset: 2285 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2285 + endOffset: 2370 +- name: 'Personalization: Archetypes, Gamification, and Educational Content' + startOffset: 2370 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2370 + endOffset: 2492 +- name: 'Company Growth: Team Size, Hiring Needs (AI, Full-Stack, Backend)' + startOffset: 2492 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2492 + endOffset: 2624 +- name: 'Fundraising: 
Proving Profitability and Technical Credibility to Investors' + startOffset: 2624 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2624 + endOffset: 2768 +- name: 'Monetization: SaaS Integrations, Partnerships, and E‑commerce Cuts' + startOffset: 2768 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2768 + endOffset: 2879 +- name: 'Leadership Structure: First Hires and Product vs. CEO Roles' + startOffset: 2879 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2879 + endOffset: 2914 +- name: 'Work-Life Integration: Parenting While Building a Startup' + startOffset: 2914 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2914 + endOffset: 3067 +- name: 'Cultural Upside: Entrepreneurial Mindset Passed to Children' + startOffset: 3067 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3067 + endOffset: 3138 +- name: Closing Remarks and Next Steps + startOffset: 3138 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3138 + endOffset: 3147 + transcript: - header: Podcast Introduction - line: This week, we will talk about AI for digital healthcare. We have a special @@ -1045,139 +1168,6 @@ transcript: sec: 3147 time: '52:27' who: Maria -description: Discover how to build a digital clinic with AI skin health and telemedicine—learn - go-to-market, data strategy, monetization, and hiring wins. -intro: How do you build and scale a digital clinic that blends AI-driven skin health, - telemedicine, and an AR MVP? In this episode, Maria-Liisa Bruckert, Co‑Founder and - Co‑CEO of SQIN and recipient of the Google Female Founder Immersion 2020 and Google - Play Best of 2020, walks through the practical steps she took to turn an engineering - mindset into a digital health business.

We cover industry immersion and - MVP development, why healthcare digitization matters, and real operational challenges - like data gaps, rural access, and legacy workflows. Maria explains the AR lipstick - try-on as a data collection and engagement tactic, how to surface skin health signals - from everyday interactions, and aligning AI capabilities with clear business cases. - You’ll also hear about building a digital clinic flow from diagnosis to prescription, - telemedicine’s role in remote follow-up and efficiency, ethics and inclusive UX, - regional go-to-market tactics, data strategy for bootstrapping datasets, and early - hiring, fundraising, and monetization approaches.

Listeners interested - in digital clinic design, AI skin health, telemedicine implementation, or launching - an AR MVP will find actionable tactics and lessons to apply to product-market fit, - data strategy, and go-to-market execution. -dateadded: '2023-12-03' -duration: PT00H52M27S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=0 - endOffset: 49 -- name: 'Career Journey: From Electrical Engineering to Founding SQIN' - startOffset: 49 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=49 - endOffset: 125 -- name: 'Founder Approach: Industry Immersion, MVP Development' - startOffset: 125 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=125 - endOffset: 250 -- name: 'Why Healthcare: Digitization Opportunity in Medical Systems' - startOffset: 250 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=250 - endOffset: 307 -- name: 'Healthcare Challenges: Data Gaps, Rural Access, and Legacy Workflows' - startOffset: 307 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=307 - endOffset: 371 -- name: 'Legacy Infrastructure: Fax, Fragmentation, and Slow Adoption' - startOffset: 371 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=371 - endOffset: 493 -- name: 'Regional Perspective: Access Issues in Southern Brandenburg' - startOffset: 493 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=493 - endOffset: 740 -- name: 'Market Research Tactics: Cold Outreach, Accelerators, Clinical Meetings' - startOffset: 740 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=740 - endOffset: 775 -- name: 'AR MVP: Lipstick Try-On as a Data Collection & Engagement Tool' - startOffset: 775 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=775 - endOffset: 949 -- name: 'Discovery: Skin Health Signals Hidden in Lifestyle Interactions' - startOffset: 949 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=949 - endOffset: 1107 -- name: 'Founder Lessons: Experimentation, Pivoting, and Plan B Flexibility' - 
startOffset: 1107 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1107 - endOffset: 1292 -- name: 'Product-Market Fit: Aligning AI Capabilities with Business Cases' - startOffset: 1292 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1292 - endOffset: 1420 -- name: 'SQIN Product: Digital Clinic Flow from Diagnosis to Prescription' - startOffset: 1420 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1420 - endOffset: 1448 -- name: 'Ethics & UX: Sensitive AI Messaging and Inclusive Design' - startOffset: 1448 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1448 - endOffset: 1665 -- name: 'Go-to-Market Strategy: Regional Focus, Limitations, and Fallbacks' - startOffset: 1665 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1665 - endOffset: 1783 -- name: 'Data Strategy: Leveraging Community Reach to Bootstrap Datasets' - startOffset: 1783 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1783 - endOffset: 1844 -- name: 'Community Productization: Daily Lifestyle Integration & Retention' - startOffset: 1844 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1844 - endOffset: 1960 -- name: 'Audience Expansion: Reaching Multiple Genders and Demographics' - startOffset: 1960 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1960 - endOffset: 2157 -- name: 'Telemedicine Impact: Remote Follow-Up, Prescriptions, and Efficiency' - startOffset: 2157 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2157 - endOffset: 2285 -- name: 'Feedback Loops: Support Channels and User Bug Reporting' - startOffset: 2285 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2285 - endOffset: 2370 -- name: 'Personalization: Archetypes, Gamification, and Educational Content' - startOffset: 2370 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2370 - endOffset: 2492 -- name: 'Company Growth: Team Size, Hiring Needs (AI, Full-Stack, Backend)' - startOffset: 2492 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2492 - endOffset: 2624 -- name: 'Fundraising: 
Proving Profitability and Technical Credibility to Investors' - startOffset: 2624 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2624 - endOffset: 2768 -- name: 'Monetization: SaaS Integrations, Partnerships, and E‑commerce Cuts' - startOffset: 2768 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2768 - endOffset: 2879 -- name: 'Leadership Structure: First Hires and Product vs. CEO Roles' - startOffset: 2879 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2879 - endOffset: 2914 -- name: 'Work-Life Integration: Parenting While Building a Startup' - startOffset: 2914 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2914 - endOffset: 3067 -- name: 'Cultural Upside: Entrepreneurial Mindset Passed to Children' - startOffset: 3067 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3067 - endOffset: 3138 -- name: Closing Remarks and Next Steps - startOffset: 3138 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3138 - endOffset: 3147 --- Links: diff --git a/_podcast/s16e09-become-data-freelancer.md b/_podcast/to-update/s16e09-become-data-freelancer.md similarity index 97% rename from _podcast/s16e09-become-data-freelancer.md rename to _podcast/to-update/s16e09-become-data-freelancer.md index 25280ce4..923edf45 100644 --- a/_podcast/s16e09-become-data-freelancer.md +++ b/_podcast/to-update/s16e09-become-data-freelancer.md @@ -1,20 +1,137 @@ --- +title: "This episode centers on one clear idea: transitioning from employee to sustainable data freelancer is not a leap of faith but a deliberate, staged business transformation—one that combines technical credibility with market research, proactive outreach, sound pricing and contract choices, client vetting, and financial/legal safeguards so you can manage risk, build repeatable pipelines, and turn independence into a reliable, purpose-driven career." 
+short: Become a Data Freelancer +season: 16 episode: 9 guests: - dimitrivisnadi -date: 2025-11-07 +image: images/podcast/s16e09-become-data-freelancer.jpg ids: anchor: atatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 youtube: R_EnSa9aZtE -image: images/podcast/s16e09-become-data-freelancer.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 apple: https://podcasts.apple.com/us/podcast/become-a-data-freelancer-dimitri-visnadi/id1541710331?i=1000637962993 spotify: https://open.spotify.com/episode/5OJfRiQ64JtLUmIkvadohg?si=uUEdvZwARN2hVGEfz73URg youtube: https://www.youtube.com/watch?v=R_EnSa9aZtE -season: 16 -short: Become a Data Freelancer -title: 'Launch Your Data Freelancer Career: Pricing, Outreach, Contracts & Risk' + +description: 'Launch your data freelancer career: pricing, outreach & contracts tactics, client vetting, legal risk and runway tips to win steady projects.' +intro: 'How do you move from corporate data roles into a sustainable freelance data career while setting rates, winning clients, and managing legal risk? In this episode Dimitri Visnadi — an independent data consultant who has advised brands like Unilever, Ferrero, Heineken and Red Bull, worked in HP’s data teams and a Google‑partner consulting firm, and holds an MSc in Business Analytics from UCL — walks through the practical steps he used to launch The Data Freelancer.

We cover the full arc of transition: career pivot and early outreach, market research and recruiter channels, pricing strategy across platforms vs direct clients, subcontracting and cutting out middlemen, and the contract risks around dependent contractor status. Dimitri also breaks down client vetting, handling corporate payment delays, recommended runway before quitting, and common pitfalls like mispositioning and mispricing. Listeners will leave with concrete tactics for freelance data consulting — outreach scripts, benchmarking approaches for rates, contract checkpoints, and resources (courses, mentors, newsletters) to reduce risk and build a reliable pipeline. Ideal for aspiring data freelancers seeking practical guidance on pricing, outreach, contracts and risk.' +dateadded: 2023-12-09 +date: 2025-11-07 + +duration: PT00H59M49S + +quotableClips: +- name: Podcast Introduction + startOffset: 91 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=91 + endOffset: 140 +- name: 'Career Path: From Marketing to Data' + startOffset: 140 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=140 + endOffset: 184 +- name: 'Startup Experience: Translation, SQL & User Analysis' + startOffset: 184 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=184 + endOffset: 343 +- name: 'Corporate Analytics: Hewlett Packard Sales BI & KPIs' + startOffset: 343 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=343 + endOffset: 458 +- name: 'Education & Transition: UCL Master''s to Data Scientist' + startOffset: 458 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=458 + endOffset: 548 +- name: 'Consulting Exposure: Google Partner & Consulting Foundations' + startOffset: 548 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=548 + endOffset: 685 +- name: 'Motivation to Freelance: Frustration, Freedom & Purpose' + startOffset: 685 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=685 + endOffset: 809 +- name: 'Making the Leap: Resignation, Outreach & Early Leads' + 
startOffset: 809 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=809 + endOffset: 953 +- name: 'Market Research: Cold Outreach to Established Freelancers' + startOffset: 953 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=953 + endOffset: 1042 +- name: 'Practical Setup: Mentors, Registration & Logistics' + startOffset: 1042 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1042 + endOffset: 1100 +- name: 'Recruiter Channels: Engaging Agencies Before Launch' + startOffset: 1100 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1100 + endOffset: 1270 +- name: 'Contracting Risks: Dependent Contractor & Legal Considerations' + startOffset: 1270 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1270 + endOffset: 1524 +- name: 'Pricing Strategy: Platforms, Recruiters & Rate Benchmarking' + startOffset: 1524 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1524 + endOffset: 1927 +- name: 'Client Relationships: Building Referrals & Long-Term Pipeline' + startOffset: 1927 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1927 + endOffset: 2024 +- name: 'Proactive Outreach: Creative Self-Marketing Tactics' + startOffset: 2024 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2024 + endOffset: 2137 +- name: 'Independence & Offerings: Freelance Business Model Explained' + startOffset: 2137 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2137 + endOffset: 2230 +- name: 'Contract Formats: Platform Terms vs Direct Agreements' + startOffset: 2230 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2230 + endOffset: 2330 +- name: 'Direct Client Work: Project Pricing, Subcontracting & Cutting the Middleman' + startOffset: 2330 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2330 + endOffset: 2621 +- name: 'Vetting Clients: Ratings, Company Research & Payment Assurance' + startOffset: 2621 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2621 + endOffset: 2785 +- name: 'Payment Challenges: Corporate Bureaucracy & Delays' + startOffset: 2785 
+ url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2785 + endOffset: 2905 +- name: 'Transition Paths: Weekend, Part-Time & Full-Time Approaches' + startOffset: 2905 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2905 + endOffset: 3040 +- name: 'Running the Business: Income Variability, Risk & Purpose' + startOffset: 3040 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3040 + endOffset: 3251 +- name: 'Financial Planning: Recommended Runway Before Quitting' + startOffset: 3251 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3251 + endOffset: 3301 +- name: 'Common Pitfalls: Mispositioning, Mispricing & False Expectations' + startOffset: 3301 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3301 + endOffset: 3454 +- name: 'Learning Resources: Marketing Courses, Mentors & Newsletters' + startOffset: 3454 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3454 + endOffset: 3651 +- name: 'Follow-Up: The Data Freelancer Newsletter & Contact Channels' + startOffset: 3651 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3651 + endOffset: 3667 +- name: Episode Wrap-Up + startOffset: 3667 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3667 + endOffset: 3589 + transcript: - header: Podcast Introduction - line: This week, we'll talk about doing data freelancing. We have a very special @@ -1178,134 +1295,6 @@ transcript: sec: 3680 time: '1:01:20' who: Dimitri -intro: 'How do you move from corporate data roles into a sustainable freelance data - career while setting rates, winning clients, and managing legal risk? In this episode - Dimitri Visnadi — an independent data consultant who has advised brands like Unilever, - Ferrero, Heineken and Red Bull, worked in HP’s data teams and a Google‑partner consulting - firm, and holds an MSc in Business Analytics from UCL — walks through the practical - steps he used to launch The Data Freelancer.

We cover the full arc of transition: - career pivot and early outreach, market research and recruiter channels, pricing - strategy across platforms vs direct clients, subcontracting and cutting out middlemen, - and the contract risks around dependent contractor status. Dimitri also breaks down - client vetting, handling corporate payment delays, recommended runway before quitting, - and common pitfalls like mispositioning and mispricing. Listeners will leave with - concrete tactics for freelance data consulting — outreach scripts, benchmarking - approaches for rates, contract checkpoints, and resources (courses, mentors, newsletters) - to reduce risk and build a reliable pipeline. Ideal for aspiring data freelancers - seeking practical guidance on pricing, outreach, contracts and risk.' -description: 'Launch your data freelancer career: pricing, outreach & contracts tactics, - client vetting, legal risk and runway tips to win steady projects.' -dateadded: '2023-12-09' -duration: PT00H59M49S -quotableClips: -- name: Podcast Introduction - startOffset: 91 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=91 - endOffset: 140 -- name: 'Career Path: From Marketing to Data' - startOffset: 140 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=140 - endOffset: 184 -- name: 'Startup Experience: Translation, SQL & User Analysis' - startOffset: 184 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=184 - endOffset: 343 -- name: 'Corporate Analytics: Hewlett Packard Sales BI & KPIs' - startOffset: 343 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=343 - endOffset: 458 -- name: 'Education & Transition: UCL Master''s to Data Scientist' - startOffset: 458 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=458 - endOffset: 548 -- name: 'Consulting Exposure: Google Partner & Consulting Foundations' - startOffset: 548 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=548 - endOffset: 685 -- name: 'Motivation to Freelance: Frustration, Freedom & Purpose' - 
startOffset: 685 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=685 - endOffset: 809 -- name: 'Making the Leap: Resignation, Outreach & Early Leads' - startOffset: 809 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=809 - endOffset: 953 -- name: 'Market Research: Cold Outreach to Established Freelancers' - startOffset: 953 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=953 - endOffset: 1042 -- name: 'Practical Setup: Mentors, Registration & Logistics' - startOffset: 1042 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1042 - endOffset: 1100 -- name: 'Recruiter Channels: Engaging Agencies Before Launch' - startOffset: 1100 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1100 - endOffset: 1270 -- name: 'Contracting Risks: Dependent Contractor & Legal Considerations' - startOffset: 1270 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1270 - endOffset: 1524 -- name: 'Pricing Strategy: Platforms, Recruiters & Rate Benchmarking' - startOffset: 1524 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1524 - endOffset: 1927 -- name: 'Client Relationships: Building Referrals & Long-Term Pipeline' - startOffset: 1927 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1927 - endOffset: 2024 -- name: 'Proactive Outreach: Creative Self-Marketing Tactics' - startOffset: 2024 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2024 - endOffset: 2137 -- name: 'Independence & Offerings: Freelance Business Model Explained' - startOffset: 2137 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2137 - endOffset: 2230 -- name: 'Contract Formats: Platform Terms vs Direct Agreements' - startOffset: 2230 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2230 - endOffset: 2330 -- name: 'Direct Client Work: Project Pricing, Subcontracting & Cutting the Middleman' - startOffset: 2330 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2330 - endOffset: 2621 -- name: 'Vetting Clients: Ratings, Company Research & Payment Assurance' - startOffset: 2621 
- url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2621 - endOffset: 2785 -- name: 'Payment Challenges: Corporate Bureaucracy & Delays' - startOffset: 2785 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2785 - endOffset: 2905 -- name: 'Transition Paths: Weekend, Part-Time & Full-Time Approaches' - startOffset: 2905 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2905 - endOffset: 3040 -- name: 'Running the Business: Income Variability, Risk & Purpose' - startOffset: 3040 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3040 - endOffset: 3251 -- name: 'Financial Planning: Recommended Runway Before Quitting' - startOffset: 3251 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3251 - endOffset: 3301 -- name: 'Common Pitfalls: Mispositioning, Mispricing & False Expectations' - startOffset: 3301 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3301 - endOffset: 3454 -- name: 'Learning Resources: Marketing Courses, Mentors & Newsletters' - startOffset: 3454 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3454 - endOffset: 3651 -- name: 'Follow-Up: The Data Freelancer Newsletter & Contact Channels' - startOffset: 3651 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3651 - endOffset: 3667 -- name: Episode Wrap-Up - startOffset: 3667 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3667 - endOffset: 3589 --- Links: diff --git a/_podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md b/_podcast/to-update/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md similarity index 97% rename from _podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md rename to _podcast/to-update/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md index f2ba3879..c470d923 100644 --- a/_podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md +++ b/_podcast/to-update/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md @@ -1,12 +1,22 
@@ --- +title: "Turning hands‑on consulting and hard‑won data engineering experience into a library‑first, open‑source company that solves a concrete pain—declarative JSON→relational transformations for Python users—by validating through workshops and docs, iterating with real user feedback, and scaling via bottom‑up adoption, ecosystem integrations, and paid complementary offerings rather than agency growth or platform lock‑in." +short: 'The Entrepreneurship Journey: From Freelancing to Starting a Company' +season: 17 episode: 1 guests: - adrianbrudaru -date: 2025-11-07 +image: images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg ids: anchor: atatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k youtube: vOpEQiCsaLw -image: images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg +links: + anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k + apple: https://podcasts.apple.com/us/podcast/the-entrepreneurship-journey-from-freelancing-to/id1541710331?i=1000638715212 + spotify: https://open.spotify.com/episode/7wBmJHSXPHoW0mEIbNDgqr?si=z7klLtveT1ioGi6bg8hR7Q + youtube: https://www.youtube.com/watch?v=vOpEQiCsaLw + +description: Discover building open-source JSON-to-Relational data pipelines in Python, practical DLT patterns, anti-pattern fixes, bootstrap tips to speed adoption +intro: 'How do you build an open-source data company that helps Python developers turn messy JSON into reliable relational tables? In this episode, Adrian Brudaru — an economics-trained, Berlin-based founder who moved from startups to freelancing and now co‑founded a data tooling company — walks through the journey of launching developer-focused open‑source software for data engineering.

We cover why dumping JSON into data warehouses is an anti‑pattern and introduce the core DLT concept: a declarative JSON→relational transformation engine aimed at Python devs. Adrian explains product iteration (engine, abstractions, user feedback), running workshops as a validation loop, treating documentation as a product asset, and practical bootstrapping strategies (savings, consulting revenue, scrappy operations). He also discusses team formation via projects, go‑to‑market tactics with a bottom‑up, library‑first approach, ecosystem partnerships (DocDB integration and joint demos), roadmap plans for a paid complement to the open‑source library, and experiments with source generation like OpenAPI generators for pipelines.

Listen if you want concrete technical and GTM guidance on building an open‑source data company, implementing declarative JSON→relational workflows for Python, and how to validate and scale developer tooling without prematurely becoming a platform.' topics: - entrepreneurship - freelance @@ -15,15 +25,133 @@ topics: - leadership - career growth - consulting -links: - anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k - apple: https://podcasts.apple.com/us/podcast/the-entrepreneurship-journey-from-freelancing-to/id1541710331?i=1000638715212 - spotify: https://open.spotify.com/episode/7wBmJHSXPHoW0mEIbNDgqr?si=z7klLtveT1ioGi6bg8hR7Q - youtube: https://www.youtube.com/watch?v=vOpEQiCsaLw -season: 17 -short: 'The Entrepreneurship Journey: From Freelancing to Starting a Company' -title: 'Launch an Open-Source Data Company: Declarative JSON to Relational DLT for - Python Devs' +dateadded: 2023-12-18 +date: 2025-11-07 + +duration: PT00H59M43S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=0 + endOffset: 113 +- name: 'Episode Overview: Building an Open‑Source Data Company' + startOffset: 113 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=113 + endOffset: 199 +- name: 'Career Origins: 2012 Berlin Startups and Corporate Exit' + startOffset: 199 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=199 + endOffset: 243 +- name: 'Freelancing Experience: Autonomy, Savings, Diverse Projects' + startOffset: 243 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=243 + endOffset: 320 +- name: From Hourly Billing to Project-Based Work and Subcontracting + startOffset: 320 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=320 + endOffset: 438 +- name: 'Freelancing Lifestyle: Flexibility and Long‑Term Boredom' + startOffset: 438 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=438 + 
endOffset: 526 +- name: 'Subcontracting Growth: Agency‑like Management Tradeoffs' + startOffset: 526 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=526 + endOffset: 651 +- name: 'Agency Challenges: Responsibility, Incentives, and Misalignment' + startOffset: 651 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=651 + endOffset: 751 +- name: Choosing Product Building Over Growing an Agency + startOffset: 751 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=751 + endOffset: 822 +- name: 'Recurring Pain: Stakeholder Alignment vs Technical Setup' + startOffset: 822 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=822 + endOffset: 976 +- name: 'Target Users: Empowering Python Users with Dev Tooling' + startOffset: 976 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=976 + endOffset: 1071 +- name: 'Anti‑patterns: Dumping JSON into Data Warehouses' + startOffset: 1071 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1071 + endOffset: 1178 +- name: 'DLT Concept: Declarative JSON→Relational Transformation' + startOffset: 1178 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1178 + endOffset: 1410 +- name: 'Product Iteration: Engine, Abstractions, and User Feedback' + startOffset: 1410 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1410 + endOffset: 1523 +- name: 'Team Formation: Meeting Co‑founders Through Projects' + startOffset: 1523 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1523 + endOffset: 1659 +- name: 'Founding as Investment: Time, Risk, and Opportunity Cost' + startOffset: 1659 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1659 + endOffset: 1868 +- name: 'Bootstrapping Strategy: Savings, Consulting Revenue, and Payroll' + startOffset: 1868 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1868 + endOffset: 2060 +- name: 'Scrappy Operations: Office Squatting and Cost Management' + startOffset: 2060 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2060 + endOffset: 2160 +- name: 'Workshop Validation: Teaching 
as a Product Feedback Loop' + startOffset: 2160 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2160 + endOffset: 2248 +- name: 'Workshop Design: Checkpoints, Live Support, and CodeSpaces' + startOffset: 2248 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2248 + endOffset: 2450 +- name: 'Product Identity: DLT as a Developer‑Focused Library' + startOffset: 2450 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2450 + endOffset: 2483 +- name: 'Documentation Investment: When Docs Become Productive Assets' + startOffset: 2483 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2483 + endOffset: 2640 +- name: 'Product–Market Fit Signals: Core Adoption and Removal Test' + startOffset: 2640 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2640 + endOffset: 2876 +- name: 'Current Focus: Leading Go‑to‑Market and Bottom‑Up Strategy' + startOffset: 2876 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2876 + endOffset: 3053 +- name: 'Ecosystem Partnerships: DocDB Integration and Joint Demos' + startOffset: 3053 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3053 + endOffset: 3310 +- name: 'Roadmap: Paid Complement to the Open‑Source Library' + startOffset: 3310 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3310 + endOffset: 3430 +- name: 'Source Generation Experiments: OpenAPI Generators for Pipelines' + startOffset: 3430 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3430 + endOffset: 3491 +- name: 'Positioning vs Platforms: Library‑First vs Airbyte/Fivetran' + startOffset: 3491 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3491 + endOffset: 3641 +- name: 'Recommended Reading: "From Survival to Thrival" on PMF' + startOffset: 3641 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3641 + endOffset: 3656 +- name: Episode Wrap‑Up and Next Steps + startOffset: 3656 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3656 + endOffset: 3583 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Building an 
Open‑Source Data Company' @@ -1170,148 +1298,6 @@ transcript: sec: 3696 time: '1:01:36' who: Alexey -intro: 'How do you build an open-source data company that helps Python developers - turn messy JSON into reliable relational tables? In this episode, Adrian Brudaru - — an economics-trained, Berlin-based founder who moved from startups to freelancing - and now co‑founded a data tooling company — walks through the journey of launching - developer-focused open‑source software for data engineering.

We cover why - dumping JSON into data warehouses is an anti‑pattern and introduce the core DLT - concept: a declarative JSON→relational transformation engine aimed at Python devs. - Adrian explains product iteration (engine, abstractions, user feedback), running - workshops as a validation loop, treating documentation as a product asset, and practical - bootstrapping strategies (savings, consulting revenue, scrappy operations). He also - discusses team formation via projects, go‑to‑market tactics with a bottom‑up, library‑first - approach, ecosystem partnerships (DocDB integration and joint demos), roadmap plans - for a paid complement to the open‑source library, and experiments with source generation - like OpenAPI generators for pipelines.

Listen if you want concrete technical - and GTM guidance on building an open‑source data company, implementing declarative - JSON→relational workflows for Python, and how to validate and scale developer tooling - without prematurely becoming a platform.' -description: Discover building open-source JSON-to-Relational data pipelines in Python, - practical DLT patterns, anti-pattern fixes, bootstrap tips to speed adoption. -dateadded: '2023-12-18' -duration: PT00H59M43S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=0 - endOffset: 113 -- name: 'Episode Overview: Building an Open‑Source Data Company' - startOffset: 113 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=113 - endOffset: 199 -- name: 'Career Origins: 2012 Berlin Startups and Corporate Exit' - startOffset: 199 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=199 - endOffset: 243 -- name: 'Freelancing Experience: Autonomy, Savings, Diverse Projects' - startOffset: 243 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=243 - endOffset: 320 -- name: From Hourly Billing to Project-Based Work and Subcontracting - startOffset: 320 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=320 - endOffset: 438 -- name: 'Freelancing Lifestyle: Flexibility and Long‑Term Boredom' - startOffset: 438 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=438 - endOffset: 526 -- name: 'Subcontracting Growth: Agency‑like Management Tradeoffs' - startOffset: 526 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=526 - endOffset: 651 -- name: 'Agency Challenges: Responsibility, Incentives, and Misalignment' - startOffset: 651 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=651 - endOffset: 751 -- name: Choosing Product Building Over Growing an Agency - startOffset: 751 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=751 - endOffset: 822 -- name: 'Recurring Pain: Stakeholder Alignment vs Technical Setup' - startOffset: 822 - url: 
https://www.youtube.com/watch?v=vOpEQiCsaLw&t=822 - endOffset: 976 -- name: 'Target Users: Empowering Python Users with Dev Tooling' - startOffset: 976 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=976 - endOffset: 1071 -- name: 'Anti‑patterns: Dumping JSON into Data Warehouses' - startOffset: 1071 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1071 - endOffset: 1178 -- name: 'DLT Concept: Declarative JSON→Relational Transformation' - startOffset: 1178 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1178 - endOffset: 1410 -- name: 'Product Iteration: Engine, Abstractions, and User Feedback' - startOffset: 1410 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1410 - endOffset: 1523 -- name: 'Team Formation: Meeting Co‑founders Through Projects' - startOffset: 1523 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1523 - endOffset: 1659 -- name: 'Founding as Investment: Time, Risk, and Opportunity Cost' - startOffset: 1659 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1659 - endOffset: 1868 -- name: 'Bootstrapping Strategy: Savings, Consulting Revenue, and Payroll' - startOffset: 1868 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1868 - endOffset: 2060 -- name: 'Scrappy Operations: Office Squatting and Cost Management' - startOffset: 2060 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2060 - endOffset: 2160 -- name: 'Workshop Validation: Teaching as a Product Feedback Loop' - startOffset: 2160 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2160 - endOffset: 2248 -- name: 'Workshop Design: Checkpoints, Live Support, and CodeSpaces' - startOffset: 2248 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2248 - endOffset: 2450 -- name: 'Product Identity: DLT as a Developer‑Focused Library' - startOffset: 2450 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2450 - endOffset: 2483 -- name: 'Documentation Investment: When Docs Become Productive Assets' - startOffset: 2483 - url: 
https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2483 - endOffset: 2640 -- name: 'Product–Market Fit Signals: Core Adoption and Removal Test' - startOffset: 2640 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2640 - endOffset: 2876 -- name: 'Current Focus: Leading Go‑to‑Market and Bottom‑Up Strategy' - startOffset: 2876 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2876 - endOffset: 3053 -- name: 'Ecosystem Partnerships: DocDB Integration and Joint Demos' - startOffset: 3053 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3053 - endOffset: 3310 -- name: 'Roadmap: Paid Complement to the Open‑Source Library' - startOffset: 3310 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3310 - endOffset: 3430 -- name: 'Source Generation Experiments: OpenAPI Generators for Pipelines' - startOffset: 3430 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3430 - endOffset: 3491 -- name: 'Positioning vs Platforms: Library‑First vs Airbyte/Fivetran' - startOffset: 3491 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3491 - endOffset: 3641 -- name: 'Recommended Reading: "From Survival to Thrival" on PMF' - startOffset: 3641 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3641 - endOffset: 3656 -- name: Episode Wrap‑Up and Next Steps - startOffset: 3656 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3656 - endOffset: 3583 --- Links: diff --git a/_podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md b/_podcast/to-update/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md similarity index 97% rename from _podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md rename to _podcast/to-update/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md index 14bd51f3..c70f17bc 100644 --- 
a/_podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md +++ b/_podcast/to-update/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md @@ -1,38 +1,105 @@ --- -description: 'Discover RAG and vector DBs strategies for search: build podcast chatbots, - optimize embeddings, reduce LLM hallucinations and boost personalization.' -intro: 'How do you modernize search systems with vector search and retrieval‑augmented - generation (RAG) without trading away relevance or inviting hallucinations? In this - episode we talk with a search practitioner rooted in information retrieval who has - worked with Solr, Lucene and the Semantic Web era and later in search consulting - and teaching at Lucidworks and OpenSource Connections.

We cover the arc - from classic keyword search to NLP, embeddings and vector databases (including Qdrant - and plug‑and‑play vector search), and practical migration decisions: when to add - vectors to an existing stack versus adopting a standalone vector DB. You’ll hear - concrete guidance on RAG concepts to reduce LLM hallucinations, building a chatbot - from podcast transcripts using Whisper, ingest strategies (chunking, overlap, embedding - models), and orchestration with tools like LangChain. The episode also digs into - prompt design, citation strategies, multi‑level RAG evaluation with human‑in‑the‑loop - testing, and personalization approaches such as session‑based recommendations and - re‑ranking.

Listen to gain actionable techniques for vector search, embeddings, - RAG pipelines, evaluation metrics, and resources to deepen your knowledge.' +title: "Search today is less about keywords and more about constructing a reliable retrieval‑plus‑generation system: the core through‑line is that effective modern search combines classical IR principles (indexing, ranking, evaluation) with semantic vector representations, embedding stores or vector databases, and LLMs—stitched together by careful ingestion, orchestration, prompt design, and human‑in‑the‑loop evaluation—to deliver accurate, contextualized, and personalized answers." +short: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search Technologies' +season: 17 episode: 2 guests: - atitaarora +image: images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg ids: anchor: atatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps youtube: _fbe1QyJ1PY -image: images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps apple: https://podcasts.apple.com/us/podcast/navigating-challenges-and-innovations-in-search/id1541710331?i=1000639476594 spotify: https://open.spotify.com/episode/7mUMvxP4Efyeh0lhF5CvT6?si=7qqKrsMfQxaZy435s3XIEA youtube: https://www.youtube.com/watch?v=_fbe1QyJ1PY -season: 17 -short: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search - Technologies' -title: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search - Technologies' + +description: 'Discover RAG and vector DBs strategies for search: build podcast chatbots, optimize embeddings, reduce LLM hallucinations and boost personalization.' 
+intro: 'How do you modernize search systems with vector search and retrieval‑augmented generation (RAG) without trading away relevance or inviting hallucinations? In this episode we talk with a search practitioner rooted in information retrieval who has worked with Solr, Lucene and the Semantic Web era and later in search consulting and teaching at Lucidworks and OpenSource Connections.

We cover the arc from classic keyword search to NLP, embeddings and vector databases (including Qdrant and plug‑and‑play vector search), and practical migration decisions: when to add vectors to an existing stack versus adopting a standalone vector DB. You’ll hear concrete guidance on RAG concepts to reduce LLM hallucinations, building a chatbot from podcast transcripts using Whisper, ingest strategies (chunking, overlap, embedding models), and orchestration with tools like LangChain. The episode also digs into prompt design, citation strategies, multi‑level RAG evaluation with human‑in‑the‑loop testing, and personalization approaches such as session‑based recommendations and re‑ranking.

Listen to gain actionable techniques for vector search, embeddings, RAG pipelines, evaluation metrics, and resources to deepen your knowledge.' +dateadded: 2024-01-07 + +duration: PT00H59M13S + +quotableClips: +- name: 'Episode Introduction: search focus and guest overview' + startOffset: 115 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=115 + endOffset: 158 +- name: Background & career beginnings in information retrieval + startOffset: 158 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=158 + endOffset: 282 +- name: 'Early search stack: Solr, Lucene and the Semantic Web era' + startOffset: 282 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=282 + endOffset: 558 +- name: 'NLP and search: matching queries to content' + startOffset: 558 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=558 + endOffset: 689 +- name: 'Search consulting & teaching: Lucidworks and OpenSource Connections' + startOffset: 689 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=689 + endOffset: 1021 +- name: 'Vector databases overview: Qdrant and plug‑and‑play vector search' + startOffset: 1021 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1021 + endOffset: 1227 +- name: 'Migration decisions: vectors in existing search vs. 
standalone DBs' + startOffset: 1227 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1227 + endOffset: 1380 +- name: 'Evolution of search: NLP, personalization, learning‑to‑rank and LLMs' + startOffset: 1380 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1380 + endOffset: 1838 +- name: 'RAG concepts: retrieval plus generation to reduce LLM hallucinations' + startOffset: 1838 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1838 + endOffset: 2149 +- name: Building a chatbot from podcast transcripts and Whisper + startOffset: 2149 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2149 + endOffset: 2304 +- name: 'Ingest strategy: chunking, overlap, embedding models and vectorization' + startOffset: 2304 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2304 + endOffset: 2492 +- name: 'Orchestration tools: Langchain’s role in RAG pipelines' + startOffset: 2492 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2492 + endOffset: 2569 +- name: 'Retrieval → augmentation → generation: prompt design and citations' + startOffset: 2569 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2569 + endOffset: 2889 +- name: 'RAG evaluation: multi‑level metrics, offline tests and human‑in‑the‑loop' + startOffset: 2889 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2889 + endOffset: 3052 +- name: 'Evaluation reading: Human‑in‑the‑Loop and practical methodologies' + startOffset: 3052 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3052 + endOffset: 3127 +- name: 'Vector databases for ML: session‑based recommendations and re‑ranking' + startOffset: 3127 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3127 + endOffset: 3294 +- name: 'Personalization approaches: session‑based vs collaborative filtering' + startOffset: 3294 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3294 + endOffset: 3470 +- name: 'Learning resources: Intro to Information Retrieval, Relevant Search, Vector + Hub' + startOffset: 3470 + url: 
https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3470 + endOffset: 3624 +- name: Episode wrap‑up, links and next steps + startOffset: 3624 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3624 + endOffset: 3553 + transcript: - header: 'Episode Introduction: search focus and guest overview' - line: This week, we'll talk about search. We have a very special guest today, Atita. @@ -1155,86 +1222,6 @@ transcript: sec: 3668 time: '1:01:08' who: Atita -dateadded: '2024-01-07' -duration: PT00H59M13S -quotableClips: -- name: 'Episode Introduction: search focus and guest overview' - startOffset: 115 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=115 - endOffset: 158 -- name: Background & career beginnings in information retrieval - startOffset: 158 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=158 - endOffset: 282 -- name: 'Early search stack: Solr, Lucene and the Semantic Web era' - startOffset: 282 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=282 - endOffset: 558 -- name: 'NLP and search: matching queries to content' - startOffset: 558 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=558 - endOffset: 689 -- name: 'Search consulting & teaching: Lucidworks and OpenSource Connections' - startOffset: 689 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=689 - endOffset: 1021 -- name: 'Vector databases overview: Qdrant and plug‑and‑play vector search' - startOffset: 1021 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1021 - endOffset: 1227 -- name: 'Migration decisions: vectors in existing search vs. 
standalone DBs' - startOffset: 1227 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1227 - endOffset: 1380 -- name: 'Evolution of search: NLP, personalization, learning‑to‑rank and LLMs' - startOffset: 1380 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1380 - endOffset: 1838 -- name: 'RAG concepts: retrieval plus generation to reduce LLM hallucinations' - startOffset: 1838 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1838 - endOffset: 2149 -- name: Building a chatbot from podcast transcripts and Whisper - startOffset: 2149 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2149 - endOffset: 2304 -- name: 'Ingest strategy: chunking, overlap, embedding models and vectorization' - startOffset: 2304 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2304 - endOffset: 2492 -- name: 'Orchestration tools: Langchain’s role in RAG pipelines' - startOffset: 2492 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2492 - endOffset: 2569 -- name: 'Retrieval → augmentation → generation: prompt design and citations' - startOffset: 2569 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2569 - endOffset: 2889 -- name: 'RAG evaluation: multi‑level metrics, offline tests and human‑in‑the‑loop' - startOffset: 2889 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2889 - endOffset: 3052 -- name: 'Evaluation reading: Human‑in‑the‑Loop and practical methodologies' - startOffset: 3052 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3052 - endOffset: 3127 -- name: 'Vector databases for ML: session‑based recommendations and re‑ranking' - startOffset: 3127 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3127 - endOffset: 3294 -- name: 'Personalization approaches: session‑based vs collaborative filtering' - startOffset: 3294 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3294 - endOffset: 3470 -- name: 'Learning resources: Intro to Information Retrieval, Relevant Search, Vector - Hub' - startOffset: 3470 - url: 
https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3470 - endOffset: 3624 -- name: Episode wrap‑up, links and next steps - startOffset: 3624 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3624 - endOffset: 3553 --- Links: diff --git a/_podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.md b/_podcast/to-update/s17e03-stock-market-analysis-with-python-and-machine-learning.md similarity index 95% rename from _podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.md rename to _podcast/to-update/s17e03-stock-market-analysis-with-python-and-machine-learning.md index 8a9bd6d2..5ad7ffda 100644 --- a/_podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.md +++ b/_podcast/to-update/s17e03-stock-market-analysis-with-python-and-machine-learning.md @@ -1,20 +1,138 @@ --- +title: "Context: This episode follows Ivan Brigida’s path from finance to analytics and walks listeners step‑by‑step through the practical craft of retail algorithmic investing — covering data sources and quality, time‑series market formats, strategy ideas (like mean reversion), rigorous backtesting and walk‑forward validation, risk management and execution, feature engineering and model choice, explainability, deployment, and learning resources. + +Core: The unifying idea is that successful retail algorithmic trading is built like an engineering pipeline — start with clean, well‑understood data; define precise prediction targets; design simple, interpretable models and handcrafted features; validate performance with rigorous, leakage‑free backtests and walk‑forward simulations; embed strict risk controls and disciplined execution; and iterate toward partial automation and reproducible deployment while treating the whole process as a continuous learning project rather than a shortcut to quick profits." 
+short: Stock Market Analysis with Python and Machine Learning +season: 17 episode: 3 guests: - ivanbrigida +image: images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg ids: anchor: atatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 youtube: NThHAEIazFk -image: images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 apple: https://podcasts.apple.com/us/podcast/stock-market-analysis-with-python-and-machine/id1541710331?i=1000641465239 spotify: https://open.spotify.com/episode/1ZXAeGr4Kx7F6oLQUip8Cc?si=KJwpYL-3SvuX8nPdc2cyOg youtube: https://www.youtube.com/watch?v=NThHAEIazFk -season: 17 -short: Stock Market Analysis with Python and Machine Learning -title: 'Algorithmic Trading & Mean Reversion: Backtesting, Data APIs, Risk Management - & ML' + +description: 'Discover algorithmic trading & mean reversion: practical backtesting, data APIs, risk management, model choices and trade execution to boost strategy ROI.' +intro: 'How do you build, backtest, and deploy a robust mean-reversion algorithm without falling prey to bad data or time‑series leakage? In this episode, Ivan Brigida — Analytics Lead and creator of PythonInvest — draws on 10+ years in business intelligence, econometrics, forecasting, machine learning and finance to answer that question.

We walk through practical steps for algorithmic trading: choosing retail-friendly data APIs (Yahoo, Quandl, Polygon), understanding market data formats like OHLCV and adjusted close, and cleaning for data quality. Ivan explains mean reversion strategy design, risk management fundamentals including stop‑loss and position sizing, and rigorous backtesting methods—covering time‑series leakage and walk‑forward simulation. He also breaks down prediction targets, feature engineering with time‑window statistics, and model choices from logistic regression to XGBoost and neural networks, plus approaches to explainability and evaluation metrics (ROI, precision, trading fees). Finally, deployment options (cron, Airflow, APIs) and learning resources from PythonInvest are discussed.

Listen to gain actionable guidance on backtesting, data sources, risk controls, and machine learning techniques to move a mean‑reversion idea toward a reproducible algorithmic trading workflow.' +dateadded: 2024-01-24 + +duration: PT01H40S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=0 + endOffset: 95 +- name: 'Guest Introduction: Ivan Brigida — Analytics Lead & PythonInvest' + startOffset: 95 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=95 + endOffset: 128 +- name: 'Disclaimer: Financial discussion, not investment advice' + startOffset: 128 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=128 + endOffset: 233 +- name: Background & career trajectory from finance to analytics + startOffset: 233 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=233 + endOffset: 402 +- name: Google experience and role transitions + startOffset: 402 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=402 + endOffset: 449 +- name: Choosing individual contributor work over people management + startOffset: 449 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=449 + endOffset: 565 +- name: 'Investing interest: economics education to practical trading' + startOffset: 565 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=565 + endOffset: 707 +- name: Blogging & building a pet project to test strategies + startOffset: 707 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=707 + endOffset: 795 +- name: Financial data sources and APIs for retail investors (Yahoo, Quandl, Polygon) + startOffset: 795 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=795 + endOffset: 923 +- name: 'Market data format explained: OHLCV time series' + startOffset: 923 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=923 + endOffset: 1119 +- name: Adjusted close and data quality considerations + startOffset: 1119 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1119 + endOffset: 1187 +- name: 'Mean reversion 
strategy: concept and application' + startOffset: 1187 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1187 + endOffset: 1334 +- name: Risk management fundamentals and stop‑loss thresholds + startOffset: 1334 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1334 + endOffset: 1608 +- name: Backtesting methodology and avoiding time‑series data leakage + startOffset: 1608 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1608 + endOffset: 1784 +- name: 'Walk‑forward simulation: weekly predictions and selection rules' + startOffset: 1784 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1784 + endOffset: 2115 +- name: Trade execution and position sizing for algorithmic strategies + startOffset: 2115 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2115 + endOffset: 2304 +- name: 'Discipline: sticking to strategy vs emotional trading' + startOffset: 2304 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2304 + endOffset: 2451 +- name: 'Evaluation metrics: ROI, precision focus, and trading fees impact' + startOffset: 2451 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2451 + endOffset: 2619 +- name: 'Prediction target definition: binary growth thresholds (e.g., 5%)' + startOffset: 2619 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2619 + endOffset: 2755 +- name: 'Feature engineering: time‑window stats and handcrafted indicators' + startOffset: 2755 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2755 + endOffset: 2882 +- name: 'Model choices: logistic regression, XGBoost, NN for stock prediction' + startOffset: 2882 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2882 + endOffset: 2998 +- name: 'Explainability: feature importance and model debugging' + startOffset: 2998 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2998 + endOffset: 3106 +- name: 'Deployment options: cron, Airflow, APIs and partial automation' + startOffset: 3106 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3106 + endOffset: 3305 +- name: 
'Learning pathways: MLOps, ML Zoomcamp, and practical projects' + startOffset: 3305 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3305 + endOffset: 3449 +- name: 'PythonInvest content: API guides, models, portfolio allocation stories' + startOffset: 3449 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3449 + endOffset: 3666 +- name: Course plans, sign‑up, and community building + startOffset: 3666 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3666 + endOffset: 3696 +- name: Episode Wrap‑up and final reminder (not financial advice) + startOffset: 3696 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3696 + endOffset: 3640 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Ivan Brigida — Analytics Lead & PythonInvest' @@ -1016,136 +1134,6 @@ transcript: sec: 3735 time: '1:02:15' who: Ivan -description: 'Discover algorithmic trading & mean reversion: practical backtesting, - data APIs, risk management, model choices and trade execution to boost strategy - ROI.' -intro: 'How do you build, backtest, and deploy a robust mean-reversion algorithm without - falling prey to bad data or time‑series leakage? In this episode, Ivan Brigida — - Analytics Lead and creator of PythonInvest — draws on 10+ years in business intelligence, - econometrics, forecasting, machine learning and finance to answer that question. -

We walk through practical steps for algorithmic trading: choosing retail-friendly - data APIs (Yahoo, Quandl, Polygon), understanding market data formats like OHLCV - and adjusted close, and cleaning for data quality. Ivan explains mean reversion - strategy design, risk management fundamentals including stop‑loss and position sizing, - and rigorous backtesting methods—covering time‑series leakage and walk‑forward simulation. - He also breaks down prediction targets, feature engineering with time‑window statistics, - and model choices from logistic regression to XGBoost and neural networks, plus - approaches to explainability and evaluation metrics (ROI, precision, trading fees). - Finally, deployment options (cron, Airflow, APIs) and learning resources from PythonInvest - are discussed.

Listen to gain actionable guidance on backtesting, data - sources, risk controls, and machine learning techniques to move a mean‑reversion - idea toward a reproducible algorithmic trading workflow.' -dateadded: '2024-01-24' -duration: PT01H40S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=0 - endOffset: 95 -- name: 'Guest Introduction: Ivan Brigida — Analytics Lead & PythonInvest' - startOffset: 95 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=95 - endOffset: 128 -- name: 'Disclaimer: Financial discussion, not investment advice' - startOffset: 128 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=128 - endOffset: 233 -- name: Background & career trajectory from finance to analytics - startOffset: 233 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=233 - endOffset: 402 -- name: Google experience and role transitions - startOffset: 402 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=402 - endOffset: 449 -- name: Choosing individual contributor work over people management - startOffset: 449 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=449 - endOffset: 565 -- name: 'Investing interest: economics education to practical trading' - startOffset: 565 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=565 - endOffset: 707 -- name: Blogging & building a pet project to test strategies - startOffset: 707 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=707 - endOffset: 795 -- name: Financial data sources and APIs for retail investors (Yahoo, Quandl, Polygon) - startOffset: 795 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=795 - endOffset: 923 -- name: 'Market data format explained: OHLCV time series' - startOffset: 923 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=923 - endOffset: 1119 -- name: Adjusted close and data quality considerations - startOffset: 1119 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1119 - endOffset: 1187 -- name: 'Mean reversion 
strategy: concept and application' - startOffset: 1187 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1187 - endOffset: 1334 -- name: Risk management fundamentals and stop‑loss thresholds - startOffset: 1334 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1334 - endOffset: 1608 -- name: Backtesting methodology and avoiding time‑series data leakage - startOffset: 1608 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1608 - endOffset: 1784 -- name: 'Walk‑forward simulation: weekly predictions and selection rules' - startOffset: 1784 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1784 - endOffset: 2115 -- name: Trade execution and position sizing for algorithmic strategies - startOffset: 2115 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2115 - endOffset: 2304 -- name: 'Discipline: sticking to strategy vs emotional trading' - startOffset: 2304 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2304 - endOffset: 2451 -- name: 'Evaluation metrics: ROI, precision focus, and trading fees impact' - startOffset: 2451 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2451 - endOffset: 2619 -- name: 'Prediction target definition: binary growth thresholds (e.g., 5%)' - startOffset: 2619 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2619 - endOffset: 2755 -- name: 'Feature engineering: time‑window stats and handcrafted indicators' - startOffset: 2755 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2755 - endOffset: 2882 -- name: 'Model choices: logistic regression, XGBoost, NN for stock prediction' - startOffset: 2882 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2882 - endOffset: 2998 -- name: 'Explainability: feature importance and model debugging' - startOffset: 2998 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2998 - endOffset: 3106 -- name: 'Deployment options: cron, Airflow, APIs and partial automation' - startOffset: 3106 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3106 - endOffset: 3305 -- name: 
'Learning pathways: MLOps, ML Zoomcamp, and practical projects' - startOffset: 3305 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3305 - endOffset: 3449 -- name: 'PythonInvest content: API guides, models, portfolio allocation stories' - startOffset: 3449 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3449 - endOffset: 3666 -- name: Course plans, sign‑up, and community building - startOffset: 3666 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3666 - endOffset: 3696 -- name: Episode Wrap‑up and final reminder (not financial advice) - startOffset: 3696 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3696 - endOffset: 3640 --- Links: diff --git a/_podcast/s17e04-bayesian-modeling-and-probabilistic-programming.md b/_podcast/to-update/s17e04-bayesian-modeling-and-probabilistic-programming.md similarity index 96% rename from _podcast/s17e04-bayesian-modeling-and-probabilistic-programming.md rename to _podcast/to-update/s17e04-bayesian-modeling-and-probabilistic-programming.md index c168f4a1..91a36198 100644 --- a/_podcast/s17e04-bayesian-modeling-and-probabilistic-programming.md +++ b/_podcast/to-update/s17e04-bayesian-modeling-and-probabilistic-programming.md @@ -1,20 +1,130 @@ --- +title: "Context: This episode centers on Rob Zinkov and the Hakaru probabilistic programming project, tracing his career shift into Bayesian machine learning, contrasting tools (Hakaru, PyMC, Stan), and practical techniques (priors, likelihoods, sampling, MCMC/HMC/NUTS) alongside the skills and learning resources needed to apply them. + +Central narrative: Probabilistic programming and the Bayesian workflow offer a practical, composable way to bring honest uncertainty quantification into real-world problems by turning statistical models into executable programs—models you can build incrementally, check, and refine. 
The core unifying idea is that encoding assumptions as programs makes intractable integrals manageable through numerical approximation (sampling and MCMC), lets you compose and reuse model parts, and shifts modeling toward an iterative, testable practice; doing this effectively requires foundational math and a mindset of principled model-building rather than chasing point estimates." +short: Bayesian Modeling and Probabilistic Programming +season: 17 episode: 4 guests: - robzinkov +image: images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg ids: anchor: atatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 youtube: kcKvUSInm-M -image: images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 apple: https://podcasts.apple.com/us/podcast/bayesian-modeling-and-probabilistic-programming-rob/id1541710331?i=1000642253191 spotify: https://open.spotify.com/episode/5WUKDcTYv8ZvnqeHSQT7FF?si=K10siPBHQwmegCCXJ1VpIA youtube: https://www.youtube.com/watch?v=kcKvUSInm-M -season: 17 -short: Bayesian Modeling and Probabilistic Programming -title: 'Master Bayesian Modeling & Probabilistic Programming: MCMC, HMC/NUTS, Sampling - with Hakaru & PyMC' + +description: Master Bayesian modeling, MCMC/HMC/NUTS and probabilistic programming with Hakaru & PyMC—learn sampling, priors, posteriors and practical model building +intro: 'How do you move from point estimates to full Bayesian models and pick the right sampler for real problems? In this episode, Rob Zinkov — machine learning engineer, data scientist, and former lead developer of the Hakaru probabilistic programming language — walks through mastering Bayesian modeling and probabilistic programming, focusing on practical tools like MCMC, HMC/NUTS, sampling, Hakaru, and PyMC.

We cover the core Bayesian workflow: priors, likelihoods, and posterior distributions; why integrals become intractable and how numerical integration via sampling approximates expectations; and the fundamentals of Markov chain Monte Carlo for exploring high‑probability regions. Rob contrasts frequentist point estimates with Bayesian distributions, explains composability and incremental model building in probabilistic languages, and discusses language vs library design and Hakaru’s role in generating samplers. You’ll hear a concrete PyMC rainfall model example, strategies for interpreting posteriors, encoding spatial and hierarchical dependencies, and handling multimodality and uncertainty. The episode closes with practical learning resources (PyMC book, Statistical Rethinking) to support your self‑study. Tune in to get actionable guidance on building, sampling, and refining Bayesian models.' +dateadded: '2024-01-22' + +duration: PT01H05M05S + +quotableClips: +- name: Episode Introduction & Topic Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=0 + endOffset: 104 +- name: 'Guest Introduction: Rob Zinkov and the Hakaru probabilistic programming project' + startOffset: 104 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=104 + endOffset: 166 +- name: 'Career Journey: From software engineering to machine learning research' + startOffset: 166 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=166 + endOffset: 237 +- name: 'Industry vs Academia: Applying Bayesian tools in real problems' + startOffset: 237 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=237 + endOffset: 400 +- name: 'Transitioning Skills: Embracing calculus, integrals, and optimization' + startOffset: 400 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=400 + endOffset: 492 +- name: 'Core Technical Skills: Linear algebra and optimization for ML' + startOffset: 492 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=492 + endOffset: 572 +- name: 'Self‑Study 
Path: Learning statistics without formal classes' + startOffset: 572 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=572 + endOffset: 887 +- name: 'Statistical Paradigms: Frequentist point estimates vs Bayesian distributions' + startOffset: 887 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=887 + endOffset: 1146 +- name: 'Bayesian Workflow: Priors, likelihoods, and posterior distributions' + startOffset: 1146 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1146 + endOffset: 1291 +- name: 'Bayesian Advantages: Composability and incremental model building' + startOffset: 1291 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1291 + endOffset: 1425 +- name: 'Probabilistic Programming: Automating Bayesian model tasks' + startOffset: 1425 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1425 + endOffset: 1469 +- name: 'Why Integrals Matter: Intractable integrals in probabilistic models' + startOffset: 1469 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1469 + endOffset: 1600 +- name: 'Numerical Integration: Sampling as an approximation technique' + startOffset: 1600 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1600 + endOffset: 1757 +- name: 'Samplers Overview: Using draws to estimate posterior expectations' + startOffset: 1757 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1757 + endOffset: 2028 +- name: 'MCMC Fundamentals: Markov chains and exploring high‑probability regions' + startOffset: 2028 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2028 + endOffset: 2199 +- name: 'Probabilistic Languages: Hakaru’s role in generating samplers' + startOffset: 2199 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2199 + endOffset: 2378 +- name: 'Language vs Library: Model semantics, control flow, and ASTs' + startOffset: 2378 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2378 + endOffset: 2600 +- name: 'PyMC Example: Building a rainfall model and computational graph' + startOffset: 2600 + url: 
https://www.youtube.com/watch?v=kcKvUSInm-M&t=2600 + endOffset: 2890 +- name: 'Interpreting Posteriors: Model checks and iterative refinement' + startOffset: 2890 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2890 + endOffset: 3077 +- name: 'Encoding Dependencies: Spatial models and hierarchical structure' + startOffset: 3077 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3077 + endOffset: 3192 +- name: 'Multimodality & Uncertainty: Representing multiple plausible outcomes' + startOffset: 3192 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3192 + endOffset: 3341 +- name: 'Stan & HMC/NUTS: Advances in efficient sampling algorithms' + startOffset: 3341 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3341 + endOffset: 3647 +- name: 'Learning Resources: PyMC book, Statistical Rethinking course, and tutorials' + startOffset: 3647 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3647 + endOffset: 3953 +- name: 'Consulting & Contact: Rob’s statistical consulting and email' + startOffset: 3953 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3953 + endOffset: 3991 +- name: Episode Wrap‑up, Links, and Next Steps + startOffset: 3991 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3991 + endOffset: 3905 + transcript: - header: Episode Introduction & Topic Overview - header: 'Guest Introduction: Rob Zinkov and the Hakaru probabilistic programming @@ -1185,127 +1295,6 @@ transcript: sec: 4009 time: '1:06:49' who: Alexey -description: Master Bayesian modeling, MCMC/HMC/NUTS and probabilistic programming - with Hakaru & PyMC—learn sampling, priors, posteriors and practical model building. -intro: 'How do you move from point estimates to full Bayesian models and pick the - right sampler for real problems? 
In this episode, Rob Zinkov — machine learning engineer, - data scientist, and former lead developer of the Hakaru probabilistic programming - language — walks through mastering Bayesian modeling and probabilistic programming, - focusing on practical tools like MCMC, HMC/NUTS, sampling, Hakaru, and PyMC.

- We cover the core Bayesian workflow: priors, likelihoods, and posterior distributions; - why integrals become intractable and how numerical integration via sampling approximates - expectations; and the fundamentals of Markov chain Monte Carlo for exploring high‑probability - regions. Rob contrasts frequentist point estimates with Bayesian distributions, - explains composability and incremental model building in probabilistic languages, - and discusses language vs library design and Hakaru’s role in generating samplers. - You’ll hear a concrete PyMC rainfall model example, strategies for interpreting - posteriors, encoding spatial and hierarchical dependencies, and handling multimodality - and uncertainty. The episode closes with practical learning resources (PyMC book, - Statistical Rethinking) to support your self‑study. Tune in to get actionable guidance - on building, sampling, and refining Bayesian models.' -dateadded: '2024-01-22' -duration: PT01H05M05S -quotableClips: -- name: Episode Introduction & Topic Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=0 - endOffset: 104 -- name: 'Guest Introduction: Rob Zinkov and the Hakaru probabilistic programming project' - startOffset: 104 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=104 - endOffset: 166 -- name: 'Career Journey: From software engineering to machine learning research' - startOffset: 166 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=166 - endOffset: 237 -- name: 'Industry vs Academia: Applying Bayesian tools in real problems' - startOffset: 237 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=237 - endOffset: 400 -- name: 'Transitioning Skills: Embracing calculus, integrals, and optimization' - startOffset: 400 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=400 - endOffset: 492 -- name: 'Core Technical Skills: Linear algebra and optimization for ML' - startOffset: 492 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=492 - endOffset: 572 -- 
name: 'Self‑Study Path: Learning statistics without formal classes' - startOffset: 572 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=572 - endOffset: 887 -- name: 'Statistical Paradigms: Frequentist point estimates vs Bayesian distributions' - startOffset: 887 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=887 - endOffset: 1146 -- name: 'Bayesian Workflow: Priors, likelihoods, and posterior distributions' - startOffset: 1146 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1146 - endOffset: 1291 -- name: 'Bayesian Advantages: Composability and incremental model building' - startOffset: 1291 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1291 - endOffset: 1425 -- name: 'Probabilistic Programming: Automating Bayesian model tasks' - startOffset: 1425 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1425 - endOffset: 1469 -- name: 'Why Integrals Matter: Intractable integrals in probabilistic models' - startOffset: 1469 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1469 - endOffset: 1600 -- name: 'Numerical Integration: Sampling as an approximation technique' - startOffset: 1600 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1600 - endOffset: 1757 -- name: 'Samplers Overview: Using draws to estimate posterior expectations' - startOffset: 1757 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1757 - endOffset: 2028 -- name: 'MCMC Fundamentals: Markov chains and exploring high‑probability regions' - startOffset: 2028 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2028 - endOffset: 2199 -- name: 'Probabilistic Languages: Hakaru’s role in generating samplers' - startOffset: 2199 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2199 - endOffset: 2378 -- name: 'Language vs Library: Model semantics, control flow, and ASTs' - startOffset: 2378 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2378 - endOffset: 2600 -- name: 'PyMC Example: Building a rainfall model and computational graph' - startOffset: 2600 - url: 
https://www.youtube.com/watch?v=kcKvUSInm-M&t=2600 - endOffset: 2890 -- name: 'Interpreting Posteriors: Model checks and iterative refinement' - startOffset: 2890 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2890 - endOffset: 3077 -- name: 'Encoding Dependencies: Spatial models and hierarchical structure' - startOffset: 3077 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3077 - endOffset: 3192 -- name: 'Multimodality & Uncertainty: Representing multiple plausible outcomes' - startOffset: 3192 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3192 - endOffset: 3341 -- name: 'Stan & HMC/NUTS: Advances in efficient sampling algorithms' - startOffset: 3341 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3341 - endOffset: 3647 -- name: 'Learning Resources: PyMC book, Statistical Rethinking course, and tutorials' - startOffset: 3647 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3647 - endOffset: 3953 -- name: 'Consulting & Contact: Rob’s statistical consulting and email' - startOffset: 3953 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3953 - endOffset: 3991 -- name: Episode Wrap‑up, Links, and Next Steps - startOffset: 3991 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3991 - endOffset: 3905 --- Links: diff --git a/_podcast/s17e05-machine-learning-engineering-in-finance.md b/_podcast/to-update/s17e05-machine-learning-engineering-in-finance.md similarity index 96% rename from _podcast/s17e05-machine-learning-engineering-in-finance.md rename to _podcast/to-update/s17e05-machine-learning-engineering-in-finance.md index 5e36d977..917277db 100644 --- a/_podcast/s17e05-machine-learning-engineering-in-finance.md +++ b/_podcast/to-update/s17e05-machine-learning-engineering-in-finance.md @@ -1,19 +1,127 @@ --- +title: "Context: Nemanja’s story and the episode’s segments trace practical ML work in regulated finance—moving from research to ML engineering in legacy, governance-heavy environments—covering real constraints (on‑prem infra, 
approvals), concrete ML Ops responsibilities (CI/CD, deployment, monitoring, model/data versioning), tactical shortcuts, team and platform patterns, and the skills and career moves that enable this work. + +Core: The unifying idea is that bringing ML into production in conservative, regulated organizations succeeds not through ideal tools or big rewrites but through a pragmatic, engineering‑first, incremental approach—building minimal viable ML Ops (reproducible pipelines, environments, monitoring, simple registries), integrating with existing DevOps/governance, reusing platform patterns, and focusing on practical skills and iterative delivery to earn trust and scale ML responsibly." +short: Machine Learning Engineering in Finance +season: 17 episode: 5 guests: - nemanjaradojkovic +image: images/podcast/s17e05-machine-learning-engineering-in-finance.jpg ids: anchor: atatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 youtube: Nl4aibeFwiI -image: images/podcast/s17e05-machine-learning-engineering-in-finance.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 apple: https://podcasts.apple.com/us/podcast/machine-learning-engineering-in-finance-nemanja-radojkovic/id1541710331?i=1000643322929 spotify: https://open.spotify.com/episode/3yQtA8EAndau1yhCFPfwtj?si=ZutO4mLlRfOz_Zgw4GujiQ youtube: https://www.youtube.com/watch?v=Nl4aibeFwiI -season: 17 -short: Machine Learning Engineering in Finance -title: 'Practical MLOps for Finance: CI/CD, On-Prem Deployment & Minimal Viable ML' + +description: 'Learn MLOps for finance: CI/CD & on-prem deployment with minimal viable ML - build reproducible pipelines, model registry and monitoring to ensure compliance' +intro: 'How do you deliver machine learning in highly regulated, legacy finance environments where CI/CD, on‑prem deployment, and governance constrain every decision? 
In this episode Nemanja Radojkovic — an electrical engineer turned data scientist and MLOps practitioner who now teaches Data Science and contributes courses to DataCamp — walks through pragmatic MLOps for finance.

We cover concrete finance use cases (AML, fraud, compliance, automated document and email processing) and the ML engineering responsibilities that matter most: CI/CD, deployment choices, and integrating ML workflows with existing DevOps and release governance. Nemanja explains working with on‑prem platforms like Hadoop and OpenShift, how to prioritize a minimal viable ML Ops stack on a shoestring (dev/test/prod environments, monitoring, model registry, data versioning, reproducible pipelines), and tactical interim solutions such as using S3 for registry/versioning.

Listeners will get actionable guidance on prototyping under regulatory constraints, team structures and reusable platform patterns, and the practical skills and beginner tech stack (Python, SQL, Pandas/Polars, cloud basics) to move models from experiment to production in finance.' +dateadded: 2024-01-29 + +duration: PT00H58M04S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=0 + endOffset: 95 +- name: 'Guest Introduction: Nemanja’s journey from Belgrade to ML Ops in Europe' + startOffset: 95 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=95 + endOffset: 172 +- name: 'Guest Background: Electrical engineering, PhD experience, and early career + moves' + startOffset: 172 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=172 + endOffset: 498 +- name: 'Early Data Roles: PhD, Deloitte, and first paid Python work' + startOffset: 498 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=498 + endOffset: 635 +- name: 'Finance Use Cases: Compliance, AML, fraud, and smart automation (document + & email processing)' + startOffset: 635 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=635 + endOffset: 897 +- name: 'Role Overview: ML engineering / ML Ops responsibilities in finance (CI/CD, + deployment choices)' + startOffset: 897 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=897 + endOffset: 1132 +- name: 'Regulatory & Legacy Constraints: Slow change, legacy systems, and governance + impact' + startOffset: 1132 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1132 + endOffset: 1345 +- name: 'DevOps Governance: Release management, approvals, and building trust' + startOffset: 1345 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1345 + endOffset: 1419 +- name: 'Integrating ML with DevOps: Adapting ML workflows to existing corporate processes' + startOffset: 1419 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1419 + endOffset: 1671 +- name: 'On-Premises Infrastructure: Hadoop, 
OpenShift, hardware requests, and platform + teams' + startOffset: 1671 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1671 + endOffset: 1862 +- name: 'ML Ops on a Shoestring: Prioritization and minimal viable ML Ops strategy' + startOffset: 1862 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1862 + endOffset: 1917 +- name: 'Minimal ML Ops Components: Dev/test/prod environments, monitoring, model + registry, data versioning, reproducible pipelines' + startOffset: 1917 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1917 + endOffset: 2157 +- name: 'Tactical Solutions: Using S3 and simple approaches as interim model registry/data + versioning' + startOffset: 2157 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2157 + endOffset: 2328 +- name: 'Project Approach: Prototyping, Agile limits for ML, and iterative groundwork' + startOffset: 2328 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2328 + endOffset: 2474 +- name: 'Team Structure: Multiple data scientists per ML engineer and standardized + deployment patterns' + startOffset: 2474 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2474 + endOffset: 2619 +- name: 'Platform & Reuse: Internal libraries, FastAPI framework, and maintaining + production apps' + startOffset: 2619 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2619 + endOffset: 2704 +- name: 'Skills for ML Engineers: Python, Linux, networking, cloud basics, and stakeholder + evangelism' + startOffset: 2704 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2704 + endOffset: 2935 +- name: 'Career Transition Challenges: Moving from electrical engineering and sales + into ML — probabilistic thinking' + startOffset: 2935 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2935 + endOffset: 3171 +- name: 'Beginner Tech Stack: Python, SQL, Pandas/Polars, cloud basics, and job-market + driven learning' + startOffset: 3171 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3171 + endOffset: 3379 +- name: 'Learn by Building: 
End-to-end projects, web apps, and scraping job postings + to discover in-demand skills' + startOffset: 3379 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3379 + endOffset: 3544 +- name: Closing Remarks and Links to Talk/Resources + startOffset: 3544 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3544 + endOffset: 3484 + transcript: - header: Episode Introduction - header: 'Guest Introduction: Nemanja’s journey from Belgrade to ML Ops in Europe' @@ -1167,123 +1275,6 @@ transcript: sec: 3579 time: '59:39' who: Nemanja -description: 'Learn MLOps for finance: CI/CD & on-prem deployment with minimal viable - ML - build reproducible pipelines, model registry and monitoring to ensure compliance' -intro: 'How do you deliver machine learning in highly regulated, legacy finance environments - where CI/CD, on‑prem deployment, and governance constrain every decision? In this - episode Nemanja Radojkovic — an electrical engineer turned data scientist and MLOps - practitioner who now teaches Data Science and contributes courses to DataCamp — - walks through pragmatic MLOps for finance.

We cover concrete finance use - cases (AML, fraud, compliance, automated document and email processing) and the - ML engineering responsibilities that matter most: CI/CD, deployment choices, and - integrating ML workflows with existing DevOps and release governance. Nemanja explains - working with on‑prem platforms like Hadoop and OpenShift, how to prioritize a minimal - viable ML Ops stack on a shoestring (dev/test/prod environments, monitoring, model - registry, data versioning, reproducible pipelines), and tactical interim solutions - such as using S3 for registry/versioning.

Listeners will get actionable - guidance on prototyping under regulatory constraints, team structures and reusable - platform patterns, and the practical skills and beginner tech stack (Python, SQL, - Pandas/Polars, cloud basics) to move models from experiment to production in finance.' -dateadded: '2024-01-29' -duration: PT00H58M04S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=0 - endOffset: 95 -- name: 'Guest Introduction: Nemanja’s journey from Belgrade to ML Ops in Europe' - startOffset: 95 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=95 - endOffset: 172 -- name: 'Guest Background: Electrical engineering, PhD experience, and early career - moves' - startOffset: 172 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=172 - endOffset: 498 -- name: 'Early Data Roles: PhD, Deloitte, and first paid Python work' - startOffset: 498 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=498 - endOffset: 635 -- name: 'Finance Use Cases: Compliance, AML, fraud, and smart automation (document - & email processing)' - startOffset: 635 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=635 - endOffset: 897 -- name: 'Role Overview: ML engineering / ML Ops responsibilities in finance (CI/CD, - deployment choices)' - startOffset: 897 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=897 - endOffset: 1132 -- name: 'Regulatory & Legacy Constraints: Slow change, legacy systems, and governance - impact' - startOffset: 1132 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1132 - endOffset: 1345 -- name: 'DevOps Governance: Release management, approvals, and building trust' - startOffset: 1345 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1345 - endOffset: 1419 -- name: 'Integrating ML with DevOps: Adapting ML workflows to existing corporate processes' - startOffset: 1419 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1419 - endOffset: 1671 -- name: 'On-Premises Infrastructure: 
Hadoop, OpenShift, hardware requests, and platform - teams' - startOffset: 1671 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1671 - endOffset: 1862 -- name: 'ML Ops on a Shoestring: Prioritization and minimal viable ML Ops strategy' - startOffset: 1862 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1862 - endOffset: 1917 -- name: 'Minimal ML Ops Components: Dev/test/prod environments, monitoring, model - registry, data versioning, reproducible pipelines' - startOffset: 1917 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1917 - endOffset: 2157 -- name: 'Tactical Solutions: Using S3 and simple approaches as interim model registry/data - versioning' - startOffset: 2157 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2157 - endOffset: 2328 -- name: 'Project Approach: Prototyping, Agile limits for ML, and iterative groundwork' - startOffset: 2328 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2328 - endOffset: 2474 -- name: 'Team Structure: Multiple data scientists per ML engineer and standardized - deployment patterns' - startOffset: 2474 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2474 - endOffset: 2619 -- name: 'Platform & Reuse: Internal libraries, FastAPI framework, and maintaining - production apps' - startOffset: 2619 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2619 - endOffset: 2704 -- name: 'Skills for ML Engineers: Python, Linux, networking, cloud basics, and stakeholder - evangelism' - startOffset: 2704 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2704 - endOffset: 2935 -- name: 'Career Transition Challenges: Moving from electrical engineering and sales - into ML — probabilistic thinking' - startOffset: 2935 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2935 - endOffset: 3171 -- name: 'Beginner Tech Stack: Python, SQL, Pandas/Polars, cloud basics, and job-market - driven learning' - startOffset: 3171 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3171 - endOffset: 3379 -- name: 'Learn by 
Building: End-to-end projects, web apps, and scraping job postings - to discover in-demand skills' - startOffset: 3379 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3379 - endOffset: 3544 -- name: Closing Remarks and Links to Talk/Resources - startOffset: 3544 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3544 - endOffset: 3484 --- Links: diff --git a/_podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md b/_podcast/to-update/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md similarity index 97% rename from _podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md rename to _podcast/to-update/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md index c6d64488..ceb56df2 100644 --- a/_podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md +++ b/_podcast/to-update/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md @@ -1,20 +1,138 @@ --- +title: "Context: A coach-led roadmap for technical career changers (often returning parents) that covers defining an ideal role, choosing a specialization, validating skills through projects, targeting companies, crafting resumes/stories, and running consistent, relationship-driven outreach. + +Core theme: Intentionally design a focused, market-aligned career identity and then convert it into tangible evidence and relationships—using targeted projects, tailored applications, informational interviews, and a weekly, measurable outreach plan—to turn validated skills and clear storytelling into job offers." 
+short: Accelerating The Job Hunt for The Perfect Job in Tech +season: 17 episode: 6 guests: - sarahmestiri +image: images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg ids: anchor: atatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 youtube: PchwbIs0tOg -image: images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 apple: https://podcasts.apple.com/us/podcast/accelerating-the-job-hunt-for-the-perfect-job-in/id1541710331?i=1000643971899 spotify: https://open.spotify.com/episode/7giHGC86pjtIYrLOvwP7g4?si=NB9w6S6QTfCBHB_n93LkBQ youtube: https://www.youtube.com/watch?v=PchwbIs0tOg -season: 17 -short: Accelerating The Job Hunt for The Perfect Job in Tech -title: 'Data Science Career Change: 4-Pillar Job Search, Networking & Informational - Interview Guide' + +description: 'Master data science career change with a 4-pillar job search: informational interviews, resume strategy & specialization tips to land your role.' +intro: 'Facing a career change into data science but unsure how to structure your job search, networking, and informational interviews? In this episode, Sarah Mestiri — data scientist and certified career & interview coach with 6+ years in tech (international companies, FIS, startups) — breaks down a practical Four‑Pillar Job Search Framework: goals, networking, CV, and strategy. Sarah draws on her transition from full‑stack engineering to data science and her work supporting women returning to work to show how to define your ideal role, choose a specialization (ML engineering, data engineering, MLOps), and validate skills through projects versus courses.

You’ll hear step‑by‑step guidance on job research and informational interviews: outreach messaging, key questions to ask, and how to build mutual value and referrals. The episode also covers resume strategy, weekly networking action plans, target company selection, part‑time transition tactics, and assessment tools to align strengths and interests. Listen to gain an actionable job search framework, templates for outreach and interviews, and resources to accelerate a successful data science career change.' +dateadded: 2024-02-03 + +duration: PT01H26S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=0 + endOffset: 171 +- name: Guest Introduction & Coaching Mission + startOffset: 171 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=171 + endOffset: 240 +- name: 'Career Path: Computer Science, Full-Stack to Data Science' + startOffset: 240 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=240 + endOffset: 339 +- name: AdTech Experience & Thriving Career Moms Project + startOffset: 339 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=339 + endOffset: 405 +- name: 'Becoming a Career Coach: Community, Mentorship, First Mentees' + startOffset: 405 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=405 + endOffset: 567 +- name: 'Client Profiles: Career Changers and Return-to-Work Support' + startOffset: 567 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=567 + endOffset: 659 +- name: 'Four-Pillar Job Search Framework: Goals, Networking, CV, Strategy' + startOffset: 659 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=659 + endOffset: 870 +- name: 'Defining Your Ideal Role: Tasks, Skills, and Future Vision' + startOffset: 870 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=870 + endOffset: 907 +- name: 'Job Research: Role Analysis and Informational Interviews' + startOffset: 907 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=907 + endOffset: 1072 +- name: 'Choosing a 
Specialization: ML Engineering, Data Engineering, MLOps' + startOffset: 1072 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1072 + endOffset: 1201 +- name: 'Narrowing Focus: Aligning Skills, Interests, and Market Demand' + startOffset: 1201 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1201 + endOffset: 1588 +- name: 'Courses vs Projects: Validate Skills Through Practical Work' + startOffset: 1588 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1588 + endOffset: 1775 +- name: 'Target Company Selection: Build a Top-5 Company List' + startOffset: 1775 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1775 + endOffset: 1900 +- name: 'Networking Value: Weak Ties, Referrals, and Opportunity Sources' + startOffset: 1900 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1900 + endOffset: 1937 +- name: 'Informational Interview Best Practices: Outreach and Preparation' + startOffset: 1937 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1937 + endOffset: 2058 +- name: 'Crafting Outreach Messages: Personalization and Short Asks' + startOffset: 2058 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2058 + endOffset: 2170 +- name: 'Key Informational Interview Questions: Day-to-Day & Success Factors' + startOffset: 2170 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2170 + endOffset: 2272 +- name: 'Building Mutual Value: Offer Help and Maintain Relationships' + startOffset: 2272 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2272 + endOffset: 2477 +- name: 'Networking Action Plan: Weekly Outreach and Re-engagement' + startOffset: 2477 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2477 + endOffset: 2705 +- name: 'Resume Strategy: Prioritizing Projects, Skills, and Storytelling' + startOffset: 2705 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2705 + endOffset: 2852 +- name: 'Self-Research Methods: Company Analysis and Skill Matching' + startOffset: 2852 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2852 + endOffset: 
2958 +- name: 'Strength & Interest Assessments: Gallup, HIGH5, MyNextMove' + startOffset: 2958 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2958 + endOffset: 3008 +- name: 'Part-Time Work Strategy: Timing, Negotiation, and Flexibility' + startOffset: 3008 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3008 + endOffset: 3210 +- name: 'Age and Career Change: Emphasize Results and Transferable Skills' + startOffset: 3210 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3210 + endOffset: 3358 +- name: 'Applying During Courses: Share Learnings and Build Visibility' + startOffset: 3358 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3358 + endOffset: 3626 +- name: 'Recommended Resources: Tests, Books, Podcasts, MyNextMove' + startOffset: 3626 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3626 + endOffset: 3722 +- name: 'Follow-Up & Support: Links, Slack, and Further Questions' + startOffset: 3722 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3722 + endOffset: 3626 + transcript: - header: Podcast Introduction - header: Guest Introduction & Coaching Mission @@ -1037,133 +1155,6 @@ transcript: sec: 3797 time: '1:03:17' who: Sarah -description: 'Master data science career change with a 4-pillar job search: informational - interviews, resume strategy & specialization tips to land your role.' -intro: 'Facing a career change into data science but unsure how to structure your - job search, networking, and informational interviews? In this episode, Sarah Mestiri - — data scientist and certified career & interview coach with 6+ years in tech (international - companies, FIS, startups) — breaks down a practical Four‑Pillar Job Search Framework: - goals, networking, CV, and strategy. 
Sarah draws on her transition from full‑stack - engineering to data science and her work supporting women returning to work to show - how to define your ideal role, choose a specialization (ML engineering, data engineering, - MLOps), and validate skills through projects versus courses.

You’ll hear - step‑by‑step guidance on job research and informational interviews: outreach messaging, - key questions to ask, and how to build mutual value and referrals. The episode also - covers resume strategy, weekly networking action plans, target company selection, - part‑time transition tactics, and assessment tools to align strengths and interests. - Listen to gain an actionable job search framework, templates for outreach and interviews, - and resources to accelerate a successful data science career change.' -dateadded: '2024-02-03' -duration: PT01H26S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=0 - endOffset: 171 -- name: Guest Introduction & Coaching Mission - startOffset: 171 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=171 - endOffset: 240 -- name: 'Career Path: Computer Science, Full-Stack to Data Science' - startOffset: 240 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=240 - endOffset: 339 -- name: AdTech Experience & Thriving Career Moms Project - startOffset: 339 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=339 - endOffset: 405 -- name: 'Becoming a Career Coach: Community, Mentorship, First Mentees' - startOffset: 405 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=405 - endOffset: 567 -- name: 'Client Profiles: Career Changers and Return-to-Work Support' - startOffset: 567 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=567 - endOffset: 659 -- name: 'Four-Pillar Job Search Framework: Goals, Networking, CV, Strategy' - startOffset: 659 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=659 - endOffset: 870 -- name: 'Defining Your Ideal Role: Tasks, Skills, and Future Vision' - startOffset: 870 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=870 - endOffset: 907 -- name: 'Job Research: Role Analysis and Informational Interviews' - startOffset: 907 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=907 - endOffset: 1072 -- name: 'Choosing 
a Specialization: ML Engineering, Data Engineering, MLOps' - startOffset: 1072 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1072 - endOffset: 1201 -- name: 'Narrowing Focus: Aligning Skills, Interests, and Market Demand' - startOffset: 1201 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1201 - endOffset: 1588 -- name: 'Courses vs Projects: Validate Skills Through Practical Work' - startOffset: 1588 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1588 - endOffset: 1775 -- name: 'Target Company Selection: Build a Top-5 Company List' - startOffset: 1775 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1775 - endOffset: 1900 -- name: 'Networking Value: Weak Ties, Referrals, and Opportunity Sources' - startOffset: 1900 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1900 - endOffset: 1937 -- name: 'Informational Interview Best Practices: Outreach and Preparation' - startOffset: 1937 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1937 - endOffset: 2058 -- name: 'Crafting Outreach Messages: Personalization and Short Asks' - startOffset: 2058 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2058 - endOffset: 2170 -- name: 'Key Informational Interview Questions: Day-to-Day & Success Factors' - startOffset: 2170 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2170 - endOffset: 2272 -- name: 'Building Mutual Value: Offer Help and Maintain Relationships' - startOffset: 2272 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2272 - endOffset: 2477 -- name: 'Networking Action Plan: Weekly Outreach and Re-engagement' - startOffset: 2477 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2477 - endOffset: 2705 -- name: 'Resume Strategy: Prioritizing Projects, Skills, and Storytelling' - startOffset: 2705 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2705 - endOffset: 2852 -- name: 'Self-Research Methods: Company Analysis and Skill Matching' - startOffset: 2852 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2852 - 
endOffset: 2958 -- name: 'Strength & Interest Assessments: Gallup, HIGH5, MyNextMove' - startOffset: 2958 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2958 - endOffset: 3008 -- name: 'Part-Time Work Strategy: Timing, Negotiation, and Flexibility' - startOffset: 3008 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3008 - endOffset: 3210 -- name: 'Age and Career Change: Emphasize Results and Transferable Skills' - startOffset: 3210 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3210 - endOffset: 3358 -- name: 'Applying During Courses: Share Learnings and Build Visibility' - startOffset: 3358 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3358 - endOffset: 3626 -- name: 'Recommended Resources: Tests, Books, Podcasts, MyNextMove' - startOffset: 3626 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3626 - endOffset: 3722 -- name: 'Follow-Up & Support: Links, Slack, and Further Questions' - startOffset: 3722 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3722 - endOffset: 3626 --- Links: diff --git a/_podcast/s17e07-make-impact-through-volunteering-open-source-work.md b/_podcast/to-update/s17e07-make-impact-through-volunteering-open-source-work.md similarity index 97% rename from _podcast/s17e07-make-impact-through-volunteering-open-source-work.md rename to _podcast/to-update/s17e07-make-impact-through-volunteering-open-source-work.md index dd9e21bc..64beb161 100644 --- a/_podcast/s17e07-make-impact-through-volunteering-open-source-work.md +++ b/_podcast/to-update/s17e07-make-impact-through-volunteering-open-source-work.md @@ -1,20 +1,124 @@ --- +title: "Volunteering and community-driven open-source collaboration are the pivot that turns academic curiosity and nascent AI skills into real-world impact and career momentum — by embracing an MVP mindset, creative data sourcing, strategic positioning (hackathons, platforms, and pitches), and intentional networking you can build practical projects, gain mentors and referrals, and overcome 
resource constraints to bridge research and production." +short: Make an Impact Through Volunteering Open Source Work +season: 17 episode: 7 guests: - saraelateif +image: images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg ids: anchor: atatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan youtube: aHdaIwOEI8Q -image: images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan apple: https://podcasts.apple.com/us/podcast/make-an-impact-through-volunteering-open-source-work/id1541710331?i=1000646627892 spotify: https://open.spotify.com/episode/7tZSSgv1yAlnoMyB4ggQmb?si=AqDaME2QS26usoZjOEWNtQ youtube: https://www.youtube.com/watch?v=aHdaIwOEI8Q -season: 17 -short: Make an Impact Through Volunteering Open Source Work -title: 'Boost Your AI Career: Volunteer Open-Source Projects, Hackathon Strategy & - Data Sourcing' + +description: Discover volunteer open-source projects, hackathon strategy and data sourcing tips to build an AI portfolio, land referrals, and win medical imaging challenges +intro: Struggling to break into impactful AI work—what volunteer projects, hackathon tactics, and data sourcing methods actually move your career forward? In this episode Sara El‑Ateif, Google Developer Expert in Machine Learning, Google PhD Fellow and co‑founder of AI Wonder Girls, walks through her path from big data and computer vision studies to multimodal COVID‑19 research and practical volunteer projects.

Sara breaks down real examples—PTSD chatbot, trash detection, and cervical spine segmentation—showing how to source data (Open Images, creative collection, generative approaches), pitch for volunteer roles, and contribute on platforms like Omdena and Fruit Punch AI. She explains hackathon strategy—understanding judges, defining an MVP despite limited data/compute, and building deliverables with mentors—and outlines opportunity hunting via LinkedIn, social feeds, mailing lists, WIML and conference channels.

Listeners will get actionable guidance on applying to projects, roles for data engineers (data prep, pipelines, dashboards), productivity tips, and how to build a research network. Tune in to learn concrete steps to boost your AI career through open‑source volunteering, smarter hackathon participation, and better data sourcing +dateadded: 2024-02-29 + +duration: PT00H59M34S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=0 + endOffset: 103 +- name: 'Episode Overview: Volunteering, Open Source & Community Impact' + startOffset: 103 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=103 + endOffset: 157 +- name: 'Career Origins: Early AI Interest and Education Path' + startOffset: 157 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=157 + endOffset: 266 +- name: 'Academic Focus: Big Data Specialization and Computer Vision' + startOffset: 266 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=266 + endOffset: 346 +- name: 'PhD Research: Multimodal Learning for COVID-19 & Medical Imaging' + startOffset: 346 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=346 + endOffset: 473 +- name: 'Google PhD Fellowship: Application Strategy and Benefits' + startOffset: 473 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=473 + endOffset: 668 +- name: 'Volunteer Projects Overview: PTSD Chatbot and Trash Detection Cases' + startOffset: 668 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=668 + endOffset: 849 +- name: 'Medical Imaging Project: Cervical Spine Segmentation Work' + startOffset: 849 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=849 + endOffset: 965 +- name: 'Data Sourcing Techniques: Open Images and Creative Collection' + startOffset: 965 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=965 + endOffset: 1068 +- name: 'Opportunity Hunting: LinkedIn, Social Media, and Mailing Lists' + startOffset: 1068 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1068 + endOffset: 
1225 +- name: 'Productivity Tips: Curated Feeds and Managing Social Media Time' + startOffset: 1225 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1225 + endOffset: 1424 +- name: 'Platform Differences: Omdena vs. Fruit Punch AI Collaboration Models' + startOffset: 1424 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1424 + endOffset: 1585 +- name: 'Joining Challenges: Beginner Support, Roles, and Team Dynamics' + startOffset: 1585 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1585 + endOffset: 1622 +- name: 'Women-Led AI Groups: Community Formation and Project Workflow' + startOffset: 1622 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1622 + endOffset: 1871 +- name: 'Hackathon Case Study: Medical Imaging Solution, Mentors, and Deliverables' + startOffset: 1871 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1871 + endOffset: 2192 +- name: 'Hackathon Strategy: Understanding Judges, Criteria, and Positioning' + startOffset: 2192 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2192 + endOffset: 2387 +- name: 'MVP Mindset: Overcoming Data and Compute Constraints' + startOffset: 2387 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2387 + endOffset: 2527 +- name: 'Data Creativity: Generative AI, Research, and Team Composition' + startOffset: 2527 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2527 + endOffset: 2724 +- name: 'Building a Research Network: Finding and Following Researchers on Twitter' + startOffset: 2724 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2724 + endOffset: 2922 +- name: 'Applying to Volunteer Projects: Interview Pitching and Relevant Skills' + startOffset: 2922 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2922 + endOffset: 3081 +- name: 'Volunteer Outcomes: Practical Experience, Referrals, and Soft Skills' + startOffset: 3081 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3081 + endOffset: 3365 +- name: 'Roles for Data Engineers: Data Preparation, Pipelines, and Dashboards' + 
startOffset: 3365 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3365 + endOffset: 3497 +- name: 'Opportunity Sources: WIML, Conference Feeds, and Newsletters' + startOffset: 3497 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3497 + endOffset: 3624 +- name: Closing Remarks and How to Connect + startOffset: 3624 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3624 + endOffset: 3574 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Volunteering, Open Source & Community Impact' @@ -1219,124 +1323,6 @@ transcript: sec: 3677 time: '1:01:17' who: Sara -description: Discover volunteer open-source projects, hackathon strategy and data - sourcing tips to build an AI portfolio, land referrals, and win medical imaging - challenges. -intro: Struggling to break into impactful AI work—what volunteer projects, hackathon - tactics, and data sourcing methods actually move your career forward? In this episode - Sara El‑Ateif, Google Developer Expert in Machine Learning, Google PhD Fellow and - co‑founder of AI Wonder Girls, walks through her path from big data and computer - vision studies to multimodal COVID‑19 research and practical volunteer projects. -

Sara breaks down real examples—PTSD chatbot, trash detection, and cervical - spine segmentation—showing how to source data (Open Images, creative collection, - generative approaches), pitch for volunteer roles, and contribute on platforms like - Omdena and Fruit Punch AI. She explains hackathon strategy—understanding judges, - defining an MVP despite limited data/compute, and building deliverables with mentors—and - outlines opportunity hunting via LinkedIn, social feeds, mailing lists, WIML and - conference channels.

Listeners will get actionable guidance on applying - to projects, roles for data engineers (data prep, pipelines, dashboards), productivity - tips, and how to build a research network. Tune in to learn concrete steps to boost - your AI career through open‑source volunteering, smarter hackathon participation, - and better data sourcing. -dateadded: '2024-02-29' -duration: PT00H59M34S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=0 - endOffset: 103 -- name: 'Episode Overview: Volunteering, Open Source & Community Impact' - startOffset: 103 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=103 - endOffset: 157 -- name: 'Career Origins: Early AI Interest and Education Path' - startOffset: 157 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=157 - endOffset: 266 -- name: 'Academic Focus: Big Data Specialization and Computer Vision' - startOffset: 266 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=266 - endOffset: 346 -- name: 'PhD Research: Multimodal Learning for COVID-19 & Medical Imaging' - startOffset: 346 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=346 - endOffset: 473 -- name: 'Google PhD Fellowship: Application Strategy and Benefits' - startOffset: 473 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=473 - endOffset: 668 -- name: 'Volunteer Projects Overview: PTSD Chatbot and Trash Detection Cases' - startOffset: 668 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=668 - endOffset: 849 -- name: 'Medical Imaging Project: Cervical Spine Segmentation Work' - startOffset: 849 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=849 - endOffset: 965 -- name: 'Data Sourcing Techniques: Open Images and Creative Collection' - startOffset: 965 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=965 - endOffset: 1068 -- name: 'Opportunity Hunting: LinkedIn, Social Media, and Mailing Lists' - startOffset: 1068 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1068 - 
endOffset: 1225 -- name: 'Productivity Tips: Curated Feeds and Managing Social Media Time' - startOffset: 1225 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1225 - endOffset: 1424 -- name: 'Platform Differences: Omdena vs. Fruit Punch AI Collaboration Models' - startOffset: 1424 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1424 - endOffset: 1585 -- name: 'Joining Challenges: Beginner Support, Roles, and Team Dynamics' - startOffset: 1585 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1585 - endOffset: 1622 -- name: 'Women-Led AI Groups: Community Formation and Project Workflow' - startOffset: 1622 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1622 - endOffset: 1871 -- name: 'Hackathon Case Study: Medical Imaging Solution, Mentors, and Deliverables' - startOffset: 1871 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1871 - endOffset: 2192 -- name: 'Hackathon Strategy: Understanding Judges, Criteria, and Positioning' - startOffset: 2192 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2192 - endOffset: 2387 -- name: 'MVP Mindset: Overcoming Data and Compute Constraints' - startOffset: 2387 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2387 - endOffset: 2527 -- name: 'Data Creativity: Generative AI, Research, and Team Composition' - startOffset: 2527 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2527 - endOffset: 2724 -- name: 'Building a Research Network: Finding and Following Researchers on Twitter' - startOffset: 2724 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2724 - endOffset: 2922 -- name: 'Applying to Volunteer Projects: Interview Pitching and Relevant Skills' - startOffset: 2922 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2922 - endOffset: 3081 -- name: 'Volunteer Outcomes: Practical Experience, Referrals, and Soft Skills' - startOffset: 3081 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3081 - endOffset: 3365 -- name: 'Roles for Data Engineers: Data Preparation, Pipelines, and 
Dashboards' - startOffset: 3365 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3365 - endOffset: 3497 -- name: 'Opportunity Sources: WIML, Conference Feeds, and Newsletters' - startOffset: 3497 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3497 - endOffset: 3624 -- name: Closing Remarks and How to Connect - startOffset: 3624 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3624 - endOffset: 3574 --- Links: diff --git a/_podcast/s17e08-building-machine-learning-products.md b/_podcast/to-update/s17e08-building-machine-learning-products.md similarity index 96% rename from _podcast/s17e08-building-machine-learning-products.md rename to _podcast/to-update/s17e08-building-machine-learning-products.md index d2749024..5190a95b 100644 --- a/_podcast/s17e08-building-machine-learning-products.md +++ b/_podcast/to-update/s17e08-building-machine-learning-products.md @@ -1,20 +1,133 @@ --- +title: "Modern search is best understood as a decision-making system: moving beyond brittle keyword matching to learned, shared representations (embeddings) that, when combined with traditional IR constraints (filters, recency, business rules), multimodal signals, and time-aware encodings, enable scalable, reliable retrieval and ranking. The real unifying challenge is not just model choice (LLMs vs specialized encoders) but engineering—indexing, compute/storage trade-offs, hybrid architectures, query-time weighting, operational tooling, vendor selection, and metrics-driven iteration—so that representation learning translates into measurable product and business outcomes." 
+short: Building Machine Learning Products +season: 17 episode: 8 guests: - reemmahmoud +image: images/podcast/s17e08-building-machine-learning-products.jpg ids: anchor: atatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd youtube: m45tNY-8gY8 -image: images/podcast/s17e08-building-machine-learning-products.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd apple: https://podcasts.apple.com/us/podcast/building-machine-learning-products-reem-mahmoud/id1541710331?i=1000649393833 spotify: https://open.spotify.com/episode/4jNredXndQ2b2evgfSmD2G?si=gU2kT-zXSX27hDPgLtwMgQ youtube: https://www.youtube.com/watch?v=m45tNY-8gY8 -season: 17 -short: Building Machine Learning Products -title: 'Vector Search & Hybrid Retrieval: Practical Guide to Embeddings, Indexing, - Multimodal Fusion' + +description: Master vector search, embeddings & hybrid search—learn indexing, multimodal fusion, vector DB trade-offs & ops to boost relevance, latency & personalization +intro: 'How do you build vector search and hybrid retrieval that actually works in production—balancing embeddings, indexing, multimodal fusion, latency, and business constraints? In this episode, Reem Mahmoud, Director of Data Science at intervu.ai, breaks down practical approaches to vector search, hybrid retrieval, and embedding pipelines for real-world systems.

Reem guides listeners through fundamentals—text search and inverted indexes (Lucene), candidate generation and ML ranking—then dives into vector search: embedding generation, compute vs. storage trade-offs, and when to use LLMs versus specialized encoders. You’ll hear concrete advice on multimodal embeddings (text, images, CLIP), hybrid search that combines vector similarity with filters and recency, and techniques for feature fusion, time encoding, and query-time weighting. The conversation also covers vector DB selection, operationalization best practices, search metrics and A/B testing, and prototyping e-commerce personalization with embeddings.

If you’re building or evaluating search/retrieval systems, this episode offers actionable guidance on embeddings, indexing strategies, multimodal fusion, and how to translate business rules into performant hybrid retrieval—so you can iterate faster and measure impact.' +dateadded: 2024-03-17 + +duration: PT01H05M23S + +quotableClips: +- name: 'Guest Introduction: Daniel, Superlinked, and VectorHub' + startOffset: 107 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=107 + endOffset: 149 +- name: 'Career Journey: Competitive programming, startups, and YouTube Ads' + startOffset: 149 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=149 + endOffset: 380 +- name: 'Competitive Programming to Infrastructure: relevance of algorithms' + startOffset: 380 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=380 + endOffset: 480 +- name: 'Defining Search: Information retrieval as a decision problem' + startOffset: 480 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=480 + endOffset: 550 +- name: 'Search vs Recommenders: Representation learning overview' + startOffset: 550 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=550 + endOffset: 645 +- name: 'Search Constraints: Latency and user experience impact' + startOffset: 645 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=645 + endOffset: 689 +- name: 'Text Search Fundamentals: Inverted index and Lucene basics' + startOffset: 689 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=689 + endOffset: 765 +- name: 'Search Architecture: Candidate generation (retrieval) and ML ranking' + startOffset: 765 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=765 + endOffset: 1060 +- name: 'Indexing Documents: Practical tools and why not to hand-roll indexes' + startOffset: 1060 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1060 + endOffset: 1202 +- name: 'Keyword Search Challenges: Brittleness, synonyms, and rule complexity' + startOffset: 1202 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1202 + 
endOffset: 1315 +- name: 'Vector Search Fundamentals: Embeddings as shared representations' + startOffset: 1315 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1315 + endOffset: 1740 +- name: 'Vector Compute vs Storage: Embedding generation and ingestion pipelines' + startOffset: 1740 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1740 + endOffset: 1993 +- name: 'Multimodal Embeddings: Images, text, CLIP, and modality fusion' + startOffset: 1993 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1993 + endOffset: 2040 +- name: 'Hybrid Search: Combining vector similarity with filters and recency' + startOffset: 2040 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2040 + endOffset: 2330 +- name: 'Feature Fusion: Encoding metadata, behavior, and popularity into vectors' + startOffset: 2330 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2330 + endOffset: 2393 +- name: 'Expressing Constraints: Translating filters and business rules to vectors' + startOffset: 2393 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2393 + endOffset: 2516 +- name: 'Time Encoding in Embeddings: Timestamps, positional encodings, and decay' + startOffset: 2516 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2516 + endOffset: 2711 +- name: 'Query-Time Weighting: Normalization, weights, and context-specific tuning' + startOffset: 2711 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2711 + endOffset: 2857 +- name: 'LLMs vs Specialized Encoders: Prompting trade-offs and efficiency limits' + startOffset: 2857 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2857 + endOffset: 2976 +- name: 'Learning Resources: VectorHub tutorials, graph and multimodal examples' + startOffset: 2976 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2976 + endOffset: 3155 +- name: 'Vector DB Selection: Vendor comparison and trade-offs' + startOffset: 3155 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3155 + endOffset: 3353 +- name: 'Monolithic vs Specialized Systems: 
Lucene/elasticsearch versus dedicated + VDBs' + startOffset: 3353 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3353 + endOffset: 3497 +- name: 'E‑commerce Personalization: Prototyping with embeddings and CLIP' + startOffset: 3497 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3497 + endOffset: 3685 +- name: 'Search Metrics: Business KPIs, A/B tests, and revenue attribution' + startOffset: 3685 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3685 + endOffset: 3830 +- name: 'Operationalization: Enabling engineers, offline tests, and fast iteration' + startOffset: 3830 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3830 + endOffset: 4008 +- name: Episode Recap and Closing + startOffset: 4008 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=4008 + endOffset: 3923 + transcript: - header: 'Guest Introduction: Daniel, Superlinked, and VectorHub' - line: This week, we'll talk about building production search systems. We have a @@ -1038,132 +1151,6 @@ transcript: sec: 4030 time: '1:07:10' who: Daniel -description: Master vector search, embeddings & hybrid search—learn indexing, multimodal - fusion, vector DB trade-offs & ops to boost relevance, latency & personalization -intro: 'How do you build vector search and hybrid retrieval that actually works in - production—balancing embeddings, indexing, multimodal fusion, latency, and business - constraints? In this episode, Reem Mahmoud, Director of Data Science at intervu.ai, - breaks down practical approaches to vector search, hybrid retrieval, and embedding - pipelines for real-world systems.

Reem guides listeners through fundamentals—text - search and inverted indexes (Lucene), candidate generation and ML ranking—then dives - into vector search: embedding generation, compute vs. storage trade-offs, and when - to use LLMs versus specialized encoders. You’ll hear concrete advice on multimodal - embeddings (text, images, CLIP), hybrid search that combines vector similarity with - filters and recency, and techniques for feature fusion, time encoding, and query-time - weighting. The conversation also covers vector DB selection, operationalization - best practices, search metrics and A/B testing, and prototyping e-commerce personalization - with embeddings.

If you’re building or evaluating search/retrieval systems, - this episode offers actionable guidance on embeddings, indexing strategies, multimodal - fusion, and how to translate business rules into performant hybrid retrieval—so - you can iterate faster and measure impact.' -dateadded: '2024-03-17' -duration: PT01H05M23S -quotableClips: -- name: 'Guest Introduction: Daniel, Superlinked, and VectorHub' - startOffset: 107 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=107 - endOffset: 149 -- name: 'Career Journey: Competitive programming, startups, and YouTube Ads' - startOffset: 149 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=149 - endOffset: 380 -- name: 'Competitive Programming to Infrastructure: relevance of algorithms' - startOffset: 380 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=380 - endOffset: 480 -- name: 'Defining Search: Information retrieval as a decision problem' - startOffset: 480 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=480 - endOffset: 550 -- name: 'Search vs Recommenders: Representation learning overview' - startOffset: 550 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=550 - endOffset: 645 -- name: 'Search Constraints: Latency and user experience impact' - startOffset: 645 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=645 - endOffset: 689 -- name: 'Text Search Fundamentals: Inverted index and Lucene basics' - startOffset: 689 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=689 - endOffset: 765 -- name: 'Search Architecture: Candidate generation (retrieval) and ML ranking' - startOffset: 765 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=765 - endOffset: 1060 -- name: 'Indexing Documents: Practical tools and why not to hand-roll indexes' - startOffset: 1060 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1060 - endOffset: 1202 -- name: 'Keyword Search Challenges: Brittleness, synonyms, and rule complexity' - startOffset: 1202 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1202 - 
endOffset: 1315 -- name: 'Vector Search Fundamentals: Embeddings as shared representations' - startOffset: 1315 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1315 - endOffset: 1740 -- name: 'Vector Compute vs Storage: Embedding generation and ingestion pipelines' - startOffset: 1740 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1740 - endOffset: 1993 -- name: 'Multimodal Embeddings: Images, text, CLIP, and modality fusion' - startOffset: 1993 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1993 - endOffset: 2040 -- name: 'Hybrid Search: Combining vector similarity with filters and recency' - startOffset: 2040 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2040 - endOffset: 2330 -- name: 'Feature Fusion: Encoding metadata, behavior, and popularity into vectors' - startOffset: 2330 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2330 - endOffset: 2393 -- name: 'Expressing Constraints: Translating filters and business rules to vectors' - startOffset: 2393 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2393 - endOffset: 2516 -- name: 'Time Encoding in Embeddings: Timestamps, positional encodings, and decay' - startOffset: 2516 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2516 - endOffset: 2711 -- name: 'Query-Time Weighting: Normalization, weights, and context-specific tuning' - startOffset: 2711 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2711 - endOffset: 2857 -- name: 'LLMs vs Specialized Encoders: Prompting trade-offs and efficiency limits' - startOffset: 2857 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2857 - endOffset: 2976 -- name: 'Learning Resources: VectorHub tutorials, graph and multimodal examples' - startOffset: 2976 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2976 - endOffset: 3155 -- name: 'Vector DB Selection: Vendor comparison and trade-offs' - startOffset: 3155 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3155 - endOffset: 3353 -- name: 'Monolithic vs Specialized Systems: 
Lucene/elasticsearch versus dedicated - VDBs' - startOffset: 3353 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3353 - endOffset: 3497 -- name: 'E‑commerce Personalization: Prototyping with embeddings and CLIP' - startOffset: 3497 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3497 - endOffset: 3685 -- name: 'Search Metrics: Business KPIs, A/B tests, and revenue attribution' - startOffset: 3685 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3685 - endOffset: 3830 -- name: 'Operationalization: Enabling engineers, offline tests, and fast iteration' - startOffset: 3830 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3830 - endOffset: 4008 -- name: Episode Recap and Closing - startOffset: 4008 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=4008 - endOffset: 3923 --- Links: diff --git a/_podcast/s17e09-building-production-search-systems.md b/_podcast/to-update/s17e09-building-production-search-systems.md similarity index 96% rename from _podcast/s17e09-building-production-search-systems.md rename to _podcast/to-update/s17e09-building-production-search-systems.md index d2c2aa05..d6ebae3d 100644 --- a/_podcast/s17e09-building-production-search-systems.md +++ b/_podcast/to-update/s17e09-building-production-search-systems.md @@ -1,20 +1,168 @@ --- +title: "Central narrative: Building effective, real-world search and retrieval is a systems engineering problem that pragmatically combines modern representation learning (dense, multimodal embeddings and specialized encoders) with classical IR techniques (inverted indexes, filters, recency, and ranking), wrapped in robust MLOps, evaluation, and product-oriented trade-offs. 
The episode’s through-line is that success comes from hybrid architectures and operational discipline—careful choices about embeddings, indexing, model versioning, pipeline design, vendor/tool selection, and business metrics—so teams can move fast from prototype (e.g., CLIP experiments) to scalable, maintainable, and measurable production search." +short: Building Production Search Systems +season: 17 episode: 9 guests: - danielsvonava +image: images/podcast/s17e09-building-production-search-systems.jpg ids: anchor: atatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh youtube: gEmSrknGKDE -image: images/podcast/s17e09-building-production-search-systems.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh apple: https://podcasts.apple.com/us/podcast/building-production-search-systems-daniel-svonava/id1541710331?i=1000650138905 spotify: https://open.spotify.com/episode/19R0rLA8hULTBZi9FhZuTs?si=xggb0OzfRHCFSmXtJWm7bA youtube: https://www.youtube.com/watch?v=gEmSrknGKDE -season: 17 -short: Building Production Search Systems -title: 'Vector Search & Databases: Indexing, Embeddings, Hybrid Retrieval, MLOps & - CLIP' + +description: Discover vector search, embeddings & vector database practices - indexing, hybrid retrieval, CLIP prototype and MLOps tips to boost relevance & ship faster +intro: How do you design and operate reliable vector search systems that balance embeddings, traditional indexing, and production MLOps? In this episode, Daniel Svonava — co-founder of Superlinked and VectorHub, former ML infrastructure tech lead for YouTube Ads with a 20‑year engineering background including competitive programming and research internships at Google and IBM — answers that question with practical detail.

We dig into the mechanics of indexing (inverted indexes, document chunking, candidate generation and ranking), the evolution from bag‑of‑words to dense embeddings, and the role of vector databases for nearest‑neighbor search. Daniel walks through vector compute tradeoffs (ingestion vs query‑time encoding), model versioning, pipeline challenges like recomputing embeddings, and hybrid retrieval strategies that combine vector similarity with filters, recency, and Lucene-style constraints. He also explains multi‑modal retrieval with CLIP, multi‑embedding designs, timestamp/positional encoding, and vendor selection criteria.

Listen to learn concrete guidance on prototyping with CLIP, when to use Lucene/Elasticsearch versus dedicated vector DBs, MLOps tradeoffs, and how to measure search impact through A/B testing and operational metrics — actionable insight for engineers building production search and recommender systems +dateadded: 2024-03-25 + +duration: PT01H05M23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=0 + endOffset: 107 +- name: 'Guest Introduction: Daniel Svonava, Superlinked & VectorHub' + startOffset: 107 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=107 + endOffset: 160 +- name: 'Career Highlights: Internships, YouTube Ads, and Startups' + startOffset: 160 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=160 + endOffset: 299 +- name: Competitive Programming Influence on Engineering + startOffset: 299 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=299 + endOffset: 380 +- name: 'Framing Search: Decision Problem & Relevance' + startOffset: 380 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=380 + endOffset: 550 +- name: Information Retrieval vs Recommender Boundaries; Representation Learning + startOffset: 550 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=550 + endOffset: 689 +- name: From Bag-of-Words to Dense Vector Representations + startOffset: 689 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=689 + endOffset: 765 +- name: Inverted Index Mechanics, Candidate Generation & Ranking + startOffset: 765 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=765 + endOffset: 1005 +- name: 'Practical Indexing: Document Chunking and Ingestion' + startOffset: 1005 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1005 + endOffset: 1060 +- name: 'Use Existing Engines: Lucene and Open-source Tools' + startOffset: 1060 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1060 + endOffset: 1129 +- name: 'Index Data Structures: Trees, Alphabetical Ordering, and Lookups' + 
startOffset: 1129 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1129 + endOffset: 1202 +- name: 'Search Maintenance: Brittleness, Synonyms, and Configuration Debt' + startOffset: 1202 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1202 + endOffset: 1315 +- name: Multi-modal Retrieval and Personalization Requirements + startOffset: 1315 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1315 + endOffset: 1641 +- name: 'Vector Databases: Storing Embeddings and Nearest-Neighbor Search' + startOffset: 1641 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1641 + endOffset: 1740 +- name: 'Vector Compute: Ingestion Encoding vs Query-Time Encoding' + startOffset: 1740 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1740 + endOffset: 1822 +- name: 'Pipeline Challenges: Recomputing Embeddings and Model Versioning' + startOffset: 1822 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1822 + endOffset: 1963 +- name: 'CLIP Example: Text-to-Image Cross-modal Search' + startOffset: 1963 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1963 + endOffset: 1993 +- name: 'Embedding Strategy Changes: Model Swaps and Pipeline Flexibility' + startOffset: 1993 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1993 + endOffset: 2040 +- name: 'Hybrid Search: Combining Vector Similarity with Filters and Recency' + startOffset: 2040 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2040 + endOffset: 2181 +- name: Custom Embeddings, Ranking Models, and MLOps Trade-offs + startOffset: 2181 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2181 + endOffset: 2291 +- name: 'Multi-embedding Design: Titles, Content, Images, and Behavioral Signals' + startOffset: 2291 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2291 + endOffset: 2393 +- name: 'Expressing Constraints: Lucene Must/Should vs Vector-query Approaches' + startOffset: 2393 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2393 + endOffset: 2448 +- name: 'Recency and Bias: Encoding Time and 
Applying Weights in Embeddings' + startOffset: 2448 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2448 + endOffset: 2516 +- name: Timestamp & Positional Encoding Techniques in Vector Space + startOffset: 2516 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2516 + endOffset: 2711 +- name: Normalizing Components and Late-binding Query Weights + startOffset: 2711 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2711 + endOffset: 2778 +- name: 'LLM Contexting: Prompted Timestamps and Limitations' + startOffset: 2778 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2778 + endOffset: 2857 +- name: Limits of LLM-only Retrieval; Value of Specialized Encoders + startOffset: 2857 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2857 + endOffset: 2976 +- name: 'Resources & Tutorials: VectorHub Guides on Combining Modalities' + startOffset: 2976 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2976 + endOffset: 3155 +- name: 'Vendor Selection: Vector DB Feature Comparison and Trade-offs' + startOffset: 3155 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3155 + endOffset: 3296 +- name: When to Use Lucene/Elasticsearch vs Dedicated Vector Databases + startOffset: 3296 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3296 + endOffset: 3468 +- name: 'E-commerce Strategy: Prototype with Embeddings for Mid-size D2C' + startOffset: 3468 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3468 + endOffset: 3497 +- name: Rapid Prototyping with CLIP and Steps to Productionize + startOffset: 3497 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3497 + endOffset: 3685 +- name: 'Measuring Search Impact: Business Metrics, A/B Testing, and USD' + startOffset: 3685 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3685 + endOffset: 3830 +- name: Operational Metrics, Offline Evaluation, and Empowering Engineers + startOffset: 3830 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3830 + endOffset: 4008 +- name: Closing Remarks and How to Connect 
with Daniel/VectorHub + startOffset: 4008 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=4008 + endOffset: 3923 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Daniel Svonava, Superlinked & VectorHub' @@ -1046,167 +1194,6 @@ transcript: sec: 4030 time: '1:07:10' who: Daniel -description: Discover vector search, embeddings & vector database practices - indexing, - hybrid retrieval, CLIP prototype and MLOps tips to boost relevance & ship faster -intro: How do you design and operate reliable vector search systems that balance embeddings, - traditional indexing, and production MLOps? In this episode, Daniel Svonava — co-founder - of Superlinked and VectorHub, former ML infrastructure tech lead for YouTube Ads - with a 20‑year engineering background including competitive programming and research - internships at Google and IBM — answers that question with practical detail.

- We dig into the mechanics of indexing (inverted indexes, document chunking, candidate - generation and ranking), the evolution from bag‑of‑words to dense embeddings, and - the role of vector databases for nearest‑neighbor search. Daniel walks through vector - compute tradeoffs (ingestion vs query‑time encoding), model versioning, pipeline - challenges like recomputing embeddings, and hybrid retrieval strategies that combine - vector similarity with filters, recency, and Lucene-style constraints. He also explains - multi‑modal retrieval with CLIP, multi‑embedding designs, timestamp/positional encoding, - and vendor selection criteria.

Listen to learn concrete guidance on prototyping - with CLIP, when to use Lucene/Elasticsearch versus dedicated vector DBs, MLOps tradeoffs, - and how to measure search impact through A/B testing and operational metrics — actionable - insight for engineers building production search and recommender systems. -dateadded: '2024-03-25' -duration: PT01H05M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=0 - endOffset: 107 -- name: 'Guest Introduction: Daniel Svonava, Superlinked & VectorHub' - startOffset: 107 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=107 - endOffset: 160 -- name: 'Career Highlights: Internships, YouTube Ads, and Startups' - startOffset: 160 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=160 - endOffset: 299 -- name: Competitive Programming Influence on Engineering - startOffset: 299 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=299 - endOffset: 380 -- name: 'Framing Search: Decision Problem & Relevance' - startOffset: 380 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=380 - endOffset: 550 -- name: Information Retrieval vs Recommender Boundaries; Representation Learning - startOffset: 550 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=550 - endOffset: 689 -- name: From Bag-of-Words to Dense Vector Representations - startOffset: 689 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=689 - endOffset: 765 -- name: Inverted Index Mechanics, Candidate Generation & Ranking - startOffset: 765 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=765 - endOffset: 1005 -- name: 'Practical Indexing: Document Chunking and Ingestion' - startOffset: 1005 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1005 - endOffset: 1060 -- name: 'Use Existing Engines: Lucene and Open-source Tools' - startOffset: 1060 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1060 - endOffset: 1129 -- name: 'Index Data Structures: Trees, Alphabetical Ordering, and Lookups' - 
startOffset: 1129 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1129 - endOffset: 1202 -- name: 'Search Maintenance: Brittleness, Synonyms, and Configuration Debt' - startOffset: 1202 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1202 - endOffset: 1315 -- name: Multi-modal Retrieval and Personalization Requirements - startOffset: 1315 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1315 - endOffset: 1641 -- name: 'Vector Databases: Storing Embeddings and Nearest-Neighbor Search' - startOffset: 1641 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1641 - endOffset: 1740 -- name: 'Vector Compute: Ingestion Encoding vs Query-Time Encoding' - startOffset: 1740 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1740 - endOffset: 1822 -- name: 'Pipeline Challenges: Recomputing Embeddings and Model Versioning' - startOffset: 1822 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1822 - endOffset: 1963 -- name: 'CLIP Example: Text-to-Image Cross-modal Search' - startOffset: 1963 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1963 - endOffset: 1993 -- name: 'Embedding Strategy Changes: Model Swaps and Pipeline Flexibility' - startOffset: 1993 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1993 - endOffset: 2040 -- name: 'Hybrid Search: Combining Vector Similarity with Filters and Recency' - startOffset: 2040 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2040 - endOffset: 2181 -- name: Custom Embeddings, Ranking Models, and MLOps Trade-offs - startOffset: 2181 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2181 - endOffset: 2291 -- name: 'Multi-embedding Design: Titles, Content, Images, and Behavioral Signals' - startOffset: 2291 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2291 - endOffset: 2393 -- name: 'Expressing Constraints: Lucene Must/Should vs Vector-query Approaches' - startOffset: 2393 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2393 - endOffset: 2448 -- name: 'Recency and Bias: Encoding Time and 
Applying Weights in Embeddings' - startOffset: 2448 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2448 - endOffset: 2516 -- name: Timestamp & Positional Encoding Techniques in Vector Space - startOffset: 2516 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2516 - endOffset: 2711 -- name: Normalizing Components and Late-binding Query Weights - startOffset: 2711 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2711 - endOffset: 2778 -- name: 'LLM Contexting: Prompted Timestamps and Limitations' - startOffset: 2778 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2778 - endOffset: 2857 -- name: Limits of LLM-only Retrieval; Value of Specialized Encoders - startOffset: 2857 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2857 - endOffset: 2976 -- name: 'Resources & Tutorials: VectorHub Guides on Combining Modalities' - startOffset: 2976 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2976 - endOffset: 3155 -- name: 'Vendor Selection: Vector DB Feature Comparison and Trade-offs' - startOffset: 3155 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3155 - endOffset: 3296 -- name: When to Use Lucene/Elasticsearch vs Dedicated Vector Databases - startOffset: 3296 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3296 - endOffset: 3468 -- name: 'E-commerce Strategy: Prototype with Embeddings for Mid-size D2C' - startOffset: 3468 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3468 - endOffset: 3497 -- name: Rapid Prototyping with CLIP and Steps to Productionize - startOffset: 3497 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3497 - endOffset: 3685 -- name: 'Measuring Search Impact: Business Metrics, A/B Testing, and USD' - startOffset: 3685 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3685 - endOffset: 3830 -- name: Operational Metrics, Offline Evaluation, and Empowering Engineers - startOffset: 3830 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3830 - endOffset: 4008 -- name: Closing Remarks and How to Connect 
with Daniel/VectorHub - startOffset: 4008 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=4008 - endOffset: 3923 --- Links: diff --git a/_podcast/s18e01-inclusive-data-leadership-coaching.md b/_podcast/to-update/s18e01-inclusive-data-leadership-coaching.md similarity index 96% rename from _podcast/s18e01-inclusive-data-leadership-coaching.md rename to _podcast/to-update/s18e01-inclusive-data-leadership-coaching.md index ede42bf4..0564d34f 100644 --- a/_podcast/s18e01-inclusive-data-leadership-coaching.md +++ b/_podcast/to-update/s18e01-inclusive-data-leadership-coaching.md @@ -1,7 +1,11 @@ --- +title: "Helping technical professionals—especially data practitioners—become high-impact, inclusive leaders by combining mindset shifts and practical routines: making invisible technical work visible with a product/value lens, cultivating psychological safety and feedback skills, managing sustainable team scope, and using empathetic communication and stakeholder framing to influence across functions." +short: Inclusive Data Leadership Coaching +season: 18 episode: 1 guests: - terezaiofciu +image: images/podcast/s18e01-inclusive-data-leadership-coaching.jpg ids: anchor: 'on has three major components, this includes the main AI framework which is the “TermAIte”, the main database, and the mobile application. These three @@ -9,15 +13,125 @@ ids: by the user. The data includes the images of the wood and the different environmental conditions readings: temperature, humidity, and wood moisture. 
' youtube: Z4vOTgzLkJQ -image: images/podcast/s18e01-inclusive-data-leadership-coaching.jpg links: apple: https://podcasts.apple.com/us/podcast/inclusive-data-leadership-coaching-tereza-iofciu/id1541710331?i=1000650865043 spotify: https://open.spotify.com/episode/3zVzlQ0NmAVCtaFQXbqvHE?si=sSZhU-KXRamv2x5YZCDxAg youtube: https://www.youtube.com/watch?v=Z4vOTgzLkJQ -season: 18 -short: Inclusive Data Leadership Coaching -title: 'Data Leadership Coaching: Transition to Manager, Feedback Skills & Influencing - Without Authority' + +description: Learn data leadership, feedback skills and influencing without authority to transition to manager, increase impact and lead cross-functional teams +intro: How do you move from an individual contributor to an effective data leader while coaching teams, giving constructive feedback, and influencing without formal authority? In this episode, Tereza Iofciu—an experienced data practitioner who has worked as a data scientist, data engineer, product manager, leads a coaching team, and teaches data science at neuefische—walks through the practical challenges of that transition. She shares her career journey from a computer science PhD to data lead and coach, early coaching experiments, and why managers need teammates who can solve problems independently.

Key topics include transition-to-manager tactics, building feedback skills and psychological safety, designing sustainable team span-of-control (the “pizza” metaphor), making foundational data work visible with product-minded KPIs, and influencing without authority through stakeholder framing, active listening, and empathy. Tereza also covers coaching delivery formats—one-shot sessions, CV reviews, and community initiatives like PyLadies and conference newcomer talks—and how to blend coaching, mentoring, and practical advice.

If you’re stepping into a lead role or coaching data teams, listen for actionable frameworks, feedback routines, and inclusive leadership practices to increase impact, visibility, and promotion readiness. Closing notes include how to reach Tereza and schedule time via Calendly +dateadded: 2024-03-31 + +duration: PT00H56M35S + +quotableClips: +- name: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) + startOffset: 86 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=86 + endOffset: 161 +- name: 'Career Journey: From Computer Science PhD to Data Lead and Coach' + startOffset: 161 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=161 + endOffset: 224 +- name: 'Transition to Coaching: Stepping back from product responsibility' + startOffset: 224 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=224 + endOffset: 377 +- name: 'Career Shift: Challenges of moving from Individual Contributor to Lead' + startOffset: 377 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=377 + endOffset: 472 +- name: 'Early Coaching Experiments: Free sessions to learn real problems' + startOffset: 472 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=472 + endOffset: 555 +- name: 'Manager Bandwidth Limits: Need for independent problem-solving' + startOffset: 555 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=555 + endOffset: 689 +- name: 'Community Format: Python Pizza conference and newcomer talks' + startOffset: 689 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=689 + endOffset: 758 +- name: 'Team Span-of-Control: Pizza metaphor for sustainable management' + startOffset: 758 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=758 + endOffset: 854 +- name: 'Leadership Learning: Courses, awareness, and building feedback culture' + startOffset: 854 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=854 + endOffset: 1022 +- name: 'Leadership Training vs. 
Self-Study: Practical workshops and frameworks' + startOffset: 1022 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1022 + endOffset: 1183 +- name: 'Feedback Skills: Giving constructive feedback without hurting relationships' + startOffset: 1183 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1183 + endOffset: 1218 +- name: 'Team Feedback Training: Psychological safety and practiced routines' + startOffset: 1218 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1218 + endOffset: 1387 +- name: 'Coaching Focus: Increasing impact, promotions, and strategic mindset' + startOffset: 1387 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1387 + endOffset: 1472 +- name: 'Data Work Visibility: Foundation work, product mindset, and KPIs' + startOffset: 1472 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1472 + endOffset: 1710 +- name: 'Coaching Delivery: LinkedIn, Calendly, one-shot sessions, and CV reviews' + startOffset: 1710 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1710 + endOffset: 1959 +- name: 'Side Projects & Partnerships: PyPodcats, Shades & Contrast, Responsible AI' + startOffset: 1959 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1959 + endOffset: 2078 +- name: 'Coaching Approach: Blending coaching, mentoring, and practical advice' + startOffset: 2078 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2078 + endOffset: 2174 +- name: 'Self-Promotion vs. 
Bragging: CV culture and owning achievements' + startOffset: 2174 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2174 + endOffset: 2313 +- name: 'Personal Retrospectives: Tracking wins and the two-year rule for topics' + startOffset: 2313 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2313 + endOffset: 2618 +- name: 'Communication Overhead: Behind-the-scenes work for models and open source' + startOffset: 2618 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2618 + endOffset: 2760 +- name: 'Influencing Without Authority: Speaking different work languages & active + listening' + startOffset: 2760 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2760 + endOffset: 2960 +- name: 'Stakeholder Framing: Connecting projects to what''s important for others' + startOffset: 2960 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2960 + endOffset: 3023 +- name: 'Empathy in Practice: Role perspective-taking for better collaboration' + startOffset: 3023 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3023 + endOffset: 3230 +- name: 'Cross-Functional Leadership: Emotional intelligence and people care' + startOffset: 3230 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3230 + endOffset: 3264 +- name: 'Inclusive Leadership: Defining inclusion, avoiding exclusivity, and cultural + diversity' + startOffset: 3264 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3264 + endOffset: 3468 +- name: 'Closing & Contact: How to reach Tereza, Calendly and further resources' + startOffset: 3468 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3468 + endOffset: 3395 + transcript: - header: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) @@ -1096,135 +1210,6 @@ transcript: sec: 3481 time: '58:01' who: Tereza -description: Learn data leadership, feedback skills and influencing without authority - to transition to manager, increase impact and lead cross-functional teams. 
-intro: How do you move from an individual contributor to an effective data leader - while coaching teams, giving constructive feedback, and influencing without formal - authority? In this episode, Tereza Iofciu—an experienced data practitioner who has - worked as a data scientist, data engineer, product manager, leads a coaching team, - and teaches data science at neuefische—walks through the practical challenges of - that transition. She shares her career journey from a computer science PhD to data - lead and coach, early coaching experiments, and why managers need teammates who - can solve problems independently.

Key topics include transition-to-manager - tactics, building feedback skills and psychological safety, designing sustainable - team span-of-control (the “pizza” metaphor), making foundational data work visible - with product-minded KPIs, and influencing without authority through stakeholder - framing, active listening, and empathy. Tereza also covers coaching delivery formats—one-shot - sessions, CV reviews, and community initiatives like PyLadies and conference newcomer - talks—and how to blend coaching, mentoring, and practical advice.

If you’re - stepping into a lead role or coaching data teams, listen for actionable frameworks, - feedback routines, and inclusive leadership practices to increase impact, visibility, - and promotion readiness. Closing notes include how to reach Tereza and schedule - time via Calendly. -dateadded: '2024-03-31' -duration: PT00H56M35S -quotableClips: -- name: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) - startOffset: 86 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=86 - endOffset: 161 -- name: 'Career Journey: From Computer Science PhD to Data Lead and Coach' - startOffset: 161 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=161 - endOffset: 224 -- name: 'Transition to Coaching: Stepping back from product responsibility' - startOffset: 224 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=224 - endOffset: 377 -- name: 'Career Shift: Challenges of moving from Individual Contributor to Lead' - startOffset: 377 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=377 - endOffset: 472 -- name: 'Early Coaching Experiments: Free sessions to learn real problems' - startOffset: 472 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=472 - endOffset: 555 -- name: 'Manager Bandwidth Limits: Need for independent problem-solving' - startOffset: 555 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=555 - endOffset: 689 -- name: 'Community Format: Python Pizza conference and newcomer talks' - startOffset: 689 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=689 - endOffset: 758 -- name: 'Team Span-of-Control: Pizza metaphor for sustainable management' - startOffset: 758 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=758 - endOffset: 854 -- name: 'Leadership Learning: Courses, awareness, and building feedback culture' - startOffset: 854 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=854 - endOffset: 1022 -- name: 'Leadership Training vs. 
Self-Study: Practical workshops and frameworks' - startOffset: 1022 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1022 - endOffset: 1183 -- name: 'Feedback Skills: Giving constructive feedback without hurting relationships' - startOffset: 1183 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1183 - endOffset: 1218 -- name: 'Team Feedback Training: Psychological safety and practiced routines' - startOffset: 1218 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1218 - endOffset: 1387 -- name: 'Coaching Focus: Increasing impact, promotions, and strategic mindset' - startOffset: 1387 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1387 - endOffset: 1472 -- name: 'Data Work Visibility: Foundation work, product mindset, and KPIs' - startOffset: 1472 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1472 - endOffset: 1710 -- name: 'Coaching Delivery: LinkedIn, Calendly, one-shot sessions, and CV reviews' - startOffset: 1710 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1710 - endOffset: 1959 -- name: 'Side Projects & Partnerships: PyPodcats, Shades & Contrast, Responsible AI' - startOffset: 1959 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1959 - endOffset: 2078 -- name: 'Coaching Approach: Blending coaching, mentoring, and practical advice' - startOffset: 2078 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2078 - endOffset: 2174 -- name: 'Self-Promotion vs. 
Bragging: CV culture and owning achievements' - startOffset: 2174 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2174 - endOffset: 2313 -- name: 'Personal Retrospectives: Tracking wins and the two-year rule for topics' - startOffset: 2313 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2313 - endOffset: 2618 -- name: 'Communication Overhead: Behind-the-scenes work for models and open source' - startOffset: 2618 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2618 - endOffset: 2760 -- name: 'Influencing Without Authority: Speaking different work languages & active - listening' - startOffset: 2760 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2760 - endOffset: 2960 -- name: 'Stakeholder Framing: Connecting projects to what''s important for others' - startOffset: 2960 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2960 - endOffset: 3023 -- name: 'Empathy in Practice: Role perspective-taking for better collaboration' - startOffset: 3023 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3023 - endOffset: 3230 -- name: 'Cross-Functional Leadership: Emotional intelligence and people care' - startOffset: 3230 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3230 - endOffset: 3264 -- name: 'Inclusive Leadership: Defining inclusion, avoiding exclusivity, and cultural - diversity' - startOffset: 3264 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3264 - endOffset: 3468 -- name: 'Closing & Contact: How to reach Tereza, Calendly and further resources' - startOffset: 3468 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3468 - endOffset: 3395 --- Links: diff --git a/_podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md b/_podcast/to-update/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md similarity index 96% rename from _podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md rename to _podcast/to-update/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md index 
fcb7e20b..97fbba1e 100644 --- a/_podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md +++ b/_podcast/to-update/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md @@ -1,19 +1,119 @@ --- +title: "Context: The episode follows a mechanical-engineer-turned-applied-AI practitioner exploring how finite element analysis, crash-simulation optimization, and automotive R&D can be augmented by graph-based representations and modern language models—covering knowledge graphs, computational/graph analytics, embeddings/RAG, trust and hallucination, and practical deployment lessons from a project that parses papers and links domain artifacts. + +Core unifying theme: Knowledge graphs serve as the essential bridge between physics-based engineering models and data-driven AI (graph ML and LLMs), providing a structured, explainable substrate that grounds retrieval and reasoning, enables graph-native analytics and optimization workflows, and thereby accelerates trustworthy, automatable engineering discovery and decision-making." 
+short: Knowledge Graphs and LLMs Across Academia and Industry +season: 18 episode: 2 guests: - anahitapakiman +image: images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg ids: anchor: atatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 youtube: YncdlUscUOo -image: images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 apple: https://podcasts.apple.com/us/podcast/knowledge-graphs-and-llms-across-academia-and/id1541710331?i=1000651561079 spotify: https://open.spotify.com/episode/1yDgx6uNaSQxKTjGU1qtIj?si=g0xQjWmDTRinzxhoYV3sdA youtube: https://www.youtube.com/watch?v=YncdlUscUOo -season: 18 -short: Knowledge Graphs and LLMs Across Academia and Industry -title: 'Using Knowledge Graphs & LLMs for Automotive R&D: RAG, Graph ML & Crash Simulation' + +description: Discover how knowledge graphs, LLMs and RAG boost automotive R&D—improve crash simulation insights, grounded retrieval, graph ML and faster paper parsing +intro: How can knowledge graphs and large language models (LLMs) be combined to improve automotive R&D workflows like crash simulation and paper reading? In this episode Anahita Pakiman—Senior Knowledge Graph-Data Scientist Consultant at brox IT-Solutions—walks us from her mechanical engineering roots into applied AI, explaining how finite element analysis (FEA) and optimization intersect with data-driven approaches.

We cover practical topics including FEA vs. machine learning, topology optimization, semantic reporting for crash simulations, and the motivation for adopting knowledge graphs (Neo4j) in automotive R&D. Anahita compares graph and tabular representations, shows how NetworkX and graph analytics bridge knowledge graphs to computational graphs, and dives into graph data science techniques like similarity measures and SimRank. She also explains grounding LLMs with retrieval-augmented generation (RAG), the trade-offs between embeddings/vector databases and KG semantics, Cypher-driven retrieval, prompt templates, and limits around trust and hallucination.

Listeners will get concrete guidance on building KG+LLM systems (including the ADPT-LRN-PHYS project), parsing papers into graphs, deployment and frontend considerations, and recommended graph ML learning resources—valuable for engineers and data scientists working on crash simulation, knowledge graphs, and RAG workflows +dateadded: 2024-04-07 + +duration: PT00H59M24S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=0 + endOffset: 100 +- name: 'Guest Bio: career path from mechanical engineering to applied AI' + startOffset: 100 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=100 + endOffset: 177 +- name: Guest Background & Career Transition + startOffset: 177 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=177 + endOffset: 337 +- name: Applied Mechanics & Finite Element Analysis (FEA) overview + startOffset: 337 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=337 + endOffset: 485 +- name: 'FEA vs Machine Learning: numerical modeling vs data-driven approaches' + startOffset: 485 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=485 + endOffset: 530 +- name: Optimization, Topology & Semantic Reporting in crash simulations + startOffset: 530 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=530 + endOffset: 958 +- name: 'Knowledge Graphs for Automotive R&D: motivation and Neo4j adoption' + startOffset: 958 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=958 + endOffset: 1232 +- name: 'Graph vs Tabular Representations: visualization, clustering, load-path detection' + startOffset: 1232 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1232 + endOffset: 1575 +- name: From Knowledge Graphs to Computational Graphs (NetworkX & graph analytics) + startOffset: 1575 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1575 + endOffset: 1680 +- name: 'Graph Data Science & Graph ML: similarity measures and SimRank' + startOffset: 1680 + url: 
https://www.youtube.com/watch?v=YncdlUscUOo&t=1680 + endOffset: 2023 +- name: 'Combining Knowledge Graphs & LLMs: grounding and retrieval-augmented generation + (RAG)' + startOffset: 2023 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2023 + endOffset: 2290 +- name: Text Chunking, Embeddings & Vector Databases vs Knowledge Graph Semantics + startOffset: 2290 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2290 + endOffset: 2396 +- name: Prompt Templates & KG-driven Retrieval (Cypher-based examples) + startOffset: 2396 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2396 + endOffset: 2423 +- name: 'RAG vs Transfer Learning: embeddings, fine-tuning, and distinctions' + startOffset: 2423 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2423 + endOffset: 2562 +- name: Trust, Hallucination & Verification Limits of LLM-extracted Knowledge + startOffset: 2562 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2562 + endOffset: 2653 +- name: 'ADPT-LRN-PHYS Project Overview: LLM + KG for adaptive learning and paper + reading' + startOffset: 2653 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2653 + endOffset: 2830 +- name: 'Paper Parsing & KG Visualization: sections, keywords, PageRank and reference + mapping' + startOffset: 2830 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2830 + endOffset: 3258 +- name: 'Project Challenges: automating graph generation and scoping the demo' + startOffset: 3258 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3258 + endOffset: 3336 +- name: 'Deployment & Frontend Issues: Streamlit limits and state management for graph + UIs' + startOffset: 3336 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3336 + endOffset: 3466 +- name: 'Learning Resources: graph ML courses, Jure Leskovec, Graph Conference and + KG+LLM courses' + startOffset: 3466 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3466 + endOffset: 3641 +- name: Episode Conclusion and Takeaways + startOffset: 3641 + url: 
https://www.youtube.com/watch?v=YncdlUscUOo&t=3641 + endOffset: 3564 + transcript: - header: Episode Introduction - header: 'Guest Bio: career path from mechanical engineering to applied AI' @@ -1037,117 +1137,6 @@ transcript: sec: 3664 time: '1:01:04' who: Alexey -description: Discover how knowledge graphs, LLMs and RAG boost automotive R&D—improve - crash simulation insights, grounded retrieval, graph ML and faster paper parsing. -intro: How can knowledge graphs and large language models (LLMs) be combined to improve - automotive R&D workflows like crash simulation and paper reading? In this episode - Anahita Pakiman—Senior Knowledge Graph-Data Scientist Consultant at brox IT-Solutions—walks - us from her mechanical engineering roots into applied AI, explaining how finite - element analysis (FEA) and optimization intersect with data-driven approaches.

- We cover practical topics including FEA vs. machine learning, topology optimization, - semantic reporting for crash simulations, and the motivation for adopting knowledge - graphs (Neo4j) in automotive R&D. Anahita compares graph and tabular representations, - shows how NetworkX and graph analytics bridge knowledge graphs to computational - graphs, and dives into graph data science techniques like similarity measures and - SimRank. She also explains grounding LLMs with retrieval-augmented generation (RAG), - the trade-offs between embeddings/vector databases and KG semantics, Cypher-driven - retrieval, prompt templates, and limits around trust and hallucination.

- Listeners will get concrete guidance on building KG+LLM systems (including the ADPT-LRN-PHYS - project), parsing papers into graphs, deployment and frontend considerations, and - recommended graph ML learning resources—valuable for engineers and data scientists - working on crash simulation, knowledge graphs, and RAG workflows. -dateadded: '2024-04-07' -duration: PT00H59M24S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=0 - endOffset: 100 -- name: 'Guest Bio: career path from mechanical engineering to applied AI' - startOffset: 100 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=100 - endOffset: 177 -- name: Guest Background & Career Transition - startOffset: 177 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=177 - endOffset: 337 -- name: Applied Mechanics & Finite Element Analysis (FEA) overview - startOffset: 337 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=337 - endOffset: 485 -- name: 'FEA vs Machine Learning: numerical modeling vs data-driven approaches' - startOffset: 485 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=485 - endOffset: 530 -- name: Optimization, Topology & Semantic Reporting in crash simulations - startOffset: 530 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=530 - endOffset: 958 -- name: 'Knowledge Graphs for Automotive R&D: motivation and Neo4j adoption' - startOffset: 958 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=958 - endOffset: 1232 -- name: 'Graph vs Tabular Representations: visualization, clustering, load-path detection' - startOffset: 1232 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1232 - endOffset: 1575 -- name: From Knowledge Graphs to Computational Graphs (NetworkX & graph analytics) - startOffset: 1575 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1575 - endOffset: 1680 -- name: 'Graph Data Science & Graph ML: similarity measures and SimRank' - startOffset: 1680 - url: 
https://www.youtube.com/watch?v=YncdlUscUOo&t=1680 - endOffset: 2023 -- name: 'Combining Knowledge Graphs & LLMs: grounding and retrieval-augmented generation - (RAG)' - startOffset: 2023 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2023 - endOffset: 2290 -- name: Text Chunking, Embeddings & Vector Databases vs Knowledge Graph Semantics - startOffset: 2290 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2290 - endOffset: 2396 -- name: Prompt Templates & KG-driven Retrieval (Cypher-based examples) - startOffset: 2396 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2396 - endOffset: 2423 -- name: 'RAG vs Transfer Learning: embeddings, fine-tuning, and distinctions' - startOffset: 2423 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2423 - endOffset: 2562 -- name: Trust, Hallucination & Verification Limits of LLM-extracted Knowledge - startOffset: 2562 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2562 - endOffset: 2653 -- name: 'ADPT-LRN-PHYS Project Overview: LLM + KG for adaptive learning and paper - reading' - startOffset: 2653 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2653 - endOffset: 2830 -- name: 'Paper Parsing & KG Visualization: sections, keywords, PageRank and reference - mapping' - startOffset: 2830 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2830 - endOffset: 3258 -- name: 'Project Challenges: automating graph generation and scoping the demo' - startOffset: 3258 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3258 - endOffset: 3336 -- name: 'Deployment & Frontend Issues: Streamlit limits and state management for graph - UIs' - startOffset: 3336 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3336 - endOffset: 3466 -- name: 'Learning Resources: graph ML courses, Jure Leskovec, Graph Conference and - KG+LLM courses' - startOffset: 3466 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3466 - endOffset: 3641 -- name: Episode Conclusion and Takeaways - startOffset: 3641 - url: 
https://www.youtube.com/watch?v=YncdlUscUOo&t=3641 - endOffset: 3564 --- Links: diff --git a/_podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.md b/_podcast/to-update/s18e03-ai-for-ecology-biodiversity-and-conservation.md similarity index 68% rename from _podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.md rename to _podcast/to-update/s18e03-ai-for-ecology-biodiversity-and-conservation.md index 7f0e8781..f5aacd50 100644 --- a/_podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.md +++ b/_podcast/to-update/s18e03-ai-for-ecology-biodiversity-and-conservation.md @@ -1,40 +1,27 @@ --- +title: "Context: The episode frames a biodiversity crisis made harder by fragmented, sparse data and limited monitoring capacity, then surveys AI tools (computer vision, remote sensing, platforms, citizen science), technical challenges, ethical concerns, and policy needs for conservation. + +Core narrative: AI's most important role in conservation is as an integrative, trustworthy infrastructure that turns heterogeneous, messy ecological data into continuous, scalable, and actionable knowledge—bridging camera traps, drones, satellites, citizen science, and field expertise through interoperable standards, robust models, edge deployment, and open platforms. Real impact requires coupling technical advances with ethics, community engagement, capacity building, sustainable funding, and multistakeholder governance so that AI-enabled monitoring directly informs equitable conservation decisions, enforcement, and long-term policy." 
+short: AI for Ecology, Biodiversity, and Conservation +season: 18 episode: 3 guests: - tanyabergerwolf +image: images/podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.jpg ids: anchor: atatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi youtube: 30tTrozbAkg -image: images/podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi apple: https://podcasts.apple.com/us/podcast/ai-for-ecology-biodiversity-and-conservation-tanya/id1541710331?i=1000653709956 spotify: https://open.spotify.com/episode/3Hhz5N8ZDvsOPlPP3wxQxq?si=Oz7y_pBrTfeypfYZXubu-g youtube: https://www.youtube.com/watch?v=30tTrozbAkg -season: 18 -short: AI for Ecology, Biodiversity, and Conservation -title: 'Scaling Wildlife Conservation with AI: Computer Vision, Remote Sensing & Citizen - Science' -description: 'Discover AI-driven wildlife conservation: computer vision, remote sensing - & citizen science for scalable species ID, habitat maps, alerts and policy impact.' -intro: How can AI actually scale wildlife conservation in the face of accelerating - biodiversity loss and persistent data gaps? In this episode, computational ecologist - Tanya Berger-Wolf—director of TDAI@OSU, co‑founder of the Wildbook project, and - director of technology at Wild Me—walks us through practical ways computer vision, - remote sensing, and citizen science are transforming biodiversity monitoring.

- We explore core AI techniques (machine learning, transfer learning, domain adaptation), - image‑based monitoring with camera traps, drones and photo‑ID for individual tracking, - and remote sensing for habitat mapping and change detection. Tanya addresses key - data challenges—labeling, class imbalance, sparse observations—and the need for - interoperable datasets, open standards and FAIR principles. We also cover model - robustness, edge deployment in the field, ethics and Indigenous knowledge, scalable - platforms like Wildbook, and how citizen science and crowdsourcing support quality - control and long‑term monitoring.

Listeners will come away with a clearer - understanding of tools and workflows for wildlife monitoring, practical barriers - to scaling AI for conservation, policy and funding considerations, and resources - to begin applying computer vision, remote sensing, and citizen science in their - own conservation projects. -dateadded: '2024-04-28' + +description: 'Discover AI-driven wildlife conservation: computer vision, remote sensing & citizen science for scalable species ID, habitat maps, alerts and policy impact.' +intro: How can AI actually scale wildlife conservation in the face of accelerating biodiversity loss and persistent data gaps? In this episode, computational ecologist Tanya Berger-Wolf—director of TDAI@OSU, co‑founder of the Wildbook project, and director of technology at Wild Me—walks us through practical ways computer vision, remote sensing, and citizen science are transforming biodiversity monitoring.

We explore core AI techniques (machine learning, transfer learning, domain adaptation), image‑based monitoring with camera traps, drones and photo‑ID for individual tracking, and remote sensing for habitat mapping and change detection. Tanya addresses key data challenges—labeling, class imbalance, sparse observations—and the need for interoperable datasets, open standards and FAIR principles. We also cover model robustness, edge deployment in the field, ethics and Indigenous knowledge, scalable platforms like Wildbook, and how citizen science and crowdsourcing support quality control and long‑term monitoring.

Listeners will come away with a clearer understanding of tools and workflows for wildlife monitoring, practical barriers to scaling AI for conservation, policy and funding considerations, and resources to begin applying computer vision, remote sensing, and citizen science in their own conservation projects +dateadded: 2024-04-28 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -132,6 +119,7 @@ quotableClips: startOffset: 3720 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=3720 endOffset: 3720 + --- Links: diff --git a/_podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.md b/_podcast/to-update/s18e04-working-in-open-source-probabl-ai-and-sklearn.md similarity index 94% rename from _podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.md rename to _podcast/to-update/s18e04-working-in-open-source-probabl-ai-and-sklearn.md index 4925b6ac..699e1786 100644 --- a/_podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.md +++ b/_podcast/to-update/s18e04-working-in-open-source-probabl-ai-and-sklearn.md @@ -1,20 +1,154 @@ --- +title: "Context: This episode surveys the Scikit-Learn ecosystem, related projects (Scikit Lego, Skrub), and initiatives like Calm Code and :probabl., weaving together career stories, governance, tooling choices, content production, maintainer handoffs, CI/cost concerns, and early business models. + +Core unifying idea: Long-term health and impact of open-source machine‑learning projects depends not just on great code but on a deliberate integration of engineering excellence, community stewardship, accessible education, and sustainable operational/business practices — i.e., building pragmatic tools and clear learning paths while creating incentives (training, consulting, platform models, cost‑efficient infrastructure, and low‑pressure contributor experiences) that enable maintainers and contributors to keep projects useful, adoptable, and durable." 
+short: Working in Open Source - Probabl.ai and sklearn +season: 18 episode: 4 guests: - vincentwarmerdam +image: images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg ids: anchor: atatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs youtube: UPlIETGwTg8 -image: images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs apple: https://podcasts.apple.com/us/podcast/working-in-open-source-probabl-ai-and-sklearn-vincent/id1541710331?i=1000654481795 spotify: https://open.spotify.com/episode/0HT3IQOaTXTMH0OdEBnw9s?si=HrLtx7QKT_amZyUbZuqRzQ youtube: https://www.youtube.com/watch?v=UPlIETGwTg8 -season: 18 -short: Working in Open Source - Probabl.ai and sklearn -title: 'Build Sustainable Scikit-Learn Ecosystems: scikit-lego, Skrub, GAP Encoder - & DevRel' + +description: 'Discover scalable scikit-learn ecosystems with scikit-lego and Skrub: learn GAP Encoder, contributor growth, CI optimization and DevRel sustainability.' +intro: How do you build a sustainable scikit-learn ecosystem that serves both users and contributors? In this episode, Vincent Warmerdam — Research Advocate at Rasa, open source contributor and creator of Calm Code and the Koaning blog — walks through practical decisions that keep ML tooling healthy over time. We cover scikit-lego’s origins and adoption, governance and NumFOCUS roles, and the trade-offs between adding features to core scikit-learn versus plugins.

Key topics include maintaining contributor growth and steward transitions, motivating volunteer maintainers, DevRel combined with core engineering, and demonstrable open source quality as a hiring signal. Vincent also explains Skrub’s table vectorizer and the GAP Encoder approach for clustering dirty categorical values to avoid one-hot explosion, plus examples of CI and cost optimization (custom runners, GitHub Actions) and sustainable compute choices. You’ll get actionable guidance on teaching fundamentals (Docker, pip, Git), producing interactive content, and potential business models around training and consulting. Tune in to learn concrete strategies for building, funding, and scaling scikit-learn-compatible tools and communities without sacrificing long-term sustainability +dateadded: 2024-05-06 + +duration: PT01H15S + +quotableClips: +- name: Episode Overview — Open Source Focus + startOffset: 0 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=0 + endOffset: 100 +- name: Guest Reintroduction & Vincent’s Open Source Profile + startOffset: 100 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=100 + endOffset: 240 +- name: Early Community Work & PyLadies Code Sprint + startOffset: 240 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=240 + endOffset: 259 +- name: Scikit Lego Origin, Adoption, and Career Impact + startOffset: 259 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=259 + endOffset: 363 +- name: 'Career Path: Econometrics → DevRel → Core Engineering' + startOffset: 363 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=363 + endOffset: 513 +- name: 'Company Naming: Why :probabl. Is Separate from Scikit-Learn' + startOffset: 513 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=513 + endOffset: 628 +- name: Scikit-Learn Governance, NumFOCUS, and Project History + startOffset: 628 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=628 + endOffset: 841 +- name: 'Ecosystem Strategy: Plugins vs. 
Core Scikit-Learn Features' + startOffset: 841 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=841 + endOffset: 1003 +- name: Scikit Lego in Corporate Training and Contributor Growth + startOffset: 1003 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1003 + endOffset: 1091 +- name: 'Maintainer Transition: Finding Sustainable Project Stewards' + startOffset: 1091 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1091 + endOffset: 1311 +- name: Motivating Volunteer Maintainers and Keeping Projects Fun + startOffset: 1311 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1311 + endOffset: 1409 +- name: 'Demonstrating Quality: Open Source Work as a Hiring Signal' + startOffset: 1409 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1409 + endOffset: 1546 +- name: 'Calm Code Philosophy: Practical, Low-Pressure Learning' + startOffset: 1546 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1546 + endOffset: 1644 +- name: 'Content Production: Videos, Scale, and Communication Practice' + startOffset: 1644 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1644 + endOffset: 1770 +- name: 'Calm Code Platform: Django, Monetization, and Hiring Contributors' + startOffset: 1770 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1770 + endOffset: 1902 +- name: 'CI and Cost Optimization: Custom Runners and GitHub Actions' + startOffset: 1902 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1902 + endOffset: 1946 +- name: 'Sustainable Compute Examples: Leaf.cloud and Environmental Impact' + startOffset: 1946 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1946 + endOffset: 2069 +- name: 'Teaching Fundamentals: Docker, pip, and Git Challenges for Beginners' + startOffset: 2069 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2069 + endOffset: 2136 +- name: 'Conceptual Learning: Mindset Over Commands for Tooling' + startOffset: 2136 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2136 + endOffset: 2302 +- name: Combining DevRel and Core 
Development Responsibilities + startOffset: 2302 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2302 + endOffset: 2481 +- name: 'Role Definition: Developer Relations Engineer at :probabl.' + startOffset: 2481 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2481 + endOffset: 2540 +- name: Enhancing Scikit-Learn with Interactive Content and Videos + startOffset: 2540 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2540 + endOffset: 2670 +- name: 'Deep Dive Example: Why the Standard Scaler Is Complex' + startOffset: 2670 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2670 + endOffset: 2911 +- name: 'Skrub Overview: Table Vectorizer and Pragmatic Tabular Defaults' + startOffset: 2911 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2911 + endOffset: 3027 +- name: 'Skrub GAP Encoder: Clustering Dirty Categories to Avoid One-Hot Explosion' + startOffset: 3027 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3027 + endOffset: 3227 +- name: 'Why Form a Company for Scikit-Learn: Funding and European Tech Goals' + startOffset: 3227 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3227 + endOffset: 3379 +- name: 'Potential Business Models: Training, Consulting, and Partnerships' + startOffset: 3379 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3379 + endOffset: 3454 +- name: 'Upcoming Work: Calm Code Book on Expectations vs. 
Reality in Data' + startOffset: 3454 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3454 + endOffset: 3497 +- name: 'Live Experiments: Converting Tree Models to SQL and Streaming Work' + startOffset: 3497 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3497 + endOffset: 3627 +- name: 'Live Stream Format: Preparation, Live Coding, and Demos' + startOffset: 3627 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3627 + endOffset: 3675 +- name: Episode Closing and Final Remarks + startOffset: 3675 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3675 + endOffset: 3615 + transcript: - header: Episode Overview — Open Source Focus - header: Guest Reintroduction & Vincent’s Open Source Profile @@ -857,151 +991,6 @@ transcript: sec: 3715 time: '1:01:55' who: Alexey -description: 'Discover scalable scikit-learn ecosystems with scikit-lego and Skrub: - learn GAP Encoder, contributor growth, CI optimization and DevRel sustainability.' -intro: How do you build a sustainable scikit-learn ecosystem that serves both users - and contributors? In this episode, Vincent Warmerdam — Research Advocate at Rasa, - open source contributor and creator of Calm Code and the Koaning blog — walks through - practical decisions that keep ML tooling healthy over time. We cover scikit-lego’s - origins and adoption, governance and NumFOCUS roles, and the trade-offs between - adding features to core scikit-learn versus plugins.

Key topics include - maintaining contributor growth and steward transitions, motivating volunteer maintainers, - DevRel combined with core engineering, and demonstrable open source quality as a - hiring signal. Vincent also explains Skrub’s table vectorizer and the GAP Encoder - approach for clustering dirty categorical values to avoid one-hot explosion, plus - examples of CI and cost optimization (custom runners, GitHub Actions) and sustainable - compute choices. You’ll get actionable guidance on teaching fundamentals (Docker, - pip, Git), producing interactive content, and potential business models around training - and consulting. Tune in to learn concrete strategies for building, funding, and - scaling scikit-learn-compatible tools and communities without sacrificing long-term - sustainability. -dateadded: '2024-05-06' -duration: PT01H15S -quotableClips: -- name: Episode Overview — Open Source Focus - startOffset: 0 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=0 - endOffset: 100 -- name: Guest Reintroduction & Vincent’s Open Source Profile - startOffset: 100 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=100 - endOffset: 240 -- name: Early Community Work & PyLadies Code Sprint - startOffset: 240 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=240 - endOffset: 259 -- name: Scikit Lego Origin, Adoption, and Career Impact - startOffset: 259 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=259 - endOffset: 363 -- name: 'Career Path: Econometrics → DevRel → Core Engineering' - startOffset: 363 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=363 - endOffset: 513 -- name: 'Company Naming: Why :probabl. Is Separate from Scikit-Learn' - startOffset: 513 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=513 - endOffset: 628 -- name: Scikit-Learn Governance, NumFOCUS, and Project History - startOffset: 628 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=628 - endOffset: 841 -- name: 'Ecosystem Strategy: Plugins vs. 
Core Scikit-Learn Features' - startOffset: 841 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=841 - endOffset: 1003 -- name: Scikit Lego in Corporate Training and Contributor Growth - startOffset: 1003 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1003 - endOffset: 1091 -- name: 'Maintainer Transition: Finding Sustainable Project Stewards' - startOffset: 1091 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1091 - endOffset: 1311 -- name: Motivating Volunteer Maintainers and Keeping Projects Fun - startOffset: 1311 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1311 - endOffset: 1409 -- name: 'Demonstrating Quality: Open Source Work as a Hiring Signal' - startOffset: 1409 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1409 - endOffset: 1546 -- name: 'Calm Code Philosophy: Practical, Low-Pressure Learning' - startOffset: 1546 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1546 - endOffset: 1644 -- name: 'Content Production: Videos, Scale, and Communication Practice' - startOffset: 1644 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1644 - endOffset: 1770 -- name: 'Calm Code Platform: Django, Monetization, and Hiring Contributors' - startOffset: 1770 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1770 - endOffset: 1902 -- name: 'CI and Cost Optimization: Custom Runners and GitHub Actions' - startOffset: 1902 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1902 - endOffset: 1946 -- name: 'Sustainable Compute Examples: Leaf.cloud and Environmental Impact' - startOffset: 1946 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1946 - endOffset: 2069 -- name: 'Teaching Fundamentals: Docker, pip, and Git Challenges for Beginners' - startOffset: 2069 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2069 - endOffset: 2136 -- name: 'Conceptual Learning: Mindset Over Commands for Tooling' - startOffset: 2136 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2136 - endOffset: 2302 -- name: Combining DevRel and Core 
Development Responsibilities - startOffset: 2302 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2302 - endOffset: 2481 -- name: 'Role Definition: Developer Relations Engineer at :probabl.' - startOffset: 2481 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2481 - endOffset: 2540 -- name: Enhancing Scikit-Learn with Interactive Content and Videos - startOffset: 2540 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2540 - endOffset: 2670 -- name: 'Deep Dive Example: Why the Standard Scaler Is Complex' - startOffset: 2670 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2670 - endOffset: 2911 -- name: 'Skrub Overview: Table Vectorizer and Pragmatic Tabular Defaults' - startOffset: 2911 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2911 - endOffset: 3027 -- name: 'Skrub GAP Encoder: Clustering Dirty Categories to Avoid One-Hot Explosion' - startOffset: 3027 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3027 - endOffset: 3227 -- name: 'Why Form a Company for Scikit-Learn: Funding and European Tech Goals' - startOffset: 3227 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3227 - endOffset: 3379 -- name: 'Potential Business Models: Training, Consulting, and Partnerships' - startOffset: 3379 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3379 - endOffset: 3454 -- name: 'Upcoming Work: Calm Code Book on Expectations vs. 
Reality in Data' - startOffset: 3454 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3454 - endOffset: 3497 -- name: 'Live Experiments: Converting Tree Models to SQL and Streaming Work' - startOffset: 3497 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3497 - endOffset: 3627 -- name: 'Live Stream Format: Preparation, Live Coding, and Demos' - startOffset: 3627 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3627 - endOffset: 3675 -- name: Episode Closing and Final Remarks - startOffset: 3675 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3675 - endOffset: 3615 --- Links: diff --git a/_podcast/s18e05-community-building-and-teaching-in-ai-tech.md b/_podcast/to-update/s18e05-community-building-and-teaching-in-ai-tech.md similarity index 92% rename from _podcast/s18e05-community-building-and-teaching-in-ai-tech.md rename to _podcast/to-update/s18e05-community-building-and-teaching-in-ai-tech.md index e0cfe171..74bcfdb5 100644 --- a/_podcast/s18e05-community-building-and-teaching-in-ai-tech.md +++ b/_podcast/to-update/s18e05-community-building-and-teaching-in-ai-tech.md @@ -1,20 +1,140 @@ --- +title: "Context: This episode follows Erum Afzal and Omdena Academy’s evolution—how global, project‑based AI collaborations and community organizing were systematized into accessible, tiered courses and local chapters to teach practical, ethical AI skills. + +Core theme: The unifying idea is that democratizing real‑world AI expertise requires a community‑first, project‑to‑course approach—turning collaborative problem‑solving into structured learning pathways, open instructor pipelines, regional sub‑communities, and integrity‑focused practices so diverse learners can rapidly gain practical skills, leadership opportunities, and ethical career pathways in AI." 
+short: Community Building and Teaching in AI & Tech +season: 18 episode: 5 guests: - erumafzal +image: images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg ids: anchor: lub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r youtube: 7SLd5V7z3xQ -image: images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r apple: https://podcasts.apple.com/us/podcast/community-building-and-teaching-in-ai-tech-erum-afzal/id1541710331?i=1000655187649 spotify: https://open.spotify.com/episode/4iAvz4Qu0l28fxXvaHdAPj?si=7MdKKu1fTrqxIGPQBT61Ag youtube: https://www.youtube.com/watch?v=7SLd5V7z3xQ -season: 18 -short: Community Building and Teaching in AI & Tech -title: 'Omdena Academy: Project-to-Course AI Education for Data Science Careers & - Instructors' + +description: 'Discover Omdena Academy''s project-to-course AI education for data science: learn Python, NLP, instructor paths, and gain real-world project experience.' +intro: 'How do you turn real-world AI project experience into repeatable courses that launch data science careers and train instructors? In this episode, Erum Afzal — lead ML engineer, Teaching Expert at Women in AI Academy, and PhD researcher in AI for teacher training — explains how Omdena Academy evolved from collaborative projects into a project-to-course model for AI education.

We cover the Academy’s shift from global Omdena projects to structured data science courses, foundational topics taught (Python, Pandas, NumPy, NLP), and the process for developing courses: instructor application, content review, delivery, and evaluation. Erum outlines access pathways—enrolling without prior Omdena membership, pathways into projects, and an open instructor pipeline—plus community and leadership development through regional chapters and sub-communities. You’ll hear about curriculum tiers (basic to advanced), boosting engagement with live sessions, and maintaining hiring integrity amid plagiarism and responsible ChatGPT use. Practical details include how to apply (Omdena.com/Omdena-Academy), scholarship and GitHub resources, and options for instructors to volunteer or monetize content.

Listen to learn actionable steps for joining, teaching, or designing project-based data science courses that prepare learners for careers in AI.' +dateadded: 2024-05-12 + +duration: PT00H57M03S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=0 + endOffset: 85 +- name: 'Guest Introduction: Erum Afzal — AI for education & Omdena Academy' + startOffset: 85 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=85 + endOffset: 152 +- name: 'Background: Journey from Pakistan to PhD & community teaching' + startOffset: 152 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=152 + endOffset: 303 +- name: 'Omdena Academy: Evolution from projects to structured courses' + startOffset: 303 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=303 + endOffset: 364 +- name: 'Omdena Projects: Global collaborators solving real‑world AI problems' + startOffset: 364 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=364 + endOffset: 619 +- name: 'Project-to-Course Model: Teaching skills learned from projects' + startOffset: 619 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=619 + endOffset: 692 +- name: 'Foundational Data Science Courses: Python, Pandas, NumPy, NLP' + startOffset: 692 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=692 + endOffset: 872 +- name: 'Course Development: Instructor application, content review, delivery' + startOffset: 872 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=872 + endOffset: 952 +- name: 'Business Model: Free learner courses with organizational partnerships' + startOffset: 952 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=952 + endOffset: 1058 +- name: 'Access Pathways: Enroll without prior Omdena membership; pathway to projects' + startOffset: 1058 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1058 + endOffset: 1248 +- name: 'Instructor Pipeline: Open applications and project-based recruitment' + startOffset: 1248 + url: 
https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1248 + endOffset: 1349 +- name: 'Course Engagement: Live sessions, selection process, graduation rates' + startOffset: 1349 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1349 + endOffset: 1458 +- name: 'Selection & Motivation: Prereqs, availability, and incentive programs' + startOffset: 1458 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1458 + endOffset: 1600 +- name: 'Roles & Responsibilities: Teaching focus vs community management' + startOffset: 1600 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1600 + endOffset: 1788 +- name: 'Leadership Development: Network-building and taking initiative' + startOffset: 1788 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1788 + endOffset: 1983 +- name: 'Community Growth Strategy: Start small and scale (AI Wonder Girl example)' + startOffset: 1983 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1983 + endOffset: 2246 +- name: 'Communities for Career Building: Skill discovery and rapid learning' + startOffset: 2246 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2246 + endOffset: 2400 +- name: 'Empowering Sub-communities: Regional chapters, branding, ethics' + startOffset: 2400 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2400 + endOffset: 2570 +- name: 'Boosting Attendance: Clear takeaways and live event value' + startOffset: 2570 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2570 + endOffset: 2793 +- name: 'Curriculum Design: Basic, intermediate, and advanced course tiers' + startOffset: 2793 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2793 + endOffset: 2900 +- name: 'Talent Market Dynamics: Standing out amid data science competition' + startOffset: 2900 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2900 + endOffset: 3056 +- name: 'Hiring Integrity & Tools: Originality, plagiarism, and responsible ChatGPT + use' + startOffset: 3056 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3056 + endOffset: 3147 +- 
name: 'How to Apply: Becoming an Omdena Academy instructor (Omdena.com/Omdena-Academy)' + startOffset: 3147 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3147 + endOffset: 3203 +- name: 'Monetization Options: Volunteer teaching vs selling courses on platforms' + startOffset: 3203 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3203 + endOffset: 3289 +- name: 'Access & Scholarship Resources: Courses, GitHub projects, and women‑focused + support' + startOffset: 3289 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3289 + endOffset: 3399 +- name: 'Recommended Readings: AI ethics newsletter and curated resources' + startOffset: 3399 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3399 + endOffset: 3466 +- name: Episode Wrap-Up & Closing Remarks + startOffset: 3466 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3466 + endOffset: 3423 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Erum Afzal — AI for education & Omdena Academy' @@ -557,138 +677,6 @@ transcript: sec: 3508 time: '58:28' who: Alexey -description: 'Discover Omdena Academy''s project-to-course AI education for data science: - learn Python, NLP, instructor paths, and gain real-world project experience.' -intro: 'How do you turn real-world AI project experience into repeatable courses that - launch data science careers and train instructors? In this episode, Erum Afzal — - lead ML engineer, Teaching Expert at Women in AI Academy, and PhD researcher in - AI for teacher training — explains how Omdena Academy evolved from collaborative - projects into a project-to-course model for AI education.

We cover the - Academy’s shift from global Omdena projects to structured data science courses, - foundational topics taught (Python, Pandas, NumPy, NLP), and the process for developing - courses: instructor application, content review, delivery, and evaluation. Erum - outlines access pathways—enrolling without prior Omdena membership, pathways into - projects, and an open instructor pipeline—plus community and leadership development - through regional chapters and sub-communities. You’ll hear about curriculum tiers - (basic to advanced), boosting engagement with live sessions, and maintaining hiring - integrity amid plagiarism and responsible ChatGPT use. Practical details include - how to apply (Omdena.com/Omdena-Academy), scholarship and GitHub resources, and - options for instructors to volunteer or monetize content.

Listen to learn - actionable steps for joining, teaching, or designing project-based data science - courses that prepare learners for careers in AI.' -dateadded: '2024-05-12' -duration: PT00H57M03S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=0 - endOffset: 85 -- name: 'Guest Introduction: Erum Afzal — AI for education & Omdena Academy' - startOffset: 85 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=85 - endOffset: 152 -- name: 'Background: Journey from Pakistan to PhD & community teaching' - startOffset: 152 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=152 - endOffset: 303 -- name: 'Omdena Academy: Evolution from projects to structured courses' - startOffset: 303 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=303 - endOffset: 364 -- name: 'Omdena Projects: Global collaborators solving real‑world AI problems' - startOffset: 364 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=364 - endOffset: 619 -- name: 'Project-to-Course Model: Teaching skills learned from projects' - startOffset: 619 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=619 - endOffset: 692 -- name: 'Foundational Data Science Courses: Python, Pandas, NumPy, NLP' - startOffset: 692 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=692 - endOffset: 872 -- name: 'Course Development: Instructor application, content review, delivery' - startOffset: 872 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=872 - endOffset: 952 -- name: 'Business Model: Free learner courses with organizational partnerships' - startOffset: 952 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=952 - endOffset: 1058 -- name: 'Access Pathways: Enroll without prior Omdena membership; pathway to projects' - startOffset: 1058 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1058 - endOffset: 1248 -- name: 'Instructor Pipeline: Open applications and project-based recruitment' - startOffset: 1248 - url: 
https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1248 - endOffset: 1349 -- name: 'Course Engagement: Live sessions, selection process, graduation rates' - startOffset: 1349 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1349 - endOffset: 1458 -- name: 'Selection & Motivation: Prereqs, availability, and incentive programs' - startOffset: 1458 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1458 - endOffset: 1600 -- name: 'Roles & Responsibilities: Teaching focus vs community management' - startOffset: 1600 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1600 - endOffset: 1788 -- name: 'Leadership Development: Network-building and taking initiative' - startOffset: 1788 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1788 - endOffset: 1983 -- name: 'Community Growth Strategy: Start small and scale (AI Wonder Girl example)' - startOffset: 1983 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1983 - endOffset: 2246 -- name: 'Communities for Career Building: Skill discovery and rapid learning' - startOffset: 2246 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2246 - endOffset: 2400 -- name: 'Empowering Sub-communities: Regional chapters, branding, ethics' - startOffset: 2400 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2400 - endOffset: 2570 -- name: 'Boosting Attendance: Clear takeaways and live event value' - startOffset: 2570 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2570 - endOffset: 2793 -- name: 'Curriculum Design: Basic, intermediate, and advanced course tiers' - startOffset: 2793 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2793 - endOffset: 2900 -- name: 'Talent Market Dynamics: Standing out amid data science competition' - startOffset: 2900 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2900 - endOffset: 3056 -- name: 'Hiring Integrity & Tools: Originality, plagiarism, and responsible ChatGPT - use' - startOffset: 3056 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3056 - endOffset: 3147 -- 
name: 'How to Apply: Becoming an Omdena Academy instructor (Omdena.com/Omdena-Academy)' - startOffset: 3147 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3147 - endOffset: 3203 -- name: 'Monetization Options: Volunteer teaching vs selling courses on platforms' - startOffset: 3203 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3203 - endOffset: 3289 -- name: 'Access & Scholarship Resources: Courses, GitHub projects, and women‑focused - support' - startOffset: 3289 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3289 - endOffset: 3399 -- name: 'Recommended Readings: AI ethics newsletter and curated resources' - startOffset: 3399 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3399 - endOffset: 3466 -- name: Episode Wrap-Up & Closing Remarks - startOffset: 3466 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3466 - endOffset: 3423 --- Links: diff --git a/_podcast/s18e07-building-domestic-risk-assessment-tool.md b/_podcast/to-update/s18e07-building-domestic-risk-assessment-tool.md similarity index 68% rename from _podcast/s18e07-building-domestic-risk-assessment-tool.md rename to _podcast/to-update/s18e07-building-domestic-risk-assessment-tool.md index 21e353e6..6074f9ec 100644 --- a/_podcast/s18e07-building-domestic-risk-assessment-tool.md +++ b/_podcast/to-update/s18e07-building-domestic-risk-assessment-tool.md @@ -1,40 +1,27 @@ --- +title: "Context: The episode follows the end-to-end effort to create a domestic risk assessment tool—framing the problem, assembling and preparing data, designing and evaluating models, addressing privacy, ethics and legal needs, integrating with frontline workflows and interfaces, engaging stakeholders, monitoring performance, and planning for scale, funding, and reproducibility. 
+ +Core theme: Designing and operationalizing a people-centered, data-driven domestic risk assessment that translates technical rigor into trustworthy, ethical, legally compliant, and user-friendly decision support—balancing accuracy, fairness, privacy, and sustainability so models meaningfully improve frontline triage and resource allocation in the real world." +short: Building a Domestic Risk Assessment Tool +season: 18 episode: 7 guests: - sabinafirtala +image: images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg ids: anchor: lub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i youtube: CpWlBAmD9ok -image: images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i apple: https://podcasts.apple.com/us/podcast/building-a-domestic-risk-assessment-tool-sabina-firtala/id1541710331?i=1000662124309 spotify: https://open.spotify.com/episode/7bjORhGzTQoxtbv60mMtzW?si=p6UaBdZJTnGvlwbGb6AsFQ youtube: https://www.youtube.com/watch?v=CpWlBAmD9ok -season: 18 -short: Building a Domestic Risk Assessment Tool -title: 'Build a Domestic Risk Assessment Tool for Triage: Data, Models, Privacy & - Deployment' -description: 'Learn to build a domestic risk assessment tool: data cleaning, risk - scoring models, privacy and deployment strategies to improve triage and resource - allocation.' -intro: 'How do you build a domestic risk assessment tool that meaningfully improves - triage while protecting people’s privacy and avoiding bias? In this episode, Sabina - Firtala from Frontline’s AI product development walks through the end-to-end process - of building a domestic risk assessment tool for triage. 
Sabina brings hands-on experience - across data wrangling, visualization, statistical testing, model training and validation, - with a background in Natural Sciences and prior analyst roles in finance and SaaS, - plus freelance work for mission-driven projects.

We cover problem framing - and project scope, data sources (case management systems, public records, surveys), - and data preparation: cleaning, linking and feature engineering. Sabina explains - risk scoring and model architecture, evaluation metrics and bias assessment, and - practical privacy, ethical and legal compliance measures. Deployment topics include - integrating risk tools into frontline workflows, user interface and decision-support - design, stakeholder training and trust, plus monitoring for model drift and alerts. - The episode also addresses operational constraints, partnerships, funding and open - resources. Listen for concrete guidance on building, evaluating and deploying a - domestic risk assessment tool—focused on impact, fairness, privacy and sustainability.' -dateadded: '2024-07-15' + +description: 'Learn to build a domestic risk assessment tool: data cleaning, risk scoring models, privacy and deployment strategies to improve triage and resource allocation.' +intro: 'How do you build a domestic risk assessment tool that meaningfully improves triage while protecting people’s privacy and avoiding bias? In this episode, Sabina Firtala from Frontline’s AI product development walks through the end-to-end process of building a domestic risk assessment tool for triage. Sabina brings hands-on experience across data wrangling, visualization, statistical testing, model training and validation, with a background in Natural Sciences and prior analyst roles in finance and SaaS, plus freelance work for mission-driven projects.

We cover problem framing and project scope, data sources (case management systems, public records, surveys), and data preparation: cleaning, linking and feature engineering. Sabina explains risk scoring and model architecture, evaluation metrics and bias assessment, and practical privacy, ethical and legal compliance measures. Deployment topics include integrating risk tools into frontline workflows, user interface and decision-support design, stakeholder training and trust, plus monitoring for model drift and alerts. The episode also addresses operational constraints, partnerships, funding and open resources. Listen for concrete guidance on building, evaluating and deploying a domestic risk assessment tool—focused on impact, fairness, privacy and sustainability.' +dateadded: 2024-07-15 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -124,6 +111,7 @@ quotableClips: startOffset: 3840 url: https://www.youtube.com/watch?v=CpWlBAmD9ok&t=3840 endOffset: 3840 + --- Links: diff --git a/_podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.md b/_podcast/to-update/s18e09-dataops-observability-and-cure-for-data-team-blues.md similarity index 91% rename from _podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.md rename to _podcast/to-update/s18e09-dataops-observability-and-cure-for-data-team-blues.md index 2bb2e108..ed8b805f 100644 --- a/_podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.md +++ b/_podcast/to-update/s18e09-dataops-observability-and-cure-for-data-team-blues.md @@ -1,37 +1,120 @@ --- +title: "DataOps is the episode’s unifying idea: treating data and ML work as engineered, production-ready products by applying software best practices—automation, CI/CD, testing and test data, immutable versioning, and observability—plus cultural change and leadership to remove fear, reduce rework and burnout, and shorten cycle time. 
The through-line argues that operationalizing the full lifecycle (day‑one provisioning through day‑two reliability and day‑three evolution) turns pockets of heroic, ad‑hoc data work into consistent, reliable delivery that enables safe, scalable use of AI and analytics." +short: DataOps, Observability, and The Cure for Data Team Blues +season: 18 episode: 9 guests: - christopherbergh -description: Learn DataOps best practices for observability, CI/CD and deployment - automation to reduce rework, boost model reliability and speed analytics delivery. -intro: How do you move data teams from fragile, firefighting workloads to reliable, - automated production? In this episode, Christopher Bergh of DataKitchen walks through - his career journey from software engineering to data entrepreneurship and tackles - that exact challenge through the lens of DataOps.

You’ll hear a clear definition - of DataOps and why it matters—covering pre-cloud data engineering pain points, early - DevOps lessons, and workforce burnout tied to poor deployment culture. Key topics - include core DataOps practices (automation, observability, productivity), operational - lifecycle thinking (Day One/Two/Three), model reliability and on‑call readiness - for data science, CI/CD pipelines, regression testing and test data for analytics, - and data versioning strategies. The conversation also addresses MLOps and LLMs, - the limits of AI generation versus process improvement, containers versus serverless - tradeoffs, and how observability-first monitoring drives real change.

Listeners - will come away with practical starting steps for individual contributors and leaders - to reduce rework and cycle time, improve deployment automation, and create sustainable - data engineering and ML practices that lower turnover and increase reliability. +image: images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg ids: anchor: atatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f youtube: HzGpIxV8HtA -image: images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f apple: https://podcasts.apple.com/us/podcast/dataops-observability-and-the-cure-for-data-team/id1541710331?i=1000665429770 spotify: https://open.spotify.com/episode/02VoOk5UkMcvfq7VkSOegb youtube: https://www.youtube.com/watch?v=HzGpIxV8HtA -season: 18 -short: DataOps, Observability, and The Cure for Data Team Blues -title: 'DataOps for Data Engineering: Automation, Observability, CI/CD & Reliable - ML Deployments' + +description: Learn DataOps best practices for observability, CI/CD and deployment automation to reduce rework, boost model reliability and speed analytics delivery +intro: How do you move data teams from fragile, firefighting workloads to reliable, automated production? In this episode, Christopher Bergh of DataKitchen walks through his career journey from software engineering to data entrepreneurship and tackles that exact challenge through the lens of DataOps.

You’ll hear a clear definition of DataOps and why it matters—covering pre-cloud data engineering pain points, early DevOps lessons, and workforce burnout tied to poor deployment culture. Key topics include core DataOps practices (automation, observability, productivity), operational lifecycle thinking (Day One/Two/Three), model reliability and on‑call readiness for data science, CI/CD pipelines, regression testing and test data for analytics, and data versioning strategies. The conversation also addresses MLOps and LLMs, the limits of AI generation versus process improvement, containers versus serverless tradeoffs, and how observability-first monitoring drives real change.

Listeners will come away with practical starting steps for individual contributors and leaders to reduce rework and cycle time, improve deployment automation, and create sustainable data engineering and ML practices that lower turnover and increase reliability +dateadded: 2024-09-04 + +duration: PT01H01M55S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=0 + endOffset: 132 +- name: 'Guest Introduction: Christopher Bergh & DataKitchen' + startOffset: 132 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=132 + endOffset: 245 +- name: 'Career Journey: From Software Engineering to Data Entrepreneurship' + startOffset: 245 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=245 + endOffset: 366 +- name: Pre-cloud Data Engineering Challenges (SQL Server, scaling) + startOffset: 366 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=366 + endOffset: 509 +- name: DevOps Adoption Timeline and Early Lessons + startOffset: 509 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=509 + endOffset: 713 +- name: DataOps Definition and Workforce Burnout Statistics + startOffset: 713 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=713 + endOffset: 807 +- name: 'Deployment Culture: Fear vs. 
Heroism in Data Teams' + startOffset: 807 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=807 + endOffset: 952 +- name: 'Core DataOps Practices: Automation, Observability, and Productivity' + startOffset: 952 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=952 + endOffset: 1126 +- name: 'DataOps Today: MLOps, LLMs, and Buzzword Clarification' + startOffset: 1126 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1126 + endOffset: 1436 +- name: 'Operational Lifecycle: Day One, Day Two, Day Three' + startOffset: 1436 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1436 + endOffset: 1573 +- name: Model Reliability and On‑call Readiness for Data Science + startOffset: 1573 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1573 + endOffset: 1855 +- name: CI/CD Pipelines, Regression Tests, and Test Data for Analytics + startOffset: 1855 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1855 + endOffset: 2053 +- name: Reducing Rework and Cycle Time in Data Workflows + startOffset: 2053 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2053 + endOffset: 2344 +- name: AI Tools and the Limits of Generation vs. Process Improvement + startOffset: 2344 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2344 + endOffset: 2559 +- name: 'End-to-End Deployment Automation: Version Control and Tests' + startOffset: 2559 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2559 + endOffset: 2670 +- name: 'Variable Adoption: Pockets of Best Practice and Integration Gaps' + startOffset: 2670 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2670 + endOffset: 3029 +- name: 'Observability-First Approach: Monitoring Production to Drive Change' + startOffset: 3029 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3029 + endOffset: 3162 +- name: 'Containers vs. 
Serverless: Docker, Kubernetes, and Alternatives' + startOffset: 3162 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3162 + endOffset: 3245 +- name: 'Data Versioning Strategy: Immutability and Versioning Code' + startOffset: 3245 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3245 + endOffset: 3495 +- name: 'Culture and Leadership: Lowering Turnover with Better Processes' + startOffset: 3495 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3495 + endOffset: 3514 +- name: Practical Starting Steps for Individual Contributors + startOffset: 3514 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3514 + endOffset: 3680 +- name: Closing Summary and Next Steps + startOffset: 3680 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3680 + endOffset: 3847 +- name: Episode End + startOffset: 3847 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3847 + endOffset: 3715 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Christopher Bergh & DataKitchen' @@ -375,99 +458,4 @@ transcript: sec: 3847 time: '1:04:07' who: Alexey -dateadded: '2024-09-04' -duration: PT01H01M55S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=0 - endOffset: 132 -- name: 'Guest Introduction: Christopher Bergh & DataKitchen' - startOffset: 132 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=132 - endOffset: 245 -- name: 'Career Journey: From Software Engineering to Data Entrepreneurship' - startOffset: 245 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=245 - endOffset: 366 -- name: Pre-cloud Data Engineering Challenges (SQL Server, scaling) - startOffset: 366 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=366 - endOffset: 509 -- name: DevOps Adoption Timeline and Early Lessons - startOffset: 509 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=509 - endOffset: 713 -- name: DataOps Definition and Workforce Burnout Statistics - startOffset: 713 - url: 
https://www.youtube.com/watch?v=HzGpIxV8HtA&t=713 - endOffset: 807 -- name: 'Deployment Culture: Fear vs. Heroism in Data Teams' - startOffset: 807 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=807 - endOffset: 952 -- name: 'Core DataOps Practices: Automation, Observability, and Productivity' - startOffset: 952 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=952 - endOffset: 1126 -- name: 'DataOps Today: MLOps, LLMs, and Buzzword Clarification' - startOffset: 1126 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1126 - endOffset: 1436 -- name: 'Operational Lifecycle: Day One, Day Two, Day Three' - startOffset: 1436 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1436 - endOffset: 1573 -- name: Model Reliability and On‑call Readiness for Data Science - startOffset: 1573 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1573 - endOffset: 1855 -- name: CI/CD Pipelines, Regression Tests, and Test Data for Analytics - startOffset: 1855 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1855 - endOffset: 2053 -- name: Reducing Rework and Cycle Time in Data Workflows - startOffset: 2053 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2053 - endOffset: 2344 -- name: AI Tools and the Limits of Generation vs. Process Improvement - startOffset: 2344 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2344 - endOffset: 2559 -- name: 'End-to-End Deployment Automation: Version Control and Tests' - startOffset: 2559 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2559 - endOffset: 2670 -- name: 'Variable Adoption: Pockets of Best Practice and Integration Gaps' - startOffset: 2670 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2670 - endOffset: 3029 -- name: 'Observability-First Approach: Monitoring Production to Drive Change' - startOffset: 3029 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3029 - endOffset: 3162 -- name: 'Containers vs. 
Serverless: Docker, Kubernetes, and Alternatives' - startOffset: 3162 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3162 - endOffset: 3245 -- name: 'Data Versioning Strategy: Immutability and Versioning Code' - startOffset: 3245 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3245 - endOffset: 3495 -- name: 'Culture and Leadership: Lowering Turnover with Better Processes' - startOffset: 3495 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3495 - endOffset: 3514 -- name: Practical Starting Steps for Individual Contributors - startOffset: 3514 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3514 - endOffset: 3680 -- name: Closing Summary and Next Steps - startOffset: 3680 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3680 - endOffset: 3847 -- name: Episode End - startOffset: 3847 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3847 - endOffset: 3715 --- diff --git a/_podcast/s19e01-using-data-to-create-liveable-cities.md b/_podcast/to-update/s19e01-using-data-to-create-liveable-cities.md similarity index 95% rename from _podcast/s19e01-using-data-to-create-liveable-cities.md rename to _podcast/to-update/s19e01-using-data-to-create-liveable-cities.md index b1f0ddef..9f8a1b9f 100644 --- a/_podcast/s19e01-using-data-to-create-liveable-cities.md +++ b/_podcast/to-update/s19e01-using-data-to-create-liveable-cities.md @@ -1,20 +1,150 @@ --- +title: "Context: Cities are complex systems where transport, land use, public space and technology intersect; practitioners juggle short-term operations and long-term planning using streams of sensor, fare, and observational data while balancing livability, equity and privacy. 
+ +Core narrative: A human-centered, data-driven approach to urban mobility—building robust, privacy-aware data pipelines that integrate real-time sensors, fare systems and observational sources, and leveraging predictive models and generative-AI interfaces—enables actionable insights for both operational responsiveness and strategic planning, ultimately shaping walkable, equitable, and livable cities through transparent open data, rigorous data quality, and interdisciplinary collaboration." +short: Using Data to Create Liveable Cities +season: 19 episode: 1 guests: - rachellim +image: images/podcast/s19e01-using-data-to-create-liveable-cities.jpg ids: anchor: atatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup youtube: VXQIGHUWeL0 -image: images/podcast/s19e01-using-data-to-create-liveable-cities.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup apple: https://podcasts.apple.com/us/podcast/using-data-to-create-liveable-cities-rachel-lim/id1541710331?i=1000675373908 spotify: https://open.spotify.com/episode/1z7jdogto8i4Zk6Zh1vDxE?si=KCg2Iq1US0SKwFCKasGqUg youtube: https://www.youtube.com/watch?v=VXQIGHUWeL0 -season: 19 -short: Using Data to Create Liveable Cities -title: How Urban Transport Data & AI Enable Real-Time Monitoring, Demand Forecasting - & Data Pipelines + +description: Discover urban transport data, real-time monitoring and demand forecasting techniques—learn data pipelines, text-to-SQL and AI tools to improve planning & ops +intro: How can urban transport data and AI be combined to enable real-time monitoring, accurate demand forecasting, and reliable data pipelines for city operations and planning? In this episode, Rachel Lim, an urban data scientist with a geography background and a master’s in urban data science, walks through practical approaches that bridge urban design and data engineering. 
We cover the core data sources—GPS, sensors, fare card systems, ride‑hailing feeds—and where computer vision complements missing passenger-flow data. Rachel explains planning horizons from short‑term operational response (traffic marshals, cameras, event analytics like F1) to long‑term infrastructure planning driven by travel demand forecasting. She breaks down data pipelines and warehousing, real‑time stacks (Kafka, Apache Spark, sensors, APIs), journey logic, anomaly detection, and privacy-preserving publishing to open portals like data.gov.sg and DataMall. The episode also explores generative AI and Text‑to‑SQL workflows, prompt safety, synthetic data, and conversational search for natural‑language access to datasets. Listeners will gain actionable insight into building and governing transport data systems that support real‑time monitoring, demand forecasting, and scalable analytics +dateadded: 2024-11-06 + +duration: PT00H51M32S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=0 + endOffset: 116 +- name: 'Guest Introduction: Rachel Lim, urban data scientist' + startOffset: 116 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=116 + endOffset: 172 +- name: 'Career Path: Geography to urban informatics and data engineering' + startOffset: 172 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=172 + endOffset: 287 +- name: 'Transport Scientist Role: public sector and consultancy applications' + startOffset: 287 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=287 + endOffset: 334 +- name: 'Planning Horizons: short-term operations vs long-term infrastructure' + startOffset: 334 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=334 + endOffset: 407 +- name: 'Data Sources for Transport: GPS, sensors, fare cards, ride-hailing' + startOffset: 407 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=407 + endOffset: 460 +- name: 'Fare Card Systems: tap-in/tap-out travel data mechanics' + 
startOffset: 460 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=460 + endOffset: 500 +- name: Computer Vision for Passenger Flow where fare data is absent + startOffset: 500 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=500 + endOffset: 535 +- name: 'Professional Motivation: internships, World City Summit, master’s study' + startOffset: 535 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=535 + endOffset: 686 +- name: 'Urban Design Principles: walkability, public spaces, human-scale streets' + startOffset: 686 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=686 + endOffset: 829 +- name: 'Livability Criteria: transport, housing, green space, digital access' + startOffset: 829 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=829 + endOffset: 948 +- name: 'Singapore Planning Practices: Master Plan and placemaking initiatives' + startOffset: 948 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=948 + endOffset: 1104 +- name: 'Open Data & Collaboration: public datasets enabling research and apps' + startOffset: 1104 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1104 + endOffset: 1269 +- name: 'Travel Demand Forecasting: predicting movements for infrastructure planning' + startOffset: 1269 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1269 + endOffset: 1381 +- name: 'Data Pipelines & Warehousing: aggregation of real-time and historical data' + startOffset: 1381 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1381 + endOffset: 1449 +- name: 'Real-Time Monitoring: traffic management and event analytics (F1 example)' + startOffset: 1449 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1449 + endOffset: 1510 +- name: 'Operational Response: cameras, recovery services, traffic marshals' + startOffset: 1510 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1510 + endOffset: 1679 +- name: 'Generative AI in Data Engineering: natural-language access to data' + startOffset: 1679 + url: 
https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1679 + endOffset: 1999 +- name: 'Text-to-SQL Architecture: metadata, vector DB, RAG and LLMs' + startOffset: 1999 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1999 + endOffset: 2118 +- name: 'Prompt Engineering & Query Safety: reliability and SQL restrictions' + startOffset: 2118 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2118 + endOffset: 2192 +- name: 'Dataset Scale: millions of fare card records and demand analytics' + startOffset: 2192 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2192 + endOffset: 2314 +- name: 'Infrastructure Stack: Kafka, Apache Spark, sensors, real-time APIs' + startOffset: 2314 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2314 + endOffset: 2367 +- name: 'Journey Logic & Aggregation: trip definition and fare computation' + startOffset: 2367 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2367 + endOffset: 2468 +- name: 'Data Quality Management: anomaly detection and sensor reliability' + startOffset: 2468 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2468 + endOffset: 2537 +- name: 'Generative AI Use Cases: synthetic data and conversational search' + startOffset: 2537 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2537 + endOffset: 2740 +- name: 'Privacy & Publishing: masking sensitive data before release' + startOffset: 2740 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2740 + endOffset: 2760 +- name: 'Singapore Open Data Portals: data.gov.sg and DataMall resources' + startOffset: 2760 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2760 + endOffset: 2825 +- name: 'Project Ideas for Learning: car parking and real-time taxi datasets' + startOffset: 2825 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2825 + endOffset: 2956 +- name: 'Recommended Resources: DataTalks.Club, Jane Jacobs, Happy City' + startOffset: 2956 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2956 + endOffset: 3162 +- name: Episode Wrap-Up and Closing 
Remarks + startOffset: 3162 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=3162 + endOffset: 3092 + transcript: - header: Episode Introduction - header: 'Guest Introduction: Rachel Lim, urban data scientist' @@ -786,148 +916,6 @@ transcript: sec: 3208 time: '53:28' who: Alexey -description: Discover urban transport data, real-time monitoring and demand forecasting - techniques—learn data pipelines, text-to-SQL and AI tools to improve planning & - ops. -intro: How can urban transport data and AI be combined to enable real-time monitoring, - accurate demand forecasting, and reliable data pipelines for city operations and - planning? In this episode, Rachel Lim, an urban data scientist with a geography background - and a master’s in urban data science, walks through practical approaches that bridge - urban design and data engineering. We cover the core data sources—GPS, sensors, - fare card systems, ride‑hailing feeds—and where computer vision complements missing - passenger-flow data. Rachel explains planning horizons from short‑term operational - response (traffic marshals, cameras, event analytics like F1) to long‑term infrastructure - planning driven by travel demand forecasting. She breaks down data pipelines and - warehousing, real‑time stacks (Kafka, Apache Spark, sensors, APIs), journey logic, - anomaly detection, and privacy-preserving publishing to open portals like data.gov.sg - and DataMall. The episode also explores generative AI and Text‑to‑SQL workflows, - prompt safety, synthetic data, and conversational search for natural‑language access - to datasets. Listeners will gain actionable insight into building and governing - transport data systems that support real‑time monitoring, demand forecasting, and - scalable analytics. 
-dateadded: '2024-11-06' -duration: PT00H51M32S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=0 - endOffset: 116 -- name: 'Guest Introduction: Rachel Lim, urban data scientist' - startOffset: 116 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=116 - endOffset: 172 -- name: 'Career Path: Geography to urban informatics and data engineering' - startOffset: 172 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=172 - endOffset: 287 -- name: 'Transport Scientist Role: public sector and consultancy applications' - startOffset: 287 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=287 - endOffset: 334 -- name: 'Planning Horizons: short-term operations vs long-term infrastructure' - startOffset: 334 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=334 - endOffset: 407 -- name: 'Data Sources for Transport: GPS, sensors, fare cards, ride-hailing' - startOffset: 407 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=407 - endOffset: 460 -- name: 'Fare Card Systems: tap-in/tap-out travel data mechanics' - startOffset: 460 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=460 - endOffset: 500 -- name: Computer Vision for Passenger Flow where fare data is absent - startOffset: 500 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=500 - endOffset: 535 -- name: 'Professional Motivation: internships, World City Summit, master’s study' - startOffset: 535 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=535 - endOffset: 686 -- name: 'Urban Design Principles: walkability, public spaces, human-scale streets' - startOffset: 686 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=686 - endOffset: 829 -- name: 'Livability Criteria: transport, housing, green space, digital access' - startOffset: 829 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=829 - endOffset: 948 -- name: 'Singapore Planning Practices: Master Plan and placemaking initiatives' - startOffset: 948 - url: 
https://www.youtube.com/watch?v=VXQIGHUWeL0&t=948 - endOffset: 1104 -- name: 'Open Data & Collaboration: public datasets enabling research and apps' - startOffset: 1104 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1104 - endOffset: 1269 -- name: 'Travel Demand Forecasting: predicting movements for infrastructure planning' - startOffset: 1269 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1269 - endOffset: 1381 -- name: 'Data Pipelines & Warehousing: aggregation of real-time and historical data' - startOffset: 1381 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1381 - endOffset: 1449 -- name: 'Real-Time Monitoring: traffic management and event analytics (F1 example)' - startOffset: 1449 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1449 - endOffset: 1510 -- name: 'Operational Response: cameras, recovery services, traffic marshals' - startOffset: 1510 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1510 - endOffset: 1679 -- name: 'Generative AI in Data Engineering: natural-language access to data' - startOffset: 1679 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1679 - endOffset: 1999 -- name: 'Text-to-SQL Architecture: metadata, vector DB, RAG and LLMs' - startOffset: 1999 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1999 - endOffset: 2118 -- name: 'Prompt Engineering & Query Safety: reliability and SQL restrictions' - startOffset: 2118 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2118 - endOffset: 2192 -- name: 'Dataset Scale: millions of fare card records and demand analytics' - startOffset: 2192 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2192 - endOffset: 2314 -- name: 'Infrastructure Stack: Kafka, Apache Spark, sensors, real-time APIs' - startOffset: 2314 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2314 - endOffset: 2367 -- name: 'Journey Logic & Aggregation: trip definition and fare computation' - startOffset: 2367 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2367 - endOffset: 
2468 -- name: 'Data Quality Management: anomaly detection and sensor reliability' - startOffset: 2468 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2468 - endOffset: 2537 -- name: 'Generative AI Use Cases: synthetic data and conversational search' - startOffset: 2537 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2537 - endOffset: 2740 -- name: 'Privacy & Publishing: masking sensitive data before release' - startOffset: 2740 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2740 - endOffset: 2760 -- name: 'Singapore Open Data Portals: data.gov.sg and DataMall resources' - startOffset: 2760 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2760 - endOffset: 2825 -- name: 'Project Ideas for Learning: car parking and real-time taxi datasets' - startOffset: 2825 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2825 - endOffset: 2956 -- name: 'Recommended Resources: DataTalks.Club, Jane Jacobs, Happy City' - startOffset: 2956 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2956 - endOffset: 3162 -- name: Episode Wrap-Up and Closing Remarks - startOffset: 3162 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=3162 - endOffset: 3092 --- Links: diff --git a/_podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.md b/_podcast/to-update/s19e02-human-centered-ai-for-disordered-speech-recognition.md similarity index 94% rename from _podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.md rename to _podcast/to-update/s19e02-human-centered-ai-for-disordered-speech-recognition.md index dd67f6b8..24e6a2e1 100644 --- a/_podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.md +++ b/_podcast/to-update/s19e02-human-centered-ai-for-disordered-speech-recognition.md @@ -1,19 +1,142 @@ --- +title: "Context: The episode surveys how linguistics and computational methods intersect to address limitations of mainstream ASR for people with disordered, accented, or atypical speech — covering phonetics and morpho‑syntax 
foundations, distinctions between accent and disorder, modern ASR advances and failure modes, data collection and GDPR constraints, targeted datasets and augmentation, multimodal and transfer approaches, personalization and on‑device deployment, and the ethical/assistive implications. + +Core: Build ASR systems that are human‑centered and linguistically informed—prioritizing inclusive data practices, phonetics‑aware modeling, adaptive techniques (augmentation, transfer learning, multimodal cues, personalization), and ethical deployment—so speech technology recognizes and respects the communicative diversity and needs of people with disordered or atypical speech." +short: Human-Centered AI for Disordered Speech Recognition +season: 19 episode: 2 guests: - katarzynaforemniak +image: images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg ids: anchor: atatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 youtube: yTZ4cddD7DU -image: images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 apple: https://podcasts.apple.com/us/podcast/human-centered-ai-for-disordered-speech-recognition/id1541710331?i=1000671805368 spotify: https://open.spotify.com/show/0pck8zuiXdI0OrCg86DAPy?si=ac857db69d484277 youtube: https://www.youtube.com/watch?v=yTZ4cddD7DU -season: 19 -short: Human-Centered AI for Disordered Speech Recognition -title: 'Human-Centered ASR for Disordered Speech: Data, Multimodal Cues & Personalization' + +description: 'Learn ASR strategies for disordered speech: data, multimodal cues and personalization to build robust assistive voice systems and on-device speech tools.' 
+intro: 'How can automatic speech recognition (ASR) systems reliably understand disordered and atypical speech without compromising user identity or privacy? In this episode Katarzyna Foremniak, a computational linguist with 10+ years in NLP who developed language models for Audi and Porsche and teaches at the University of Warsaw, tackles that question through a human‑centered lens.

We explore core phonetics and morpho‑syntax concepts that matter for disordered speech, distinctions between accents and disorders, and practical limits of modern models (e.g., Whisper) when faced with atypical articulation, stammering, and voice quality variation. Katarzyna walks through data‑driven strategies: specialized datasets, data augmentation, transfer learning and fine‑tuning with limited data, plus multimodal ASR approaches that integrate lip‑reading and visual cues. The conversation also covers data collection challenges (GDPR, clinical data, language and dialect coverage), personalization and on‑device adaptation, and assistive and automotive use cases with deployment constraints.

If you work on speech recognition, accessibility, or multilingual NLP, this episode offers concrete technical strategies and ethical considerations for building personalized, multimodal ASR systems that better serve people with speech disorders.' +dateadded: 2024-10-10 + +duration: PT00H57M19S + +quotableClips: +- name: 'Episode Introduction: Human‑Centered AI for Disordered Speech' + startOffset: 0 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=0 + endOffset: 486 +- name: Guest Introduction & Career Highlights (Katarzyna Foremniak) + startOffset: 486 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=486 + endOffset: 546 +- name: 'From Linguistics to Computational Linguistics: Transition & Skills' + startOffset: 546 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=546 + endOffset: 802 +- name: 'Linguistics Meets Computer Science: Data‑driven Approaches' + startOffset: 802 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=802 + endOffset: 925 +- name: 'Phonetics & Morpho‑syntax Explained: Core Concepts for ASR' + startOffset: 925 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=925 + endOffset: 1233 +- name: 'Phonetics and Speech Disorders: Articulation, Fluency, Voice Quality' + startOffset: 1233 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1233 + endOffset: 1399 +- name: 'Accents vs Speech Disorders: Variation, Identity, and Comprehension' + startOffset: 1399 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1399 + endOffset: 1481 +- name: 'ASR Progress: Modern Models (Whisper) and Improved Accent Handling' + startOffset: 1481 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1481 + endOffset: 1651 +- name: 'ASR Fundamentals: Standard Speech Datasets and Reference Speech' + startOffset: 1651 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1651 + endOffset: 1824 +- name: 'ASR Limitations with Atypical Speech: Training/Deployment Gaps' + startOffset: 1824 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1824 + endOffset: 
1853 +- name: 'Strategies for Disordered Speech Recognition: Specialized Datasets & Adaptation' + startOffset: 1853 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1853 + endOffset: 2227 +- name: 'Data Augmentation for Disordered Speech: Synthetic Variations' + startOffset: 2227 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2227 + endOffset: 2253 +- name: 'Multimodal ASR: Integrating Lip‑reading and Visual Cues' + startOffset: 2253 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2253 + endOffset: 2417 +- name: 'Transfer Learning for ASR: Fine‑tuning with Limited Data' + startOffset: 2417 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2417 + endOffset: 2470 +- name: 'Data Collection Challenges: GDPR, Clinical Data, Language Coverage' + startOffset: 2470 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2470 + endOffset: 2538 +- name: 'Language & Dialect Effects: Bilingualism and Disorder Variability' + startOffset: 2538 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2538 + endOffset: 2671 +- name: 'Stammering & Fluency Issues: Characteristics and Recognition Needs' + startOffset: 2671 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2671 + endOffset: 2716 +- name: 'Pronunciation Challenges: Polish Consonant Clusters and Phonetics' + startOffset: 2716 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2716 + endOffset: 2777 +- name: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post‑processing' + startOffset: 2777 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2777 + endOffset: 2848 +- name: 'Contextual Language Models in ASR: Meaning Preservation vs WER' + startOffset: 2848 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2848 + endOffset: 3087 +- name: 'Utterance Analysis in ASR: Phonemes, Words, and Contextual Prediction' + startOffset: 3087 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3087 + endOffset: 3245 +- name: 'Personalized ASR: User Adaptation, Fine‑tuning, and On‑device Setup' + 
startOffset: 3245 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3245 + endOffset: 3480 +- name: 'Assistive Applications: Communication Tools for People with Disorders' + startOffset: 3480 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3480 + endOffset: 3602 +- name: 'Model Size & Deployment Constraints: Mobile and Edge Considerations' + startOffset: 3602 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3602 + endOffset: 3713 +- name: 'In‑Car Voice Recognition: Automotive Use Cases and Limitations' + startOffset: 3713 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3713 + endOffset: 3807 +- name: 'Notable Failure Examples: Elevator/Car Voice Recognition Humor' + startOffset: 3807 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3807 + endOffset: 3853 +- name: 'Closing Reflections: Human‑Centered AI Priorities & Further Reading' + startOffset: 3853 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3853 + endOffset: 3925 +- name: Episode Sign‑off and Guest Thanks + startOffset: 3925 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3925 + endOffset: 3439 + transcript: - header: 'Episode Introduction: Human‑Centered AI for Disordered Speech' - header: Guest Introduction & Career Highlights (Katarzyna Foremniak) @@ -790,140 +913,6 @@ transcript: sec: 3925 time: '1:05:25' who: Katarzyna -description: 'Learn ASR strategies for disordered speech: data, multimodal cues and - personalization to build robust assistive voice systems and on-device speech tools.' -intro: 'How can automatic speech recognition (ASR) systems reliably understand disordered - and atypical speech without compromising user identity or privacy? In this episode - Katarzyna Foremniak, a computational linguist with 10+ years in NLP who developed - language models for Audi and Porsche and teaches at the University of Warsaw, tackles - that question through a human‑centered lens.

We explore core phonetics - and morpho‑syntax concepts that matter for disordered speech, distinctions between - accents and disorders, and practical limits of modern models (e.g., Whisper) when - faced with atypical articulation, stammering, and voice quality variation. Katarzyna - walks through data‑driven strategies: specialized datasets, data augmentation, transfer - learning and fine‑tuning with limited data, plus multimodal ASR approaches that - integrate lip‑reading and visual cues. The conversation also covers data collection - challenges (GDPR, clinical data, language and dialect coverage), personalization - and on‑device adaptation, and assistive and automotive use cases with deployment - constraints.

If you work on speech recognition, accessibility, or multilingual - NLP, this episode offers concrete technical strategies and ethical considerations - for building personalized, multimodal ASR systems that better serve people with - speech disorders.' -dateadded: '2024-10-10' -duration: PT00H57M19S -quotableClips: -- name: 'Episode Introduction: Human‑Centered AI for Disordered Speech' - startOffset: 0 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=0 - endOffset: 486 -- name: Guest Introduction & Career Highlights (Katarzyna Foremniak) - startOffset: 486 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=486 - endOffset: 546 -- name: 'From Linguistics to Computational Linguistics: Transition & Skills' - startOffset: 546 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=546 - endOffset: 802 -- name: 'Linguistics Meets Computer Science: Data‑driven Approaches' - startOffset: 802 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=802 - endOffset: 925 -- name: 'Phonetics & Morpho‑syntax Explained: Core Concepts for ASR' - startOffset: 925 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=925 - endOffset: 1233 -- name: 'Phonetics and Speech Disorders: Articulation, Fluency, Voice Quality' - startOffset: 1233 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1233 - endOffset: 1399 -- name: 'Accents vs Speech Disorders: Variation, Identity, and Comprehension' - startOffset: 1399 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1399 - endOffset: 1481 -- name: 'ASR Progress: Modern Models (Whisper) and Improved Accent Handling' - startOffset: 1481 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1481 - endOffset: 1651 -- name: 'ASR Fundamentals: Standard Speech Datasets and Reference Speech' - startOffset: 1651 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1651 - endOffset: 1824 -- name: 'ASR Limitations with Atypical Speech: Training/Deployment Gaps' - startOffset: 1824 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1824 - 
endOffset: 1853 -- name: 'Strategies for Disordered Speech Recognition: Specialized Datasets & Adaptation' - startOffset: 1853 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1853 - endOffset: 2227 -- name: 'Data Augmentation for Disordered Speech: Synthetic Variations' - startOffset: 2227 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2227 - endOffset: 2253 -- name: 'Multimodal ASR: Integrating Lip‑reading and Visual Cues' - startOffset: 2253 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2253 - endOffset: 2417 -- name: 'Transfer Learning for ASR: Fine‑tuning with Limited Data' - startOffset: 2417 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2417 - endOffset: 2470 -- name: 'Data Collection Challenges: GDPR, Clinical Data, Language Coverage' - startOffset: 2470 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2470 - endOffset: 2538 -- name: 'Language & Dialect Effects: Bilingualism and Disorder Variability' - startOffset: 2538 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2538 - endOffset: 2671 -- name: 'Stammering & Fluency Issues: Characteristics and Recognition Needs' - startOffset: 2671 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2671 - endOffset: 2716 -- name: 'Pronunciation Challenges: Polish Consonant Clusters and Phonetics' - startOffset: 2716 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2716 - endOffset: 2777 -- name: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post‑processing' - startOffset: 2777 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2777 - endOffset: 2848 -- name: 'Contextual Language Models in ASR: Meaning Preservation vs WER' - startOffset: 2848 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2848 - endOffset: 3087 -- name: 'Utterance Analysis in ASR: Phonemes, Words, and Contextual Prediction' - startOffset: 3087 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3087 - endOffset: 3245 -- name: 'Personalized ASR: User Adaptation, Fine‑tuning, and On‑device Setup' 
- startOffset: 3245 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3245 - endOffset: 3480 -- name: 'Assistive Applications: Communication Tools for People with Disorders' - startOffset: 3480 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3480 - endOffset: 3602 -- name: 'Model Size & Deployment Constraints: Mobile and Edge Considerations' - startOffset: 3602 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3602 - endOffset: 3713 -- name: 'In‑Car Voice Recognition: Automotive Use Cases and Limitations' - startOffset: 3713 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3713 - endOffset: 3807 -- name: 'Notable Failure Examples: Elevator/Car Voice Recognition Humor' - startOffset: 3807 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3807 - endOffset: 3853 -- name: 'Closing Reflections: Human‑Centered AI Priorities & Further Reading' - startOffset: 3853 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3853 - endOffset: 3925 -- name: Episode Sign‑off and Guest Thanks - startOffset: 3925 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3925 - endOffset: 3439 --- Links: diff --git a/_podcast/s19e03-datatalks-club-anniversary-podcast.md b/_podcast/to-update/s19e03-datatalks-club-anniversary-podcast.md similarity index 96% rename from _podcast/s19e03-datatalks-club-anniversary-podcast.md rename to _podcast/to-update/s19e03-datatalks-club-anniversary-podcast.md index e512551e..d2c6fd2b 100644 --- a/_podcast/s19e03-datatalks-club-anniversary-podcast.md +++ b/_podcast/to-update/s19e03-datatalks-club-anniversary-podcast.md @@ -1,34 +1,152 @@ --- +title: "Context: Born during COVID as a volunteer meetup, DataTalks.Club scaled organically into a free-to-learn, community-first education platform—driven by practical courses (data engineering, ML, MLOps, LLMs), events, mentorship, and hands-on student success—while the founders stayed technically engaged and navigated financial, operational, and growth trade-offs. 
+ +Core: The unifying idea is that sustainable, founder-led communities that combine technical experimentation and product-building with human-centered connection (mentorship, events, accessible learning) create lasting impact and resilience—allowing thoughtful stewardship to adapt to AI-driven change, achieve product-market fit, and scale education without sacrificing community values." +short: DataTalks.Club Anniversary Podcast +season: 19 episode: 3 guests: - alexeygrigorev -description: How do you grow an open, free-to-learn data community into a sustainable - education platform? In this episode, Alexey Grigorev — who founded DataTalks.Club - during COVID and later transitioned to running it full-time — walks through the - practical decisions and trade-offs behind building courses, community, and a product. - We cover the course portfolio (machine learning, data engineering, MLOps, LLMs, - stock analytics), organic growth from the Data Engineering Zoomcamp, and the technical - stack for scaling (a Django-based course platform). Johanna shares lessons on sponsorship - dynamics, prepaid tax realities in Germany, protecting community safety from scams, - and staying technical through pet projects, LLM experiments and an automated storytelling - pipeline. You’ll also hear about launching an LLM course and RAG experiments, early - validation and product-market fit, scaling challenges and loneliness, plus concrete - ways to contribute—guesting, mentoring, or joining project weeks. Listen for actionable - insights on running an online data community, course productization, community moderation, - and practical next steps if you want to build or support a data education ecosystem. 
+image: images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg ids: anchor: atatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 youtube: GHbeXIKnkLQ -image: images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 apple: https://podcasts.apple.com/us/podcast/datatalks-club-4th-anniversary-ama-podcast-alexey-grigorev/id1541710331?i=1000674473200 spotify: https://open.spotify.com/episode/50wIZxjq6goREu9pwXYITP?si=mPW0v5fBQxuBpg622CpCEA youtube: https://www.youtube.com/watch?v=GHbeXIKnkLQ -season: 19 -short: DataTalks.Club Anniversary Podcast -title: 'Inside Scaling DataTalks.Club: How We Built Free Data Engineering, MLOps & - LLM Courses' + +description: How do you grow an open, free-to-learn data community into a sustainable education platform? In this episode, Alexey Grigorev — who founded DataTalks.Club during COVID and later transitioned to running it full-time — walks through the practical decisions and trade-offs behind building courses, community, and a product. We cover the course portfolio (machine learning, data engineering, MLOps, LLMs, stock analytics), organic growth from the Data Engineering Zoomcamp, and the technical stack for scaling (a Django-based course platform). Johanna shares lessons on sponsorship dynamics, prepaid tax realities in Germany, protecting community safety from scams, and staying technical through pet projects, LLM experiments and an automated storytelling pipeline. You’ll also hear about launching an LLM course and RAG experiments, early validation and product-market fit, scaling challenges and loneliness, plus concrete ways to contribute—guesting, mentoring, or joining project weeks. 
Listen for actionable insights on running an online data community, course productization, community moderation, and practical next steps if you want to build or support a data education ecosystem +dateadded: 2024-11-08 + +duration: PT01H03M17S + +quotableClips: +- name: Podcast Welcome & AMA Format (community links and live questions) + startOffset: 0 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=0 + endOffset: 95 +- name: 'Host Intro: Johanna as special host' + startOffset: 95 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=95 + endOffset: 149 +- name: 'Origin Story: Founding DataTalks.Club during COVID' + startOffset: 149 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=149 + endOffset: 232 +- name: 'Career Shift: Transition to running DataTalks.Club full-time' + startOffset: 232 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=232 + endOffset: 246 +- name: 'Financial Decision: Leaving corporate work and early sustainability' + startOffset: 246 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=246 + endOffset: 307 +- name: 'Course Portfolio: Machine Learning, Data Engineering, MLOps, LLMs, Stock + Analytics' + startOffset: 307 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=307 + endOffset: 493 +- name: 'Organic Growth: Word-of-mouth success of Data Engineering Zoomcamp' + startOffset: 493 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=493 + endOffset: 562 +- name: 'Community Safety: Upwork scam awareness and moderation tips' + startOffset: 562 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=562 + endOffset: 724 +- name: 'Mission: Free-to-learn education inspired by Open Data Science' + startOffset: 724 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=724 + endOffset: 987 +- name: 'Community Impact: Student success stories and donations' + startOffset: 987 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=987 + endOffset: 1076 +- name: 'Sponsorship Dynamics: Revenue volatility and runway management' + startOffset: 
1076 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1076 + endOffset: 1214 +- name: 'Taxes & Cashflow: Prepaid tax system in Germany' + startOffset: 1214 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1214 + endOffset: 1443 +- name: 'Staying Technical: Pet projects, LLM experiments, and automated storytelling + pipeline' + startOffset: 1443 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1443 + endOffset: 1603 +- name: 'Product Work: Building the course platform in Django to scale courses' + startOffset: 1603 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1603 + endOffset: 1754 +- name: 'LLMs & RAG: From skepticism to launching an LLM course' + startOffset: 1754 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1754 + endOffset: 1910 +- name: 'Life Update: Reflections on full-time community work and no regrets' + startOffset: 1910 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1910 + endOffset: 2020 +- name: 'Early Validation: First event success and finding product-market fit' + startOffset: 2020 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2020 + endOffset: 2197 +- name: 'Community Longevity: Active engagement, investment, and self-organization' + startOffset: 2197 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2197 + endOffset: 2354 +- name: 'AI and Roles: Impact of AutoML/LLMs on data analysts and data scientists' + startOffset: 2354 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2354 + endOffset: 2544 +- name: 'AI in Healthcare: Human touch versus automated assistance' + startOffset: 2544 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2544 + endOffset: 2744 +- name: 'Scaling Challenges: Time investment, loneliness, and rejecting acquisition + offers' + startOffset: 2744 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2744 + endOffset: 2882 +- name: 'Networking Benefits: Masterminds, meetups, and personal connections' + startOffset: 2882 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2882 + 
endOffset: 2989 +- name: 'Growth Objectives: More sponsors, new courses, and instructor autonomy' + startOffset: 2989 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2989 + endOffset: 3098 +- name: 'How to Help: Be a guest, mentor in Slack, and join Project of the Week' + startOffset: 3098 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3098 + endOffset: 3226 +- name: 'Events Roadmap: Competitions, future hackathons, and ML course contests' + startOffset: 3226 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3226 + endOffset: 3329 +- name: 'Course Schedule: Stock market analytics rerun and upcoming workshops' + startOffset: 3329 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3329 + endOffset: 3401 +- name: 'Podcast Workflow: Guest research, question prep, and interview process' + startOffset: 3401 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3401 + endOffset: 3527 +- name: 'Career Advice: Starting in data science now and junior hiring realities' + startOffset: 3527 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3527 + endOffset: 3670 +- name: 'Personal Reads: Book recommendations and current reading' + startOffset: 3670 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3670 + endOffset: 3761 +- name: Closing Remarks & Thank You + startOffset: 3761 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3761 + endOffset: 3797 + transcript: - header: Podcast Welcome & AMA Format (community links and live questions) - line: Hi, everyone. Welcome to our event. 
This event is brought to you by DataTalks.Club, @@ -1073,130 +1191,4 @@ transcript: sec: 3797 time: '1:03:17' who: Johanna -dateadded: '2024-11-08' -duration: PT01H03M17S -quotableClips: -- name: Podcast Welcome & AMA Format (community links and live questions) - startOffset: 0 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=0 - endOffset: 95 -- name: 'Host Intro: Johanna as special host' - startOffset: 95 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=95 - endOffset: 149 -- name: 'Origin Story: Founding DataTalks.Club during COVID' - startOffset: 149 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=149 - endOffset: 232 -- name: 'Career Shift: Transition to running DataTalks.Club full-time' - startOffset: 232 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=232 - endOffset: 246 -- name: 'Financial Decision: Leaving corporate work and early sustainability' - startOffset: 246 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=246 - endOffset: 307 -- name: 'Course Portfolio: Machine Learning, Data Engineering, MLOps, LLMs, Stock - Analytics' - startOffset: 307 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=307 - endOffset: 493 -- name: 'Organic Growth: Word-of-mouth success of Data Engineering Zoomcamp' - startOffset: 493 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=493 - endOffset: 562 -- name: 'Community Safety: Upwork scam awareness and moderation tips' - startOffset: 562 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=562 - endOffset: 724 -- name: 'Mission: Free-to-learn education inspired by Open Data Science' - startOffset: 724 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=724 - endOffset: 987 -- name: 'Community Impact: Student success stories and donations' - startOffset: 987 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=987 - endOffset: 1076 -- name: 'Sponsorship Dynamics: Revenue volatility and runway management' - startOffset: 1076 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1076 - endOffset: 
1214 -- name: 'Taxes & Cashflow: Prepaid tax system in Germany' - startOffset: 1214 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1214 - endOffset: 1443 -- name: 'Staying Technical: Pet projects, LLM experiments, and automated storytelling - pipeline' - startOffset: 1443 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1443 - endOffset: 1603 -- name: 'Product Work: Building the course platform in Django to scale courses' - startOffset: 1603 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1603 - endOffset: 1754 -- name: 'LLMs & RAG: From skepticism to launching an LLM course' - startOffset: 1754 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1754 - endOffset: 1910 -- name: 'Life Update: Reflections on full-time community work and no regrets' - startOffset: 1910 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1910 - endOffset: 2020 -- name: 'Early Validation: First event success and finding product-market fit' - startOffset: 2020 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2020 - endOffset: 2197 -- name: 'Community Longevity: Active engagement, investment, and self-organization' - startOffset: 2197 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2197 - endOffset: 2354 -- name: 'AI and Roles: Impact of AutoML/LLMs on data analysts and data scientists' - startOffset: 2354 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2354 - endOffset: 2544 -- name: 'AI in Healthcare: Human touch versus automated assistance' - startOffset: 2544 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2544 - endOffset: 2744 -- name: 'Scaling Challenges: Time investment, loneliness, and rejecting acquisition - offers' - startOffset: 2744 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2744 - endOffset: 2882 -- name: 'Networking Benefits: Masterminds, meetups, and personal connections' - startOffset: 2882 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2882 - endOffset: 2989 -- name: 'Growth Objectives: More sponsors, new courses, and 
instructor autonomy' - startOffset: 2989 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2989 - endOffset: 3098 -- name: 'How to Help: Be a guest, mentor in Slack, and join Project of the Week' - startOffset: 3098 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3098 - endOffset: 3226 -- name: 'Events Roadmap: Competitions, future hackathons, and ML course contests' - startOffset: 3226 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3226 - endOffset: 3329 -- name: 'Course Schedule: Stock market analytics rerun and upcoming workshops' - startOffset: 3329 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3329 - endOffset: 3401 -- name: 'Podcast Workflow: Guest research, question prep, and interview process' - startOffset: 3401 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3401 - endOffset: 3527 -- name: 'Career Advice: Starting in data science now and junior hiring realities' - startOffset: 3527 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3527 - endOffset: 3670 -- name: 'Personal Reads: Book recommendations and current reading' - startOffset: 3670 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3670 - endOffset: 3761 -- name: Closing Remarks & Thank You - startOffset: 3761 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3761 - endOffset: 3797 --- diff --git a/_podcast/s19e04-mlops-as-team.md b/_podcast/to-update/s19e04-mlops-as-team.md similarity index 94% rename from _podcast/s19e04-mlops-as-team.md rename to _podcast/to-update/s19e04-mlops-as-team.md index 7fab47b4..8b392555 100644 --- a/_podcast/s19e04-mlops-as-team.md +++ b/_podcast/to-update/s19e04-mlops-as-team.md @@ -1,38 +1,130 @@ --- +title: "Context: Raphaël Hoogvliets (Eneco) walks through his journey from agriculture to data science and MLOps, illustrating real-world tradeoffs in design, team structure, tooling, and delivery while sharing concrete practices, stories, and metrics for operationalizing ML. 
+ +Core narrative: MLOps is fundamentally about operationalizing machine learning as sustainable product engineering—building an enabling, platform-led way of working that brings cross-functional teams, pragmatic engineering practices (CI/CD, reproducibility, testing, dependency management), and iterative adoption together so organizations can balance speed versus robustness, build trust with quick wins and measured KPIs, and keep models reliably deployed and delivering business impact." +short: MLOps as a Team +season: 19 episode: 4 guests: - raphaelhoogvliets -description: 'Master MLOps: CI/CD, reproducibility, and delivery strategies to accelerate - ML delivery, boost reliability, improve team efficiency, and measure business impact.' -intro: How do you keep machine learning models deployed, monitored, and maintained - in production? In this episode, Raphaël Hoogvliets from Eneco — whose career journey - spans agriculture to data science and MLOps — tackles that core MLOps challenge. - We trace practical design choices and long‑term trade‑offs between speed and robustness, - and why team coordination, evangelists, tech translators and technical leads matter - when scaling ML.

Key topics include centralized MLOps as an enabling platform - team, support models for product teams and ML engineers, adoption strategies centered - on iteration and developer experience, and tactics for building trust through quick - wins and pain‑point collection. We also cover measurable KPIs like deployment frequency - and impact tracking, core practices such as CI/CD, repo structure, parameterization, - testing, and reproducibility with data versioning and experiment capture. The episode - reviews essential tools—experiment tracking, model registry, serving and monitoring—plus - dependency and container strategies (Docker, Kubernetes, Databricks) and signals - for when to introduce governance.

If you’re responsible for operationalizing - machine learning, this discussion offers concrete guidance on prioritizing CI/CD, - staffing the right skill mix, and choosing tools and processes to keep models reliable - in production. +image: images/podcast/s19e04-mlops-as-team.jpg ids: anchor: atalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr youtube: rMq63r3zi4c -image: images/podcast/s19e04-mlops-as-team.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr apple: https://podcasts.apple.com/us/podcast/mlops-as-a-team-rapha%C3%ABl-hoogvliets/id1541710331?i=1000676238840 spotify: https://open.spotify.com/episode/0Dl372MFGvN0zDa1YQx7oe?si=eCy-a4fkRtOaEe21-KDHXQ youtube: https://youtube.com/watch?v=rMq63r3zi4c -season: 19 -short: MLOps as a Team -title: 'MLOps at Scale: CI/CD, Reproducibility, Model Monitoring & Team Adoption Strategies' + +description: 'Master MLOps: CI/CD, reproducibility, and delivery strategies to accelerate ML delivery, boost reliability, improve team efficiency, and measure business impact.' +intro: How do you keep machine learning models deployed, monitored, and maintained in production? In this episode, Raphaël Hoogvliets from Eneco — whose career journey spans agriculture to data science and MLOps — tackles that core MLOps challenge. We trace practical design choices and long‑term trade‑offs between speed and robustness, and why team coordination, evangelists, tech translators and technical leads matter when scaling ML.

Key topics include centralized MLOps as an enabling platform team, support models for product teams and ML engineers, adoption strategies centered on iteration and developer experience, and tactics for building trust through quick wins and pain‑point collection. We also cover measurable KPIs like deployment frequency and impact tracking, core practices such as CI/CD, repo structure, parameterization, testing, and reproducibility with data versioning and experiment capture. The episode reviews essential tools—experiment tracking, model registry, serving and monitoring—plus dependency and container strategies (Docker, Kubernetes, Databricks) and signals for when to introduce governance.

If you’re responsible for operationalizing machine learning, this discussion offers concrete guidance on prioritizing CI/CD, staffing the right skill mix, and choosing tools and processes to keep models reliable in production +dateadded: 2024-11-16 + +duration: PT01H04M07S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=0 + endOffset: 81 +- name: 'Guest Overview: Raphaël Hoogvliets and Eneco role' + startOffset: 81 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=81 + endOffset: 154 +- name: 'Career Path: From agriculture to data science and MLOps' + startOffset: 154 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=154 + endOffset: 521 +- name: Agriculture technology, scale, and sustainability trade-offs + startOffset: 521 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=521 + endOffset: 636 +- name: Design Choices and Long‑Term Tradeoffs in ML projects + startOffset: 636 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=636 + endOffset: 817 +- name: 'Speed vs. 
Robustness: trade-offs in MLOps delivery' + startOffset: 817 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=817 + endOffset: 845 +- name: 'Team Coordination: why collaboration matters for ML at scale' + startOffset: 845 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=845 + endOffset: 1018 +- name: 'Key Team Roles: evangelists, tech translators, and technical leads' + startOffset: 1018 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1018 + endOffset: 1381 +- name: Centralized MLOps as an enabling platform team + startOffset: 1381 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1381 + endOffset: 1520 +- name: 'Support Model: how MLOps assists product teams and ML engineers' + startOffset: 1520 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1520 + endOffset: 1676 +- name: 'Adoption Strategy: iteration, feedback loops, and developer experience' + startOffset: 1676 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1676 + endOffset: 1966 +- name: 'Building Trust: collecting pain points and delivering quick wins' + startOffset: 1966 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1966 + endOffset: 2215 +- name: 'Measuring Value: KPIs, deployment frequency, and impact tracking' + startOffset: 2215 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2215 + endOffset: 2346 +- name: 'Core Practices: CI, repo structure, parameterization, and testing' + startOffset: 2346 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2346 + endOffset: 2551 +- name: 'Reproducibility: data versioning, traceability, and experiment capture' + startOffset: 2551 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2551 + endOffset: 2662 +- name: 'Maturity Signals: when to introduce data versioning and governance' + startOffset: 2662 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2662 + endOffset: 2710 +- name: 'Skill Mix: combining data science, SRE/devops, and platform engineering' + startOffset: 2710 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2710 + endOffset: 2921 +- name: 'Getting 
Started: prioritize CI/CD and solve tangible pain points' + startOffset: 2921 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2921 + endOffset: 3081 +- name: 'MLOps Toolset: experiment tracking, model registry, serving, and monitoring' + startOffset: 3081 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3081 + endOffset: 3188 +- name: 'Dependency Management: package registries for reproducible deployments' + startOffset: 3188 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3188 + endOffset: 3410 +- name: 'Container Strategy: Docker, Kubernetes, Databricks trade-offs' + startOffset: 3410 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3410 + endOffset: 3476 +- name: 'Success & Failure Stories: deployment wins and integration freezes' + startOffset: 3476 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3476 + endOffset: 3654 +- name: 'MLOps Defined: operationalizing machine learning in business' + startOffset: 3654 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3654 + endOffset: 3718 +- name: 'Core Challenge: keeping models deployed, monitored, and maintained' + startOffset: 3718 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3718 + endOffset: 3762 +- name: Closing Remarks and next steps + startOffset: 3762 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3762 + endOffset: 3847 + transcript: - header: Podcast Introduction - line: Hi, everyone! Welcome to our event. 
This is brought to you by DataTalks.Club, @@ -832,107 +924,4 @@ transcript: sec: 3847 time: '1:04:07' who: Raphaël -dateadded: '2024-11-16' -duration: PT01H04M07S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=0 - endOffset: 81 -- name: 'Guest Overview: Raphaël Hoogvliets and Eneco role' - startOffset: 81 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=81 - endOffset: 154 -- name: 'Career Path: From agriculture to data science and MLOps' - startOffset: 154 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=154 - endOffset: 521 -- name: Agriculture technology, scale, and sustainability trade-offs - startOffset: 521 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=521 - endOffset: 636 -- name: Design Choices and Long‑Term Tradeoffs in ML projects - startOffset: 636 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=636 - endOffset: 817 -- name: 'Speed vs. Robustness: trade-offs in MLOps delivery' - startOffset: 817 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=817 - endOffset: 845 -- name: 'Team Coordination: why collaboration matters for ML at scale' - startOffset: 845 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=845 - endOffset: 1018 -- name: 'Key Team Roles: evangelists, tech translators, and technical leads' - startOffset: 1018 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1018 - endOffset: 1381 -- name: Centralized MLOps as an enabling platform team - startOffset: 1381 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1381 - endOffset: 1520 -- name: 'Support Model: how MLOps assists product teams and ML engineers' - startOffset: 1520 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1520 - endOffset: 1676 -- name: 'Adoption Strategy: iteration, feedback loops, and developer experience' - startOffset: 1676 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1676 - endOffset: 1966 -- name: 'Building Trust: collecting pain points and delivering quick wins' - startOffset: 1966 - url: 
https://youtube.com/watch?v=rMq63r3zi4c&t=1966 - endOffset: 2215 -- name: 'Measuring Value: KPIs, deployment frequency, and impact tracking' - startOffset: 2215 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2215 - endOffset: 2346 -- name: 'Core Practices: CI, repo structure, parameterization, and testing' - startOffset: 2346 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2346 - endOffset: 2551 -- name: 'Reproducibility: data versioning, traceability, and experiment capture' - startOffset: 2551 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2551 - endOffset: 2662 -- name: 'Maturity Signals: when to introduce data versioning and governance' - startOffset: 2662 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2662 - endOffset: 2710 -- name: 'Skill Mix: combining data science, SRE/devops, and platform engineering' - startOffset: 2710 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2710 - endOffset: 2921 -- name: 'Getting Started: prioritize CI/CD and solve tangible pain points' - startOffset: 2921 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2921 - endOffset: 3081 -- name: 'MLOps Toolset: experiment tracking, model registry, serving, and monitoring' - startOffset: 3081 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3081 - endOffset: 3188 -- name: 'Dependency Management: package registries for reproducible deployments' - startOffset: 3188 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3188 - endOffset: 3410 -- name: 'Container Strategy: Docker, Kubernetes, Databricks trade-offs' - startOffset: 3410 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3410 - endOffset: 3476 -- name: 'Success & Failure Stories: deployment wins and integration freezes' - startOffset: 3476 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3476 - endOffset: 3654 -- name: 'MLOps Defined: operationalizing machine learning in business' - startOffset: 3654 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3654 - endOffset: 3718 -- name: 'Core Challenge: keeping models deployed, 
monitored, and maintained' - startOffset: 3718 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3718 - endOffset: 3762 -- name: Closing Remarks and next steps - startOffset: 3762 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3762 - endOffset: 3847 --- diff --git a/_podcast/s19e05-large-hadron-collider-and-mentorship.md b/_podcast/to-update/s19e05-large-hadron-collider-and-mentorship.md similarity index 95% rename from _podcast/s19e05-large-hadron-collider-and-mentorship.md rename to _podcast/to-update/s19e05-large-hadron-collider-and-mentorship.md index 5062a31f..b7c29e73 100644 --- a/_podcast/s19e05-large-hadron-collider-and-mentorship.md +++ b/_podcast/to-update/s19e05-large-hadron-collider-and-mentorship.md @@ -1,20 +1,138 @@ --- +title: "Context: A physicist’s journey from building and analyzing massive collider experiments to applying those technical, collaborative, and software-engineering skills in industry—culminating in a deliberate turn toward mentoring others through career transitions and leadership challenges. + +Core theme: Experimental physics training—rooted in tackling large-scale data, complex systems, rigorous software and teamwork practices—is a powerful, transferable foundation, and mentorship is the essential bridge that translates that expertise into effective industry roles, career progression, and leadership." 
+short: Large Hadron Collider and Mentorship +season: 19 episode: 5 guests: - anastasiakaravdina +image: images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg ids: anchor: atalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth youtube: kV0ZDy2UtJA -image: images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth apple: https://podcasts.apple.com/us/podcast/large-hadron-collider-and-mentorship-anastasia-karavdina/id1541710331?i=1000677930293 spotify: https://open.spotify.com/episode/6AZ26Q8O4VBkC9YtUNzhab?si=75154323e14d4dca youtube: https://www.youtube.com/watch?v=kV0ZDy2UtJA -season: 19 -short: Large Hadron Collider and Mentorship -title: 'From Collider Physics to Data Science: Research Software Engineering, Interview - Prep & Mentorship' + +description: Discover research software engineering, data science, and mentorship strategies from a collider physicist - interview prep, CI/CD practices, and career-shift tips +intro: 'How do you pivot from collider physics to industry data science while mastering research software engineering, interview prep, and mentorship? In this episode, Anastasia Karavdina — a particle physicist turned data scientist with experience at Large Hadron Collider experiments, Blue Yonder, and Kaufland e‑commerce — walks through that transition and the concrete skills that made it possible.

We unpack collider physics basics (particle acceleration, detector imaging, event volumes, statistical analysis), roles in large research collaborations, and how hardware development and data analysis intersect. Anastasia explains how research software engineering practices — version control, CI/CD, and reproducible workflows — translate into enterprise machine learning and supply chain AI. She also covers interview challenges (position fit, evolving hiring expectations, behavioral interviews and cultural fit in Germany), how to prepare leadership stories, and practical tactics for moving into ML engineer and data science roles. Finally, she discusses mentoring: motivation, boundaries, paid vs free options, and platforms like MentorCruise.

Listen to gain actionable guidance on translating high‑energy physics expertise into data science, improving technical interview performance, and building effective mentorship relationships.' +dateadded: 2024-12-17 + +duration: PT01H01M22S + +quotableClips: +- name: Episode Opening & Guest Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=0 + endOffset: 106 +- name: Guest Background Snapshot + startOffset: 106 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=106 + endOffset: 242 +- name: 'Origins: From Novokuznetsk, Siberia' + startOffset: 242 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=242 + endOffset: 318 +- name: Relocation & Life in Hamburg + startOffset: 318 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=318 + endOffset: 362 +- name: 'Collider Purpose: Exploring Fundamental Particles' + startOffset: 362 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=362 + endOffset: 450 +- name: Particle Acceleration & Detector Imaging + startOffset: 450 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=450 + endOffset: 575 +- name: Collider Magnets & Data Capture Scale + startOffset: 575 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=575 + endOffset: 675 +- name: Event Volume & Statistical Analysis in Particle Physics + startOffset: 675 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=675 + endOffset: 760 +- name: Roles & Specializations in Large Research Collaborations + startOffset: 760 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=760 + endOffset: 993 +- name: 'Dual Roles: Hardware Development and Data Analysis' + startOffset: 993 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=993 + endOffset: 1066 +- name: Scientific Goals & Safety Myths (Higgs, Dark Matter, Black Holes) + startOffset: 1066 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1066 + endOffset: 1235 +- name: Translating Research Skills into Industry Data Science + startOffset: 1235 + url: 
https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1235 + endOffset: 1420 +- name: 'Research Software Engineering: Version Control & CI/CD Practices' + startOffset: 1420 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1420 + endOffset: 1471 +- name: 'Jargon Translation: Multivariate Analysis to Machine Learning' + startOffset: 1471 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1471 + endOffset: 1590 +- name: 'Interview Challenges: Position Fit & Evolving Hiring Expectations' + startOffset: 1590 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1590 + endOffset: 1680 +- name: 'Blue Yonder & Supply Chain AI: From Physics to Enterprise ML' + startOffset: 1680 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1680 + endOffset: 1765 +- name: 'Career Shift: From Industry Roles to Mentoring Focus' + startOffset: 1765 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1765 + endOffset: 2043 +- name: Behavioral Interview Strategies & Cultural Fit in Germany + startOffset: 2043 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2043 + endOffset: 2273 +- name: Preparing Stories & Practicing Leadership Principles + startOffset: 2273 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2273 + endOffset: 2486 +- name: 'Becoming a Mentor: Motivation, Rewards & Burnout' + startOffset: 2486 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2486 + endOffset: 2650 +- name: 'Mentoring Origins: Accidental Start & Finding the Right Fit' + startOffset: 2650 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2650 + endOffset: 2711 +- name: 'Mentoring Defined: Process, Goals & Time Commitment' + startOffset: 2711 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2711 + endOffset: 2965 +- name: 'Approaching Mentors: Clear Goals & Manageable Requests' + startOffset: 2965 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2965 + endOffset: 3052 +- name: 'Paid Mentorship vs. 
Free Communities: Pros & Cons' + startOffset: 3052 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3052 + endOffset: 3344 +- name: 'Mentorship as Career Leverage: Promotions & Leadership Experience' + startOffset: 3344 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3344 + endOffset: 3582 +- name: Finding Mentoring Communities & Platforms (MentorCruise, others) + startOffset: 3582 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3582 + endOffset: 3652 +- name: Episode Wrap-up & Key Takeaways + startOffset: 3652 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3652 + endOffset: 3682 + transcript: - header: Episode Opening & Guest Introduction - line: This week, we’ll talk about your career. Specifically, your transition from @@ -896,137 +1014,6 @@ transcript: sec: 3682 time: '1:01:22' who: Alexey -description: Discover research software engineering, data science, and mentorship - strategies from a collider physicist - interview prep, CI/CD practices, and career-shift - tips. -intro: 'How do you pivot from collider physics to industry data science while mastering - research software engineering, interview prep, and mentorship? In this episode, Anastasia - Karavdina — a particle physicist turned data scientist with experience at Large - Hadron Collider experiments, Blue Yonder, and Kaufland e‑commerce — walks through - that transition and the concrete skills that made it possible.

We unpack - collider physics basics (particle acceleration, detector imaging, event volumes, - statistical analysis), roles in large research collaborations, and how hardware - development and data analysis intersect. Anastasia explains how research software - engineering practices — version control, CI/CD, and reproducible workflows — translate - into enterprise machine learning and supply chain AI. She also covers interview - challenges (position fit, evolving hiring expectations, behavioral interviews and - cultural fit in Germany), how to prepare leadership stories, and practical tactics - for moving into ML engineer and data science roles. Finally, she discusses mentoring: - motivation, boundaries, paid vs free options, and platforms like MentorCruise.

- Listen to gain actionable guidance on translating high‑energy physics expertise - into data science, improving technical interview performance, and building effective - mentorship relationships.' -dateadded: '2024-12-17' -duration: PT01H01M22S -quotableClips: -- name: Episode Opening & Guest Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=0 - endOffset: 106 -- name: Guest Background Snapshot - startOffset: 106 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=106 - endOffset: 242 -- name: 'Origins: From Novokuznetsk, Siberia' - startOffset: 242 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=242 - endOffset: 318 -- name: Relocation & Life in Hamburg - startOffset: 318 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=318 - endOffset: 362 -- name: 'Collider Purpose: Exploring Fundamental Particles' - startOffset: 362 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=362 - endOffset: 450 -- name: Particle Acceleration & Detector Imaging - startOffset: 450 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=450 - endOffset: 575 -- name: Collider Magnets & Data Capture Scale - startOffset: 575 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=575 - endOffset: 675 -- name: Event Volume & Statistical Analysis in Particle Physics - startOffset: 675 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=675 - endOffset: 760 -- name: Roles & Specializations in Large Research Collaborations - startOffset: 760 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=760 - endOffset: 993 -- name: 'Dual Roles: Hardware Development and Data Analysis' - startOffset: 993 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=993 - endOffset: 1066 -- name: Scientific Goals & Safety Myths (Higgs, Dark Matter, Black Holes) - startOffset: 1066 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1066 - endOffset: 1235 -- name: Translating Research Skills into Industry Data Science - startOffset: 1235 - url: 
https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1235 - endOffset: 1420 -- name: 'Research Software Engineering: Version Control & CI/CD Practices' - startOffset: 1420 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1420 - endOffset: 1471 -- name: 'Jargon Translation: Multivariate Analysis to Machine Learning' - startOffset: 1471 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1471 - endOffset: 1590 -- name: 'Interview Challenges: Position Fit & Evolving Hiring Expectations' - startOffset: 1590 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1590 - endOffset: 1680 -- name: 'Blue Yonder & Supply Chain AI: From Physics to Enterprise ML' - startOffset: 1680 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1680 - endOffset: 1765 -- name: 'Career Shift: From Industry Roles to Mentoring Focus' - startOffset: 1765 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1765 - endOffset: 2043 -- name: Behavioral Interview Strategies & Cultural Fit in Germany - startOffset: 2043 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2043 - endOffset: 2273 -- name: Preparing Stories & Practicing Leadership Principles - startOffset: 2273 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2273 - endOffset: 2486 -- name: 'Becoming a Mentor: Motivation, Rewards & Burnout' - startOffset: 2486 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2486 - endOffset: 2650 -- name: 'Mentoring Origins: Accidental Start & Finding the Right Fit' - startOffset: 2650 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2650 - endOffset: 2711 -- name: 'Mentoring Defined: Process, Goals & Time Commitment' - startOffset: 2711 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2711 - endOffset: 2965 -- name: 'Approaching Mentors: Clear Goals & Manageable Requests' - startOffset: 2965 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2965 - endOffset: 3052 -- name: 'Paid Mentorship vs. 
Free Communities: Pros & Cons' - startOffset: 3052 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3052 - endOffset: 3344 -- name: 'Mentorship as Career Leverage: Promotions & Leadership Experience' - startOffset: 3344 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3344 - endOffset: 3582 -- name: Finding Mentoring Communities & Platforms (MentorCruise, others) - startOffset: 3582 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3582 - endOffset: 3652 -- name: Episode Wrap-up & Key Takeaways - startOffset: 3652 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3652 - endOffset: 3682 --- Links: diff --git a/_podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.md b/_podcast/to-update/s19e06-ai-in-industry-trust-return-on-investment-and-future.md similarity index 94% rename from _podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.md rename to _podcast/to-update/s19e06-ai-in-industry-trust-return-on-investment-and-future.md index 3aed7042..9bbb9b89 100644 --- a/_podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.md +++ b/_podcast/to-update/s19e06-ai-in-industry-trust-return-on-investment-and-future.md @@ -1,38 +1,119 @@ --- +title: "Context: Across a career-spanning conversation about linguistics, industry AI roles, chatbot hacks, safety failures, mitigations, human-in-the-loop workflows, translation and ancient-language challenges, and industry trade-offs, the episode maps how generative AI is rapidly democratized yet brittle, risky, and dependent on data and linguistic nuance. + +Core: The central imperative is that realizing the real-world promise of generative AI requires marrying deep linguistic and domain expertise with layered technical defenses, human oversight, and pragmatic product trade-offs—so systems can be safe, trustworthy, and useful despite hallucinations, manipulation, data-quality limits, and operational constraints." 
+short: 'AI in Industry: Trust, Return on Investment and Future' +season: 19 episode: 6 guests: - mariasukhareva -description: Discover generative AI, chatbot safety, and prompting strategies to prevent - hallucinations and data exfiltration, boost translation quality and ROI. -intro: How do we balance the rapid democratization of generative AI with real-world - chatbot safety, trust, and operational value? In this episode, we speak with a linguist-turned-computational-linguist - who now serves as a principal key expert in AI advising on technology and risk. - We trace their career path into industry and then dig into the practical security - and reliability challenges of large-scale chatbots.

Topics include the - rise of prompt engineering and new “AI experts,” a large-scale chatbot hacking exercise - and its findings on hallucinations, legal exposure, and financial incidents, and - data exfiltration methods like overloaded prompts and knowledge-base retrieval. - The conversation moves to concrete mitigations—output validation, query analysis, - layered defenses, and non-LLM classifiers—plus usability and ROI issues that slow - adoption. We also cover human-in-the-loop review, AI-assisted translation workflows, - prompt customization for controlled machine translation, and broader multilingual - and historical-linguistics challenges (from orthography to low-resource languages). -

Listen to learn actionable strategies for chatbot safety, practical prompt - and translation techniques, and how to evaluate trade-offs between research innovation - and operational risk. +image: images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg ids: anchor: atalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 youtube: bT7-HRNCltk -image: images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 apple: https://podcasts.apple.com/us/podcast/ai-in-industry-trust-return-on-investment-and-future/id1541710331?i=1000679505962 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=a99463e34ffb48f1 youtube: https://www.youtube.com/watch?v=bT7-HRNCltk -season: 19 -short: 'AI in Industry: Trust, Return on Investment and Future' -title: 'Hardening Generative AI Chatbots: Prevent Prompt Injection, Data Exfiltration - & Hallucinations' + +description: Discover generative AI, chatbot safety, and prompting strategies to prevent hallucinations and data exfiltration, boost translation quality and ROI +intro: How do we balance the rapid democratization of generative AI with real-world chatbot safety, trust, and operational value? In this episode, we speak with a linguist-turned-computational-linguist who now serves as a principal key expert in AI advising on technology and risk. We trace their career path into industry and then dig into the practical security and reliability challenges of large-scale chatbots.

Topics include the rise of prompt engineering and new “AI experts,” a large-scale chatbot hacking exercise and its findings on hallucinations, legal exposure, and financial incidents, and data exfiltration methods like overloaded prompts and knowledge-base retrieval. The conversation moves to concrete mitigations—output validation, query analysis, layered defenses, and non-LLM classifiers—plus usability and ROI issues that slow adoption. We also cover human-in-the-loop review, AI-assisted translation workflows, prompt customization for controlled machine translation, and broader multilingual and historical-linguistics challenges (from orthography to low-resource languages).

Listen to learn actionable strategies for chatbot safety, practical prompt and translation techniques, and how to evaluate trade-offs between research innovation and operational risk +dateadded: 2024-12-17 + +duration: PT00H59M53S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=0 + endOffset: 133 +- name: 'Career Path: From Linguist to Computational Linguistics and Industry' + startOffset: 133 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=133 + endOffset: 251 +- name: 'Role Definition: Principal Key Expert in AI — Advising on Technology and + Risk' + startOffset: 251 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=251 + endOffset: 342 +- name: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' + startOffset: 342 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=342 + endOffset: 568 +- name: 'Bot Safety Challenge: Large-Scale Chatbot Hacking Exercise and Findings' + startOffset: 568 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=568 + endOffset: 698 +- name: 'Chatbot Failures: Hallucinations, Legal Exposure, and Financial Incidents' + startOffset: 698 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=698 + endOffset: 800 +- name: 'Data Exfiltration Techniques: Overloading Prompts and Knowledge-Base Retrieval' + startOffset: 800 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=800 + endOffset: 975 +- name: 'Mitigations: Output Validation, Query Analysis, and Layered Defenses' + startOffset: 975 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=975 + endOffset: 1020 +- name: 'Non-LLM Classifiers: Robust Alternatives to Manipulable Generative Models' + startOffset: 1020 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1020 + endOffset: 1081 +- name: 'Trust and Hallucinations: User Confidence, Safety, and Adoption Risks' + startOffset: 1081 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1081 + endOffset: 1239 +- name: 
'Chatbot Adoption Issues: Usability, Verbosity, and Return on Investment' + startOffset: 1239 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1239 + endOffset: 1534 +- name: 'Human-in-the-Loop Solutions: Hybrid Review to Improve Accuracy' + startOffset: 1534 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1534 + endOffset: 1633 +- name: 'AI as Assistant: Moderation Tools, Autopilot Analogy, and Workforce Impact' + startOffset: 1633 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1633 + endOffset: 1793 +- name: 'Translation Workflows: AI-Augmented Translators and Quality Control' + startOffset: 1793 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1793 + endOffset: 1948 +- name: 'Prompt Customization: Controlled Machine Translation with ChatGPT' + startOffset: 1948 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1948 + endOffset: 2144 +- name: 'Historical Linguistics: Middle & Old English Pronunciation Insights' + startOffset: 2144 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2144 + endOffset: 2708 +- name: 'Ancient Languages: Cuneiform, Sumerian Transcription, and MT Approaches' + startOffset: 2708 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2708 + endOffset: 2906 +- name: 'Script Complexity: Logograms vs. Phonetics in Ancient Texts' + startOffset: 2906 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2906 + endOffset: 3181 +- name: 'Multilingual Models: Progress and Challenges for Low-Resource Languages' + startOffset: 3181 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3181 + endOffset: 3412 +- name: 'Orthography & Data Quality: Inconsistent Spelling in Historical Corpora' + startOffset: 3412 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3412 + endOffset: 3448 +- name: 'Industry Trade-offs: Research Innovation vs. 
ROI and Operational Needs' + startOffset: 3448 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3448 + endOffset: 3554 +- name: 'Episode Wrap-Up: Key Takeaways on AI Trust, Safety, and Future Directions' + startOffset: 3554 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3554 + endOffset: 3593 + transcript: - header: Episode Introduction & Guest Overview - line: This week, we’re discussing the practical application of generative AI in @@ -811,96 +892,4 @@ transcript: sec: 3593 time: '59:53' who: Alexey -dateadded: '2024-12-17' -duration: PT00H59M53S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=0 - endOffset: 133 -- name: 'Career Path: From Linguist to Computational Linguistics and Industry' - startOffset: 133 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=133 - endOffset: 251 -- name: 'Role Definition: Principal Key Expert in AI — Advising on Technology and - Risk' - startOffset: 251 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=251 - endOffset: 342 -- name: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' - startOffset: 342 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=342 - endOffset: 568 -- name: 'Bot Safety Challenge: Large-Scale Chatbot Hacking Exercise and Findings' - startOffset: 568 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=568 - endOffset: 698 -- name: 'Chatbot Failures: Hallucinations, Legal Exposure, and Financial Incidents' - startOffset: 698 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=698 - endOffset: 800 -- name: 'Data Exfiltration Techniques: Overloading Prompts and Knowledge-Base Retrieval' - startOffset: 800 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=800 - endOffset: 975 -- name: 'Mitigations: Output Validation, Query Analysis, and Layered Defenses' - startOffset: 975 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=975 - endOffset: 1020 -- name: 'Non-LLM Classifiers: Robust 
Alternatives to Manipulable Generative Models' - startOffset: 1020 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1020 - endOffset: 1081 -- name: 'Trust and Hallucinations: User Confidence, Safety, and Adoption Risks' - startOffset: 1081 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1081 - endOffset: 1239 -- name: 'Chatbot Adoption Issues: Usability, Verbosity, and Return on Investment' - startOffset: 1239 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1239 - endOffset: 1534 -- name: 'Human-in-the-Loop Solutions: Hybrid Review to Improve Accuracy' - startOffset: 1534 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1534 - endOffset: 1633 -- name: 'AI as Assistant: Moderation Tools, Autopilot Analogy, and Workforce Impact' - startOffset: 1633 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1633 - endOffset: 1793 -- name: 'Translation Workflows: AI-Augmented Translators and Quality Control' - startOffset: 1793 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1793 - endOffset: 1948 -- name: 'Prompt Customization: Controlled Machine Translation with ChatGPT' - startOffset: 1948 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1948 - endOffset: 2144 -- name: 'Historical Linguistics: Middle & Old English Pronunciation Insights' - startOffset: 2144 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2144 - endOffset: 2708 -- name: 'Ancient Languages: Cuneiform, Sumerian Transcription, and MT Approaches' - startOffset: 2708 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2708 - endOffset: 2906 -- name: 'Script Complexity: Logograms vs. 
Phonetics in Ancient Texts' - startOffset: 2906 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2906 - endOffset: 3181 -- name: 'Multilingual Models: Progress and Challenges for Low-Resource Languages' - startOffset: 3181 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3181 - endOffset: 3412 -- name: 'Orthography & Data Quality: Inconsistent Spelling in Historical Corpora' - startOffset: 3412 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3412 - endOffset: 3448 -- name: 'Industry Trade-offs: Research Innovation vs. ROI and Operational Needs' - startOffset: 3448 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3448 - endOffset: 3554 -- name: 'Episode Wrap-Up: Key Takeaways on AI Trust, Safety, and Future Directions' - startOffset: 3554 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3554 - endOffset: 3593 --- diff --git a/_podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md b/_podcast/to-update/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md similarity index 95% rename from _podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md rename to _podcast/to-update/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md index 3d6a4ceb..ca8c2948 100644 --- a/_podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md +++ b/_podcast/to-update/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md @@ -1,20 +1,127 @@ --- +title: "Context: Isabella Bicalho’s episode traces a career arc from biology to machine learning—through internships (INRIA), freelancing, open-source contributions, teaching, and community engagement—illustrating practical projects, networking, and pedagogical work as the vehicles for growth. 
+ +Core: The unifying idea is that continuous, community‑centered, project‑based learning—combining hands‑on applied work, open‑source contribution, mentorship, clear communication, and judicious use of AI tools—serves as the most effective pathway to build job‑ready skills, bridge disciplines, and create real-world impact in data science and ML." +short: Career advice, learning, and featuring women in ML and AI +season: 19 episode: 7 guests: - isabellabicalho +image: images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg ids: anchor: atalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura youtube: GifY8Zn-pnU -image: images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura apple: https://podcasts.apple.com/us/podcast/career-advice-learning-and-featuring-women-in-ml-and/id1541710331?i=1000680294201 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=rx69Xf98QZqGqgpEQgzX2w youtube: https://www.youtube.com/watch?v=GifY8Zn-pnU -season: 19 -short: Career advice, learning, and featuring women in ML and AI -title: 'From Biology to ML: Build a Data Science Portfolio with Open-Source, Computer - Vision & Transformers' + +description: Build a data science portfolio with open-source computer vision projects, gain real job-ready experience, networking tactics and freelance tips +intro: 'How do you pivot from biology into machine learning and build a job-ready data science portfolio using open-source, computer vision and transformers? 
In this episode Isabella Bicalho — a Machine Learning Engineer and Data Scientist with three years of hands-on AI development and prior computational research — walks through her path from Biology (University of Maranhão, University of Marseille) to ML, including an INRIA internship on biomarkers and immunotherapy prediction.

We cover practical steps for portfolio building: using open-source contributions and community courses (Hugging Face) to get experience, real project examples like green space segmentation with Sentinel-2 and the trade-offs between CNNs and transformers, and applied freelance work such as recommendation systems and knowledge graph automation. Isabella also explains how statistics became her gateway to transformers, how to find low-barrier open-source projects (docs, data, applied code), and how collaboration builds soft skills recruiters value.

Listen to learn concrete strategies for creating a data science portfolio, where to find computer vision and transformer projects, how to leverage community and mentorship, and how to communicate your work to land roles in machine learning.' +dateadded: 2024-12-17 + +duration: PT01H03M42S + +quotableClips: +- name: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella + Bicalho)' + startOffset: 0 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=0 + endOffset: 421 +- name: 'Career Overview: Transition from Biology to Machine Learning' + startOffset: 421 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=421 + endOffset: 509 +- name: Statistics as Gateway to Machine Learning; Progression to Transformers + startOffset: 509 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=509 + endOffset: 554 +- name: 'Education: University of Maranhão and University of Marseille' + startOffset: 554 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=554 + endOffset: 674 +- name: 'INRIA Internship: Biomarkers and Immunotherapy Prediction' + startOffset: 674 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=674 + endOffset: 865 +- name: INRIA's Role in AI Research and France's AI Ecosystem + startOffset: 865 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=865 + endOffset: 955 +- name: 'Freelance Work: Recommendation System & Knowledge Graph Automation' + startOffset: 955 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=955 + endOffset: 1132 +- name: 'Career Pivot: Choosing Engineering Over a PhD' + startOffset: 1132 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1132 + endOffset: 1342 +- name: 'First Freelance Client: CV Visibility and Networking' + startOffset: 1342 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1342 + endOffset: 1419 +- name: Leveraging Open-Source & AI for Good to Gain Experience + startOffset: 1419 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1419 + endOffset: 1590 +- name: 'Hugging Face Community 
Course: Computer Vision Contributions & Review' + startOffset: 1590 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1590 + endOffset: 1917 +- name: 'Teaching & Communication: Simplifying ML Jargon for Learners' + startOffset: 1917 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1917 + endOffset: 2081 +- name: 'Finding Open-Source Opportunities: Communities, Docs, and Local Chapters' + startOffset: 2081 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2081 + endOffset: 2246 +- name: 'Types of Open-Source Projects: Code, Data, and Applied Solutions' + startOffset: 2246 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2246 + endOffset: 2412 +- name: 'Green Space Segmentation: Sentinel-2, CNNs vs Transformers, Practicality' + startOffset: 2412 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2412 + endOffset: 2544 +- name: Project Work as Job-Ready Experience and Portfolio Building + startOffset: 2544 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2544 + endOffset: 2608 +- name: 'Soft Skills from Collaboration: Communication and Prioritization' + startOffset: 2608 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2608 + endOffset: 2734 +- name: 'Informational Networking: Reaching Out for Role Insights' + startOffset: 2734 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2734 + endOffset: 2995 +- name: 'Onboarding New Contributors: Low Entry Barriers and Mentorship' + startOffset: 2995 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2995 + endOffset: 3094 +- name: 'AI Assistants in Learning: Benefits and Limitations of ChatGPT' + startOffset: 3094 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3094 + endOffset: 3239 +- name: 'Data Like Substack: Spotlighting Women in Data and ML' + startOffset: 3239 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3239 + endOffset: 3447 +- name: 'Featured Interviews: Bioinformatics, Fake News Detection, AI Ethics' + startOffset: 3447 + url: 
https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3447 + endOffset: 3736 +- name: 'Connecting with Isabella: LinkedIn and Substack Contact Info' + startOffset: 3736 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3736 + endOffset: 3822 +- name: Episode Wrap-Up and Closing Remarks + startOffset: 3822 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3822 + endOffset: 3822 + transcript: - header: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella Bicalho)' @@ -965,124 +1072,6 @@ transcript: sec: 3822 time: '1:03:42' who: Alexey -description: Build a data science portfolio with open-source computer vision projects, - gain real job-ready experience, networking tactics and freelance tips. -intro: 'How do you pivot from biology into machine learning and build a job-ready - data science portfolio using open-source, computer vision and transformers? In this - episode Isabella Bicalho — a Machine Learning Engineer and Data Scientist with three - years of hands-on AI development and prior computational research — walks through - her path from Biology (University of Maranhão, University of Marseille) to ML, including - an INRIA internship on biomarkers and immunotherapy prediction.

We cover - practical steps for portfolio building: using open-source contributions and community - courses (Hugging Face) to get experience, real project examples like green space - segmentation with Sentinel-2 and the trade-offs between CNNs and transformers, and - applied freelance work such as recommendation systems and knowledge graph automation. - Isabella also explains how statistics became her gateway to transformers, how to - find low-barrier open-source projects (docs, data, applied code), and how collaboration - builds soft skills recruiters value.

Listen to learn concrete strategies - for creating a data science portfolio, where to find computer vision and transformer - projects, how to leverage community and mentorship, and how to communicate your - work to land roles in machine learning.' -dateadded: '2024-12-17' -duration: PT01H03M42S -quotableClips: -- name: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella - Bicalho)' - startOffset: 0 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=0 - endOffset: 421 -- name: 'Career Overview: Transition from Biology to Machine Learning' - startOffset: 421 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=421 - endOffset: 509 -- name: Statistics as Gateway to Machine Learning; Progression to Transformers - startOffset: 509 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=509 - endOffset: 554 -- name: 'Education: University of Maranhão and University of Marseille' - startOffset: 554 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=554 - endOffset: 674 -- name: 'INRIA Internship: Biomarkers and Immunotherapy Prediction' - startOffset: 674 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=674 - endOffset: 865 -- name: INRIA's Role in AI Research and France's AI Ecosystem - startOffset: 865 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=865 - endOffset: 955 -- name: 'Freelance Work: Recommendation System & Knowledge Graph Automation' - startOffset: 955 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=955 - endOffset: 1132 -- name: 'Career Pivot: Choosing Engineering Over a PhD' - startOffset: 1132 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1132 - endOffset: 1342 -- name: 'First Freelance Client: CV Visibility and Networking' - startOffset: 1342 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1342 - endOffset: 1419 -- name: Leveraging Open-Source & AI for Good to Gain Experience - startOffset: 1419 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1419 - endOffset: 1590 -- name: 'Hugging Face Community 
Course: Computer Vision Contributions & Review' - startOffset: 1590 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1590 - endOffset: 1917 -- name: 'Teaching & Communication: Simplifying ML Jargon for Learners' - startOffset: 1917 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1917 - endOffset: 2081 -- name: 'Finding Open-Source Opportunities: Communities, Docs, and Local Chapters' - startOffset: 2081 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2081 - endOffset: 2246 -- name: 'Types of Open-Source Projects: Code, Data, and Applied Solutions' - startOffset: 2246 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2246 - endOffset: 2412 -- name: 'Green Space Segmentation: Sentinel-2, CNNs vs Transformers, Practicality' - startOffset: 2412 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2412 - endOffset: 2544 -- name: Project Work as Job-Ready Experience and Portfolio Building - startOffset: 2544 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2544 - endOffset: 2608 -- name: 'Soft Skills from Collaboration: Communication and Prioritization' - startOffset: 2608 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2608 - endOffset: 2734 -- name: 'Informational Networking: Reaching Out for Role Insights' - startOffset: 2734 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2734 - endOffset: 2995 -- name: 'Onboarding New Contributors: Low Entry Barriers and Mentorship' - startOffset: 2995 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2995 - endOffset: 3094 -- name: 'AI Assistants in Learning: Benefits and Limitations of ChatGPT' - startOffset: 3094 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3094 - endOffset: 3239 -- name: 'Data Like Substack: Spotlighting Women in Data and ML' - startOffset: 3239 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3239 - endOffset: 3447 -- name: 'Featured Interviews: Bioinformatics, Fake News Detection, AI Ethics' - startOffset: 3447 - url: 
https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3447 - endOffset: 3736 -- name: 'Connecting with Isabella: LinkedIn and Substack Contact Info' - startOffset: 3736 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3736 - endOffset: 3822 -- name: Episode Wrap-Up and Closing Remarks - startOffset: 3822 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3822 - endOffset: 3822 --- Links: diff --git a/_podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md b/_podcast/to-update/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md similarity index 95% rename from _podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md rename to _podcast/to-update/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md index 17381446..77cfbd84 100644 --- a/_podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md +++ b/_podcast/to-update/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md @@ -1,19 +1,148 @@ --- +title: "Context: The episode traces a journey from hands-on technical beginnings (trade school, web design, C++, DevOps, automation) through burnout and volunteering, into community management, open source, NGO founding, career coaching, and a lifestyle experiment (off-grid living), while exploring personality, team fit, product focus, and practical processes applied across corporate and nonprofit settings. + +Core: This episode centers on intentionally aligning technical skills, systems-thinking problem solving, and personal values—leveraging automation, community, and experimentation—to design a sustainable, impact-driven career and life that bridges corporate, volunteer, and personal worlds. + +Key themes: transferable problem-solving and automation; values-driven career design; community & open source as leverage; translating corporate processes to NGOs; personality and team-fit for role choice; experimentation in lifestyle and governance." 
+short: Career choices, transitions and promotions in and out of tech +season: 19 episode: 8 guests: - agitajaunzeme +image: images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg ids: anchor: atalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv youtube: QKWu5-6_6TE -image: images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv apple: https://podcasts.apple.com/us/podcast/career-choices-transitions-and-promotions-in-and-out/id1541710331?i=1000683499310 spotify: https://open.spotify.com/episode/0UW7fLgm9fqMG64GQwvgIN?si=ZixbzDcZT2mNkVrJjZVbeA youtube: https://www.youtube.com/watch?v=QKWu5-6_6TE -season: 19 -short: Career choices, transitions and promotions in and out of tech -title: 'DevOps to Data Engineering: Automation, Open Source & Volunteering' + +description: 'Learn DevOps-to-Data-Engineering career tactics: automation, open source & volunteering to build skills, earn rapid promotions, and lead projects.' +intro: How do you move from DevOps into data engineering while using automation, open source contributions, and volunteering to shape your career? In this episode, Agita Jaunzeme — a DevOps/DataOps engineer, community manager, educator and NGO founder focused on inclusion in Porto — walks through that exact path.

We trace her journey from trade school and early programming to configuration management and rapid promotion through scripting repetitive tasks, then into burnout, Erasmus+ volunteering, and community work at VMware. Key topics include automation case studies, building and contributing to open source (Versatile Data Kit), applying corporate processes and agile documentation to NGOs, volunteer management versus employment, spotting volunteer-to-career opportunities, and the practical differences between data scientists and data engineers. We also cover community management, career coaching, founding an NGO, meetup activities, and even an off-grid living experiment.

Listen for actionable guidance on automation best practices, how open source community work can reopen corporate doors, designing volunteer processes, and aligning technical career moves with personal values — practical takeaways for anyone navigating a career pivot into data engineering or community-driven tech work +dateadded: 2025-01-12 + +duration: PT01H01M46S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=0 + endOffset: 156 +- name: Guest Welcome & Interview Agenda + startOffset: 156 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=156 + endOffset: 182 +- name: 'Career Beginnings: Trade School, Web Design, First Programming' + startOffset: 182 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=182 + endOffset: 255 +- name: 'Education & Bootcamp: Computer Science, Accenture, C++' + startOffset: 255 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=255 + endOffset: 322 +- name: 'Transition to DevOps: Configuration Management & Early Automation' + startOffset: 322 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=322 + endOffset: 377 +- name: 'Burnout and Self-Discovery: Travel and Volunteerism' + startOffset: 377 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=377 + endOffset: 456 +- name: 'Erasmus+ Volunteering: Programs, Exchanges, and Training' + startOffset: 456 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=456 + endOffset: 560 +- name: Community Management at VMware & Versatile Data Kit (Open Source) + startOffset: 560 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=560 + endOffset: 717 +- name: 'Testing Passions: Flow, Energy, and Career Fit' + startOffset: 717 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=717 + endOffset: 869 +- name: 'Automation Case Study: Scripting Repetitive Tasks and Rapid Promotion' + startOffset: 869 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=869 + endOffset: 1156 +- name: 'Problem-Solving as a Core Skill: 
Transferable Technical Competencies' + startOffset: 1156 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1156 + endOffset: 1263 +- name: 'Applying Corporate Processes to NGOs: Documentation & Agile Practices' + startOffset: 1263 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1263 + endOffset: 1435 +- name: 'Volunteer Management vs. Employment: Motivation and Process Design' + startOffset: 1435 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1435 + endOffset: 1507 +- name: 'Spotting Opportunities: Transitioning into Volunteer Roles' + startOffset: 1507 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1507 + endOffset: 1610 +- name: Personality Types & Team Composition (MBTI relevance) + startOffset: 1610 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1610 + endOffset: 1793 +- name: 'Personality Traits for Data Engineering: Precision, Persistence, Detail' + startOffset: 1793 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1793 + endOffset: 2092 +- name: 'Data Scientist vs. 
Data Engineer: Interests and Role Differences' + startOffset: 2092 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2092 + endOffset: 2185 +- name: 'Returning to Corporate via Open Source: Community + Technical Work' + startOffset: 2185 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2185 + endOffset: 2285 +- name: 'Community Manager Role: Content, DevRel Overlap, and Events' + startOffset: 2285 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2285 + endOffset: 2423 +- name: 'Product Focus: Simplifying Vision and Goal-Setting' + startOffset: 2423 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2423 + endOffset: 2584 +- name: 'Career Coaching Course: "Align Your Career With Who You Are"' + startOffset: 2584 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2584 + endOffset: 2704 +- name: 'Founding an NGO: Legal Setup, Governance, and Launch' + startOffset: 2704 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2704 + endOffset: 2812 +- name: 'Inclusion in Porto: Connecting Expats and Locals' + startOffset: 2812 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2812 + endOffset: 2952 +- name: 'Meetup Activities: MBTI, Improv, and Community Events' + startOffset: 2952 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2952 + endOffset: 3131 +- name: 'Off-Grid Living Experiment: Land, Dome, and Lifestyle Shift' + startOffset: 3131 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3131 + endOffset: 3207 +- name: 'Off-Grid Utilities: Solar Power, Rainwater, and Well Plans' + startOffset: 3207 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3207 + endOffset: 3352 +- name: 'Cost Comparison: Off-Grid Life vs. 
Porto Renting' + startOffset: 3352 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3352 + endOffset: 3407 +- name: 'Unemployment Party: Mastermind Brainstorming Technique' + startOffset: 3407 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3407 + endOffset: 3657 +- name: Episode Closing & Final Remarks + startOffset: 3657 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3657 + endOffset: 3706 + transcript: - header: Podcast Introduction - line: We have a special guest today, Agita. She has done a lot in her career, including @@ -1034,143 +1163,6 @@ transcript: sec: 3706 time: '1:01:46' who: Agita -description: 'Learn DevOps-to-Data-Engineering career tactics: automation, open source - & volunteering to build skills, earn rapid promotions, and lead projects.' -intro: How do you move from DevOps into data engineering while using automation, open - source contributions, and volunteering to shape your career? In this episode, Agita - Jaunzeme — a DevOps/DataOps engineer, community manager, educator and NGO founder - focused on inclusion in Porto — walks through that exact path.

We trace - her journey from trade school and early programming to configuration management - and rapid promotion through scripting repetitive tasks, then into burnout, Erasmus+ - volunteering, and community work at VMware. Key topics include automation case studies, - building and contributing to open source (Versatile Data Kit), applying corporate - processes and agile documentation to NGOs, volunteer management versus employment, - spotting volunteer-to-career opportunities, and the practical differences between - data scientists and data engineers. We also cover community management, career coaching, - founding an NGO, meetup activities, and even an off-grid living experiment.

- Listen for actionable guidance on automation best practices, how open source community - work can reopen corporate doors, designing volunteer processes, and aligning technical - career moves with personal values — practical takeaways for anyone navigating a - career pivot into data engineering or community-driven tech work. -dateadded: '2025-01-12' -duration: PT01H01M46S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=0 - endOffset: 156 -- name: Guest Welcome & Interview Agenda - startOffset: 156 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=156 - endOffset: 182 -- name: 'Career Beginnings: Trade School, Web Design, First Programming' - startOffset: 182 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=182 - endOffset: 255 -- name: 'Education & Bootcamp: Computer Science, Accenture, C++' - startOffset: 255 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=255 - endOffset: 322 -- name: 'Transition to DevOps: Configuration Management & Early Automation' - startOffset: 322 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=322 - endOffset: 377 -- name: 'Burnout and Self-Discovery: Travel and Volunteerism' - startOffset: 377 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=377 - endOffset: 456 -- name: 'Erasmus+ Volunteering: Programs, Exchanges, and Training' - startOffset: 456 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=456 - endOffset: 560 -- name: Community Management at VMware & Versatile Data Kit (Open Source) - startOffset: 560 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=560 - endOffset: 717 -- name: 'Testing Passions: Flow, Energy, and Career Fit' - startOffset: 717 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=717 - endOffset: 869 -- name: 'Automation Case Study: Scripting Repetitive Tasks and Rapid Promotion' - startOffset: 869 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=869 - endOffset: 1156 -- name: 'Problem-Solving as a Core Skill: 
Transferable Technical Competencies' - startOffset: 1156 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1156 - endOffset: 1263 -- name: 'Applying Corporate Processes to NGOs: Documentation & Agile Practices' - startOffset: 1263 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1263 - endOffset: 1435 -- name: 'Volunteer Management vs. Employment: Motivation and Process Design' - startOffset: 1435 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1435 - endOffset: 1507 -- name: 'Spotting Opportunities: Transitioning into Volunteer Roles' - startOffset: 1507 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1507 - endOffset: 1610 -- name: Personality Types & Team Composition (MBTI relevance) - startOffset: 1610 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1610 - endOffset: 1793 -- name: 'Personality Traits for Data Engineering: Precision, Persistence, Detail' - startOffset: 1793 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1793 - endOffset: 2092 -- name: 'Data Scientist vs. 
Data Engineer: Interests and Role Differences' - startOffset: 2092 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2092 - endOffset: 2185 -- name: 'Returning to Corporate via Open Source: Community + Technical Work' - startOffset: 2185 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2185 - endOffset: 2285 -- name: 'Community Manager Role: Content, DevRel Overlap, and Events' - startOffset: 2285 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2285 - endOffset: 2423 -- name: 'Product Focus: Simplifying Vision and Goal-Setting' - startOffset: 2423 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2423 - endOffset: 2584 -- name: 'Career Coaching Course: "Align Your Career With Who You Are"' - startOffset: 2584 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2584 - endOffset: 2704 -- name: 'Founding an NGO: Legal Setup, Governance, and Launch' - startOffset: 2704 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2704 - endOffset: 2812 -- name: 'Inclusion in Porto: Connecting Expats and Locals' - startOffset: 2812 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2812 - endOffset: 2952 -- name: 'Meetup Activities: MBTI, Improv, and Community Events' - startOffset: 2952 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2952 - endOffset: 3131 -- name: 'Off-Grid Living Experiment: Land, Dome, and Lifestyle Shift' - startOffset: 3131 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3131 - endOffset: 3207 -- name: 'Off-Grid Utilities: Solar Power, Rainwater, and Well Plans' - startOffset: 3207 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3207 - endOffset: 3352 -- name: 'Cost Comparison: Off-Grid Life vs. 
Porto Renting' - startOffset: 3352 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3352 - endOffset: 3407 -- name: 'Unemployment Party: Mastermind Brainstorming Technique' - startOffset: 3407 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3407 - endOffset: 3657 -- name: Episode Closing & Final Remarks - startOffset: 3657 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3657 - endOffset: 3706 --- Links: diff --git a/_podcast/s19e09-linguistics-and-fairness.md b/_podcast/to-update/s19e09-linguistics-and-fairness.md similarity index 94% rename from _podcast/s19e09-linguistics-and-fairness.md rename to _podcast/to-update/s19e09-linguistics-and-fairness.md index 64e0bfc0..43531ca4 100644 --- a/_podcast/s19e09-linguistics-and-fairness.md +++ b/_podcast/to-update/s19e09-linguistics-and-fairness.md @@ -1,20 +1,165 @@ --- +title: "Context: +This episode follows Tamara’s journey from software and music‑tech engineering into computational linguistics and open‑source stewardship, and uses concrete case studies (credit‑scoring fairness, moderation systems) plus tool discussions (Fairlearn, interpretability packages, secure model serialization) to examine how technical choices, metrics, and developer practices translate into real societal outcomes. Recurring threads include tradeoffs in fairness metrics, the necessity of domain expertise and human‑in‑the‑loop processes, the engineering challenges of interoperable, secure ML tooling, and the role of community and practitioner education in shaping responsible ML. + +Core: +The unifying idea is that building fair, trustworthy AI is a sociotechnical engineering task: it requires not just algorithms but pragmatic, community‑driven tools, secure software practices, clear interpretability, and organizational processes that embed human judgment and domain knowledge so technical models produce just, accountable outcomes in the real world." 
+short: Linguistics and Fairness +season: 19 episode: 9 guests: - tamaraatanasoska +image: images/podcast/s19e09-linguistics-and-fairness.jpg ids: anchor: atalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 youtube: sXU9vMDBjmk -image: images/podcast/s19e09-linguistics-and-fairness.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 apple: https://podcasts.apple.com/us/podcast/linguistics-and-fairness-tamara-atanasoska/id1541710331?i=1000684411354 spotify: https://open.spotify.com/episode/6S4a85iiRzl7NU1HykXeKT?si=FNoDtj74T2ujQKzKdDWwzA youtube: https://www.youtube.com/watch?v=sXU9vMDBjmk -season: 19 -short: Linguistics and Fairness -title: 'Fairness in AI: Using Fairlearn to Mitigate Credit Scoring Bias & Build Explainable - Models' + +description: 'Discover how to use Fairlearn to mitigate credit scoring bias and build explainable models: practical tools, human-in-the-loop tips, and evaluation tradeoffs.' +intro: 'How do you reduce bias in credit scoring models without sacrificing explainability? In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., Fairlearn maintainer, and contributor to scikit-learn and skops with a background in software engineering and computational linguistics — walks through practical approaches to fairness in AI. We dig into a real credit scoring use case, empirical findings on gender disparities, and the societal harms of biased models such as debt and repossession.

Tamara explains Fairlearn’s group fairness tools, visualization and mitigation methods, and the tradeoffs between false positives, false negatives, and demographic parity. She discusses how to choose sensitive groups in domain‑specific settings, the limits of automation, the need for human‑in‑the‑loop systems, and who in an organization should decide fairness tradeoffs. The episode also covers interpretability and explainable models — inspection tools, partial dependence, and cross‑library integration with scikit‑learn and estimator APIs — plus practical concerns like secure model serialization and community contribution paths.

Listen to learn actionable guidance on auditing and mitigating credit scoring bias, building explainable models, and integrating Fairlearn into real‑world ML workflows.' +dateadded: 2025-02-24 + +duration: PT00H59M14S + +quotableClips: +- name: Podcast Introduction & Episode Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=0 + endOffset: 151 +- name: 'Guest Introduction: Tamara’s Open‑Source Roles (Fairlearn, scikit‑learn, + Skope‑Rules)' + startOffset: 151 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=151 + endOffset: 198 +- name: 'Career Overview: Software Engineering to Computational Linguistics' + startOffset: 198 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=198 + endOffset: 277 +- name: 'Music Tech Experience: Ableton and Push 2 Instrument Design' + startOffset: 277 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=277 + endOffset: 401 +- name: 'Device Architecture: Laptop Computation vs Standalone Hardware' + startOffset: 401 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=401 + endOffset: 536 +- name: 'Transition to NLP & AI: Academic Interests and Motivation' + startOffset: 536 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=536 + endOffset: 604 +- name: 'Cognitive Systems Studies: Language, Neuroscience, and ML' + startOffset: 604 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=604 + endOffset: 710 +- name: 'Research Path: Returning to Study and New Projects' + startOffset: 710 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=710 + endOffset: 761 +- name: 'Music as Hobby: Balancing Creative Work and Research' + startOffset: 761 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=761 + endOffset: 824 +- name: 'Sociotechnical Framing: Modeling Language in Social Context' + startOffset: 824 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=824 + endOffset: 892 +- name: 'Fairness in AI: Credit Scoring Use Case and Real‑World Impact' + startOffset: 892 + url: 
https://www.youtube.com/watch?v=sXU9vMDBjmk&t=892 + endOffset: 910 +- name: 'Empirical Findings: Gender Disparities in Credit Models (Fairlearn Study)' + startOffset: 910 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=910 + endOffset: 1094 +- name: 'Societal Harms: Debt, Repossession, and Downstream Consequences' + startOffset: 1094 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1094 + endOffset: 1291 +- name: 'Fairlearn Tools: Group Fairness, Visualization, and Mitigation Methods' + startOffset: 1291 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1291 + endOffset: 1444 +- name: 'Sensitive Group Selection: Domain‑Specific Decisions in Credit Models' + startOffset: 1444 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1444 + endOffset: 1581 +- name: 'Limits of Automation: Human Judgment in Fairness Assessments' + startOffset: 1581 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1581 + endOffset: 1732 +- name: 'Metric Tradeoffs: False Positives vs False Negatives and Demographic Parity' + startOffset: 1732 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1732 + endOffset: 1893 +- name: 'Organizational Responsibility: Who Decides Fairness Tradeoffs?' 
+ startOffset: 1893 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1893 + endOffset: 1991 +- name: 'Practitioner Education: Frameworks, Ambiguity, and Learning Objectives' + startOffset: 1991 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1991 + endOffset: 2123 +- name: 'Moderation Case Study: Cross‑Functional Teams and Domain Expertise' + startOffset: 2123 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2123 + endOffset: 2233 +- name: 'Human‑in‑the‑Loop: Essential Component for Fair AI Systems' + startOffset: 2233 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2233 + endOffset: 2358 +- name: 'Joining Probable: From Open‑Source Contributions to a Role' + startOffset: 2358 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2358 + endOffset: 2457 +- name: 'Probable Work: Explainability, Language Models, and Library Integration' + startOffset: 2457 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2457 + endOffset: 2574 +- name: 'Interpretability Tools: Inspection Package and Partial Dependence' + startOffset: 2574 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2574 + endOffset: 2694 +- name: 'Cross‑Library Compatibility: Fairlearn, scikit‑learn, and Estimator APIs' + startOffset: 2694 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2694 + endOffset: 2780 +- name: 'Scopes Library: Secure Model Persistence and Hugging Face Integration' + startOffset: 2780 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2780 + endOffset: 2836 +- name: 'Serialization Risks: Pickle Vulnerabilities and Secure Deserialization' + startOffset: 2836 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2836 + endOffset: 3054 +- name: 'Community Involvement: PyLadies, Sprints, and Fairlearn Events' + startOffset: 3054 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3054 + endOffset: 3130 +- name: 'Contributing to Fairlearn: Discord, Good‑First Issues, and Sprints' + startOffset: 3130 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3130 + 
endOffset: 3341 +- name: 'Development Ethos: Testing, Refactoring, and Custom Estimators' + startOffset: 3341 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3341 + endOffset: 3397 +- name: 'Project Updates: Upcoming Fairlearn Release and Maintainer Notes' + startOffset: 3397 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3397 + endOffset: 3442 +- name: 'Practical Quirk: Tokenization Issues Breaking "Fairlearn" in Transcripts' + startOffset: 3442 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3442 + endOffset: 3494 +- name: Closing Remarks, Contact Info, and Final Thoughts + startOffset: 3494 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3494 + endOffset: 3554 + transcript: - header: Podcast Introduction & Episode Overview - line: This week, we’ll talk about linguistic fairness and a sociotechnical perspective @@ -917,161 +1062,6 @@ transcript: sec: 3554 time: '59:14' who: Alexey -description: 'Discover how to use Fairlearn to mitigate credit scoring bias and build - explainable models: practical tools, human-in-the-loop tips, and evaluation tradeoffs.' -intro: How do you reduce bias in credit scoring models without sacrificing explainability? - In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., - Fairlearn maintainer, and contributor to scikit-learn and skops with a background - in software engineering and computational linguistics — walks through practical - approaches to fairness in AI. We dig into a real credit scoring use case, empirical - findings on gender disparities, and the societal harms of biased models such as - debt and repossession.

Tamara explains Fairlearn’s group fairness tools, - visualization and mitigation methods, and the tradeoffs between false positives, - false negatives, and demographic parity. She discusses how to choose sensitive groups - in domain‑specific settings, the limits of automation, the need for human‑in‑the‑loop - systems, and who in an organization should decide fairness tradeoffs. The episode - also covers interpretability and explainable models — inspection tools, partial - dependence, and cross‑library integration with scikit‑learn and estimator APIs — - plus practical concerns like secure model serialization and community contribution - paths.

Listen to learn actionable guidance on auditing and mitigating credit - scoring bias, building explainable models, and integrating Fairlearn into real‑world - ML workflows. -dateadded: '2025-02-24' -duration: PT00H59M14S -quotableClips: -- name: Podcast Introduction & Episode Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=0 - endOffset: 151 -- name: 'Guest Introduction: Tamara’s Open‑Source Roles (Fairlearn, scikit‑learn, - Skope‑Rules)' - startOffset: 151 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=151 - endOffset: 198 -- name: 'Career Overview: Software Engineering to Computational Linguistics' - startOffset: 198 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=198 - endOffset: 277 -- name: 'Music Tech Experience: Ableton and Push 2 Instrument Design' - startOffset: 277 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=277 - endOffset: 401 -- name: 'Device Architecture: Laptop Computation vs Standalone Hardware' - startOffset: 401 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=401 - endOffset: 536 -- name: 'Transition to NLP & AI: Academic Interests and Motivation' - startOffset: 536 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=536 - endOffset: 604 -- name: 'Cognitive Systems Studies: Language, Neuroscience, and ML' - startOffset: 604 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=604 - endOffset: 710 -- name: 'Research Path: Returning to Study and New Projects' - startOffset: 710 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=710 - endOffset: 761 -- name: 'Music as Hobby: Balancing Creative Work and Research' - startOffset: 761 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=761 - endOffset: 824 -- name: 'Sociotechnical Framing: Modeling Language in Social Context' - startOffset: 824 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=824 - endOffset: 892 -- name: 'Fairness in AI: Credit Scoring Use Case and Real‑World Impact' - startOffset: 892 - url: 
https://www.youtube.com/watch?v=sXU9vMDBjmk&t=892 - endOffset: 910 -- name: 'Empirical Findings: Gender Disparities in Credit Models (Fairlearn Study)' - startOffset: 910 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=910 - endOffset: 1094 -- name: 'Societal Harms: Debt, Repossession, and Downstream Consequences' - startOffset: 1094 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1094 - endOffset: 1291 -- name: 'Fairlearn Tools: Group Fairness, Visualization, and Mitigation Methods' - startOffset: 1291 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1291 - endOffset: 1444 -- name: 'Sensitive Group Selection: Domain‑Specific Decisions in Credit Models' - startOffset: 1444 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1444 - endOffset: 1581 -- name: 'Limits of Automation: Human Judgment in Fairness Assessments' - startOffset: 1581 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1581 - endOffset: 1732 -- name: 'Metric Tradeoffs: False Positives vs False Negatives and Demographic Parity' - startOffset: 1732 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1732 - endOffset: 1893 -- name: 'Organizational Responsibility: Who Decides Fairness Tradeoffs?' 
- startOffset: 1893 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1893 - endOffset: 1991 -- name: 'Practitioner Education: Frameworks, Ambiguity, and Learning Objectives' - startOffset: 1991 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1991 - endOffset: 2123 -- name: 'Moderation Case Study: Cross‑Functional Teams and Domain Expertise' - startOffset: 2123 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2123 - endOffset: 2233 -- name: 'Human‑in‑the‑Loop: Essential Component for Fair AI Systems' - startOffset: 2233 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2233 - endOffset: 2358 -- name: 'Joining Probable: From Open‑Source Contributions to a Role' - startOffset: 2358 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2358 - endOffset: 2457 -- name: 'Probable Work: Explainability, Language Models, and Library Integration' - startOffset: 2457 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2457 - endOffset: 2574 -- name: 'Interpretability Tools: Inspection Package and Partial Dependence' - startOffset: 2574 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2574 - endOffset: 2694 -- name: 'Cross‑Library Compatibility: Fairlearn, scikit‑learn, and Estimator APIs' - startOffset: 2694 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2694 - endOffset: 2780 -- name: 'Scopes Library: Secure Model Persistence and Hugging Face Integration' - startOffset: 2780 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2780 - endOffset: 2836 -- name: 'Serialization Risks: Pickle Vulnerabilities and Secure Deserialization' - startOffset: 2836 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2836 - endOffset: 3054 -- name: 'Community Involvement: PyLadies, Sprints, and Fairlearn Events' - startOffset: 3054 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3054 - endOffset: 3130 -- name: 'Contributing to Fairlearn: Discord, Good‑First Issues, and Sprints' - startOffset: 3130 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3130 - 
endOffset: 3341 -- name: 'Development Ethos: Testing, Refactoring, and Custom Estimators' - startOffset: 3341 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3341 - endOffset: 3397 -- name: 'Project Updates: Upcoming Fairlearn Release and Maintainer Notes' - startOffset: 3397 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3397 - endOffset: 3442 -- name: 'Practical Quirk: Tokenization Issues Breaking "Fairlearn" in Transcripts' - startOffset: 3442 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3442 - endOffset: 3494 -- name: Closing Remarks, Contact Info, and Final Thoughts - startOffset: 3494 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3494 - endOffset: 3554 --- Links: diff --git a/_podcast/s20e01-trends-in-ai-infrastructure.md b/_podcast/to-update/s20e01-trends-in-ai-infrastructure.md similarity index 93% rename from _podcast/s20e01-trends-in-ai-infrastructure.md rename to _podcast/to-update/s20e01-trends-in-ai-infrastructure.md index 5e3067ed..f3c47355 100644 --- a/_podcast/s20e01-trends-in-ai-infrastructure.md +++ b/_podcast/to-update/s20e01-trends-in-ai-infrastructure.md @@ -1,19 +1,124 @@ --- +title: "Context: A conversation with an AI-infrastructure practitioner about moving from developer tools to building DStack, exploring real-world trade-offs across hardware, software, deployment, and business models for practical AI adoption. + +Core theme (single unifying idea): Practical AI is an infrastructure-first problem — success depends less on chasing the biggest model and more on designing cost-effective, controllable, and efficient stacks (hardware, orchestration, and software) that fit hybrid cloud/on‑prem realities, leverage open-source ecosystems, and optimize distributed training and serving for real-world constraints. 
+ +Dominant through-line: Every segment — from cost of ownership and cloud vs on‑prem trade‑offs to open vs proprietary models, decentralization, distributed training bottlenecks, orchestration gaps, and edge/federated use cases — returns to the same tension: how to deliver AI that is scalable, performant, and economically sustainable by choosing the right mix of tooling, deployment model, and optimizations. + +Key themes implied by the narrative: +- Cost and control drive architecture choices more than raw model capability. +- Hybrid cloud + on‑prem is the pragmatic reality; orchestration must adapt. +- Open-source ecosystems accelerate feedback, tooling, and business flexibility. +- Efficient distributed training and communication optimizations trump brute-force scaling. +- Decentralization (privacy, local control, edge) is often a matter of fit and trade-offs, not ideology. +- Practical provisioning, automation, and orchestration are the unsolved scaling problems for non–AI‑first organizations." 
+short: Trends in AI Infrastructure +season: 20 episode: 1 guests: - andreycheptsov +image: images/podcast/s20e01-trends-in-ai-infrastructure.jpg ids: anchor: atalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 youtube: 1aMuynlLM3o -image: images/podcast/s20e01-trends-in-ai-infrastructure.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 apple: https://podcasts.apple.com/us/podcast/redefining-ai-infrastructure-open-source-chips-and/id1541710331?i=1000687565459 spotify: https://open.spotify.com/episode/5MIc1pAXPxVYSr0E4pndU4 youtube: https://www.youtube.com/watch?v=1aMuynlLM3o -season: 20 -short: Trends in AI Infrastructure -title: 'Cut AI Infrastructure Costs: DStack for On‑Prem GPU Training & MLOps Alternatives' + +description: Discover DStack to cut AI infrastructure costs with on‑prem GPU training and MLOps alternatives—optimize distributed training, reduce orchestration overhead +intro: 'How can engineering teams cut AI infrastructure costs without sacrificing performance or control? In this episode, Andrey Cheptsov — founder and CEO of dstack and former JetBrains engineer — walks through the motivation behind DStack, an open‑source orchestration alternative designed to lower AI infrastructure total cost of ownership. We trace the cloud vs on‑prem economics (including MLOps limitations like SageMaker), the decision to build open‑source developer tooling, and the trade‑offs between open and proprietary models.

You’ll hear practical discussion of on‑prem GPU training and distributed training challenges: GPU requirements, PyTorch + NCCL communication bottlenecks, optimization strategies such as DeepSpeed, and tips for fine‑tuning and serving models for non–AI‑first companies. The episode also covers orchestration gaps — Kubernetes and SLURM limitations — plus bare‑metal provisioning, hybrid cloud realities, edge computing scope, and federated learning versus distributed compute.

If you’re evaluating MLOps alternatives, on‑prem GPU coordination, or ways to reduce AI infrastructure cost, this episode offers concrete perspectives on when to choose on‑prem vs cloud, how DStack fits into the stack, and practical trade‑offs for production ML workloads.' +dateadded: 2025-02-26 + +duration: PT01H06M04S + +quotableClips: +- name: Episode Kickoff & Guest Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=0 + endOffset: 166 +- name: 'Career Background: JetBrains, DataSpell, and Move into AI' + startOffset: 166 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=166 + endOffset: 327 +- name: 'Origins of DStack: Reducing AI Infrastructure Cost of Ownership' + startOffset: 327 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=327 + endOffset: 505 +- name: Cloud vs On‑Prem Costs and MLOps Limitations (SageMaker example) + startOffset: 505 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=505 + endOffset: 600 +- name: Cloud-to-On‑Prem Realities in the Post‑ChatGPT Era + startOffset: 600 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=600 + endOffset: 778 +- name: 'Choosing Open Source: Developer Tools, Feedback, and Community' + startOffset: 778 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=778 + endOffset: 1053 +- name: 'Open vs Proprietary Models: Business Models and Trade‑Offs' + startOffset: 1053 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1053 + endOffset: 1297 +- name: 'Decentralization in AI: Privacy, Control, and Industry Fit' + startOffset: 1297 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1297 + endOffset: 1816 +- name: 'Training at Scale: GPU Requirements and Distributed Challenges' + startOffset: 1816 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1816 + endOffset: 2086 +- name: 'Distributed Training Stack: PyTorch, NCCL, and Communication Bottlenecks' + startOffset: 2086 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2086 + endOffset: 2255 +- name: 'Efficiency 
Over Brute Force: Optimization Strategies and DeepSpeed' + startOffset: 2255 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2255 + endOffset: 2370 +- name: Fine‑Tuning & Serving Models for Non–AI‑First Companies + startOffset: 2370 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2370 + endOffset: 2836 +- name: 'Orchestration Gaps: Kubernetes Limitations for AI Workflows and SLURM' + startOffset: 2836 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2836 + endOffset: 3059 +- name: Kubernetes as the Deployment Standard vs Smaller Alternatives + startOffset: 3059 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3059 + endOffset: 3116 +- name: 'Hybrid Infrastructure Outlook: Cloud Dominance and On‑Prem Nuances' + startOffset: 3116 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3116 + endOffset: 3271 +- name: 'On‑Prem GPU Coordination: SSH, Resource Contention, and Real Examples' + startOffset: 3271 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3271 + endOffset: 3413 +- name: 'Bare‑Metal as a Service: Provisioning, Automation, and Firmware Management' + startOffset: 3413 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3413 + endOffset: 3487 +- name: 'Edge Computing Scope: Devices, Local Models, and Definition Ambiguity' + startOffset: 3487 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3487 + endOffset: 3630 +- name: 'Federated Learning vs Distributed Compute: Practicality and Use Cases' + startOffset: 3630 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3630 + endOffset: 3771 +- name: 'Closing Pick: Science‑Fiction Recommendation — The Three‑Body Problem' + startOffset: 3771 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3771 + endOffset: 3938 +- name: Episode Wrap‑Up & Links to DStack and Guest Resources + startOffset: 3938 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3938 + endOffset: 3964 + transcript: - header: Episode Kickoff & Guest Introduction - line: This week, we'll talk about AI infrastructure and 
everything related to it. @@ -850,111 +955,6 @@ transcript: sec: 3964 time: '1:06:04' who: Andrey -description: Discover DStack to cut AI infrastructure costs with on‑prem GPU training - and MLOps alternatives—optimize distributed training, reduce orchestration overhead. -intro: 'How can engineering teams cut AI infrastructure costs without sacrificing - performance or control? In this episode, Andrey Cheptsov — founder and CEO of dstack - and former JetBrains engineer — walks through the motivation behind DStack, an open‑source - orchestration alternative designed to lower AI infrastructure total cost of ownership. - We trace the cloud vs on‑prem economics (including MLOps limitations like SageMaker), - the decision to build open‑source developer tooling, and the trade‑offs between - open and proprietary models.

You’ll hear practical discussion of on‑prem - GPU training and distributed training challenges: GPU requirements, PyTorch + NCCL - communication bottlenecks, optimization strategies such as DeepSpeed, and tips for - fine‑tuning and serving models for non–AI‑first companies. The episode also covers - orchestration gaps — Kubernetes and SLURM limitations — plus bare‑metal provisioning, - hybrid cloud realities, edge computing scope, and federated learning versus distributed - compute.

If you’re evaluating MLOps alternatives, on‑prem GPU coordination, - or ways to reduce AI infrastructure cost, this episode offers concrete perspectives - on when to choose on‑prem vs cloud, how DStack fits into the stack, and practical - trade‑offs for production ML workloads.' -dateadded: '2025-02-26' -duration: PT01H06M04S -quotableClips: -- name: Episode Kickoff & Guest Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=0 - endOffset: 166 -- name: 'Career Background: JetBrains, DataSpell, and Move into AI' - startOffset: 166 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=166 - endOffset: 327 -- name: 'Origins of DStack: Reducing AI Infrastructure Cost of Ownership' - startOffset: 327 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=327 - endOffset: 505 -- name: Cloud vs On‑Prem Costs and MLOps Limitations (SageMaker example) - startOffset: 505 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=505 - endOffset: 600 -- name: Cloud-to-On‑Prem Realities in the Post‑ChatGPT Era - startOffset: 600 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=600 - endOffset: 778 -- name: 'Choosing Open Source: Developer Tools, Feedback, and Community' - startOffset: 778 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=778 - endOffset: 1053 -- name: 'Open vs Proprietary Models: Business Models and Trade‑Offs' - startOffset: 1053 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1053 - endOffset: 1297 -- name: 'Decentralization in AI: Privacy, Control, and Industry Fit' - startOffset: 1297 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1297 - endOffset: 1816 -- name: 'Training at Scale: GPU Requirements and Distributed Challenges' - startOffset: 1816 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1816 - endOffset: 2086 -- name: 'Distributed Training Stack: PyTorch, NCCL, and Communication Bottlenecks' - startOffset: 2086 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2086 - endOffset: 2255 -- name: 'Efficiency 
Over Brute Force: Optimization Strategies and DeepSpeed' - startOffset: 2255 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2255 - endOffset: 2370 -- name: Fine‑Tuning & Serving Models for Non–AI‑First Companies - startOffset: 2370 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2370 - endOffset: 2836 -- name: 'Orchestration Gaps: Kubernetes Limitations for AI Workflows and SLURM' - startOffset: 2836 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2836 - endOffset: 3059 -- name: Kubernetes as the Deployment Standard vs Smaller Alternatives - startOffset: 3059 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3059 - endOffset: 3116 -- name: 'Hybrid Infrastructure Outlook: Cloud Dominance and On‑Prem Nuances' - startOffset: 3116 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3116 - endOffset: 3271 -- name: 'On‑Prem GPU Coordination: SSH, Resource Contention, and Real Examples' - startOffset: 3271 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3271 - endOffset: 3413 -- name: 'Bare‑Metal as a Service: Provisioning, Automation, and Firmware Management' - startOffset: 3413 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3413 - endOffset: 3487 -- name: 'Edge Computing Scope: Devices, Local Models, and Definition Ambiguity' - startOffset: 3487 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3487 - endOffset: 3630 -- name: 'Federated Learning vs Distributed Compute: Practicality and Use Cases' - startOffset: 3630 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3630 - endOffset: 3771 -- name: 'Closing Pick: Science‑Fiction Recommendation — The Three‑Body Problem' - startOffset: 3771 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3771 - endOffset: 3938 -- name: Episode Wrap‑Up & Links to DStack and Guest Resources - startOffset: 3938 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3938 - endOffset: 3964 --- Links: diff --git a/_podcast/s20e02-competitive-machine-learning-and-teaching.md 
b/_podcast/to-update/s20e02-competitive-machine-learning-and-teaching.md similarity index 89% rename from _podcast/s20e02-competitive-machine-learning-and-teaching.md rename to _podcast/to-update/s20e02-competitive-machine-learning-and-teaching.md index dde3bc62..6d511d38 100644 --- a/_podcast/s20e02-competitive-machine-learning-and-teaching.md +++ b/_podcast/to-update/s20e02-competitive-machine-learning-and-teaching.md @@ -1,19 +1,142 @@ --- +title: "Context: A Kaggle Grandmaster recounts a career arc from competitive modeling and open-source tooling (MLEM) through industry roles, curriculum design, large-scale online teaching, and mentoring. Episodes segments cover how competitions teach iterative problem-solving, validation, infrastructure and teamwork; how those skills map (and sometimes must be adapted) to production ML and MLOps; how to design practical coursework and assessments; and how to show business value and respond to new tools like AutoML and generative AI. + +Core through-line (single high-level theme): Hands-on, competition-driven practice—grounded in iteration, rigorous validation, tooling, and community—is the crucible that converts data-science craft into production-ready systems, scalable education, and demonstrable career and business impact. + +Key themes that support this through-line: +- Competitions as accelerated, low-risk labs for learning baselines, feature engineering, and workflows. +- The necessity of infrastructure, repeatable pipelines, and MLOps to make contest solutions production-ready. +- Teaching and curriculum design that mirror real-world system projects to transfer practical skills at scale. +- Community, mentorship, documentation, and open-source tooling as force multipliers for learning and adoption. +- Communicating business value and adapting competitive techniques to regional and organizational contexts. 
+- New productivity tools (AutoML, generative AI) change how work is done but reinforce the need for sound validation and system design." +short: Competitive Machine Learning and Teaching +season: 20 episode: 2 guests: - alexanderguschin +image: images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg ids: anchor: atalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 youtube: NfAJAr7FvyY&t -image: images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 apple: https://podcasts.apple.com/us/podcast/competitive-machine-leaning-and-teaching-alexander/id1541710331?i=1000692309866 spotify: https://open.spotify.com/episode/6xsov9a1US8D8w5xKcjkNm youtube: https://www.youtube.com/watch?v=NfAJAr7FvyY&t -season: 20 -short: Competitive Machine Learning and Teaching -title: 'From Kaggle to Production: MLOps, Competition Strategies & Curriculum Design' + +description: Master Kaggle strategies, MLOps and curriculum design to convert competition skills into production ML, scalable courses, teamwork and career boosts +intro: How do you turn Kaggle competition wins into production-ready machine learning and effective teaching? In this episode, Alexander Guschin — a machine learning engineer with 10+ years’ experience, a Kaggle Grandmaster ranked 5th globally, leader of DS and SE teams, open-source contributor, and instructor to 100K+ students — walks through that transition. We cover MLOps and tooling anecdotes (including the MLEM story), practical competition strategies like baselines, iteration and infrastructure, and how those practices map to production ML. Alexander also discusses preparing for competitions while studying, regional career differences, solo vs. team collaboration, and demoing Kaggle’s business value to managers. 
For educators and program leads, he outlines curriculum design grounded in machine learning system design projects, problem-centered assignments (a bot-detection case study), dual leaderboards for ML and engineering, and scaling online courses—drawing on his Coursera work and student-built software. Listeners will gain actionable guidance on competition strategy, MLOps best practices, designing real-world assignments, and how to use competitive experience to deliver production-grade ML and teach it effectively +dateadded: 2025-02-26 + +duration: PT01H05M09S + +quotableClips: +- name: Episode Start + startOffset: 0 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=0 + endOffset: 230 +- name: Guest Introduction & Kaggle Grandmaster Credentials + startOffset: 230 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=230 + endOffset: 388 +- name: Early Industry Roles & Open-Source Contributions + startOffset: 388 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=388 + endOffset: 516 +- name: MLEM Story & Tooling Anecdotes + startOffset: 516 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=516 + endOffset: 701 +- name: Kaggle Beginnings & Local Community Influence + startOffset: 701 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=701 + endOffset: 887 +- name: Balancing Competitions with University Studies + startOffset: 887 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=887 + endOffset: 956 +- name: Time Investment & Learning Curve on Competitions + startOffset: 956 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=956 + endOffset: 1030 +- name: Kaggle for Skill Broadening, Domain Exposure & Interviews + startOffset: 1030 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1030 + endOffset: 1302 +- name: 'Competition Preparation: Iteration, Baselines & Infrastructure' + startOffset: 1302 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1302 + endOffset: 1365 +- name: Applying Competition Experience to Production ML + 
startOffset: 1365 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1365 + endOffset: 1578 +- name: Regional Differences in Kaggle’s Career Value + startOffset: 1578 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1578 + endOffset: 1748 +- name: 'Collaboration Strategies: Teamwork vs. Solo Competitions' + startOffset: 1748 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1748 + endOffset: 1890 +- name: Teaching Teens & Participation in AI Olympiads + startOffset: 1890 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1890 + endOffset: 2005 +- name: 'Transition to Teaching: From Competitor to Instructor' + startOffset: 2005 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2005 + endOffset: 2277 +- name: 'Practical Curriculum Design: Production ML & MLOps' + startOffset: 2277 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2277 + endOffset: 2470 +- name: Machine Learning System Design Projects & Real-World Work + startOffset: 2470 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2470 + endOffset: 2810 +- name: 'Problem-Centered Assignments: Bot Detection Case Study' + startOffset: 2810 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2810 + endOffset: 3010 +- name: Teamwork, Communication & Dual Leaderboards (ML + Technical) + startOffset: 3010 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3010 + endOffset: 3250 +- name: 'Online Education at Scale: Coursera Course & 100k Students' + startOffset: 3250 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3250 + endOffset: 3382 +- name: Teaching Platform Development & Student-Built Software + startOffset: 3382 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3382 + endOffset: 3455 +- name: Documentation, Mentorship & Industry Partnerships + startOffset: 3455 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3455 + endOffset: 3591 +- name: Demonstrating Kaggle’s Business Value to Managers + startOffset: 3591 + url: 
https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3591 + endOffset: 3708 +- name: 'Competition Essentials: EDA, Validation & No Single Trick' + startOffset: 3708 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3708 + endOffset: 3791 +- name: 'Generative AI & AutoML: Productivity vs. Winning Solutions' + startOffset: 3791 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3791 + endOffset: 3913 +- name: 'Career Reflections: Current Activity and Kaggle Legacy' + startOffset: 3913 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3913 + endOffset: 4134 +- name: Closing Remarks & Episode Wrap-Up + startOffset: 4134 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=4134 + endOffset: 3909 + transcript: - header: Episode Start - header: Guest Introduction & Kaggle Grandmaster Credentials @@ -608,130 +731,6 @@ transcript: sec: 4139 time: '1:08:59' who: Alexey -description: Master Kaggle strategies, MLOps and curriculum design to convert competition - skills into production ML, scalable courses, teamwork and career boosts. -intro: How do you turn Kaggle competition wins into production-ready machine learning - and effective teaching? In this episode, Alexander Guschin — a machine learning engineer - with 10+ years’ experience, a Kaggle Grandmaster ranked 5th globally, leader of - DS and SE teams, open-source contributor, and instructor to 100K+ students — walks - through that transition. We cover MLOps and tooling anecdotes (including the MLEM - story), practical competition strategies like baselines, iteration and infrastructure, - and how those practices map to production ML. Alexander also discusses preparing - for competitions while studying, regional career differences, solo vs. team collaboration, - and demoing Kaggle’s business value to managers. 
For educators and program leads, - he outlines curriculum design grounded in machine learning system design projects, - problem-centered assignments (a bot-detection case study), dual leaderboards for - ML and engineering, and scaling online courses—drawing on his Coursera work and - student-built software. Listeners will gain actionable guidance on competition strategy, - MLOps best practices, designing real-world assignments, and how to use competitive - experience to deliver production-grade ML and teach it effectively. -dateadded: '2025-02-26' -duration: PT01H05M09S -quotableClips: -- name: Episode Start - startOffset: 0 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=0 - endOffset: 230 -- name: Guest Introduction & Kaggle Grandmaster Credentials - startOffset: 230 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=230 - endOffset: 388 -- name: Early Industry Roles & Open-Source Contributions - startOffset: 388 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=388 - endOffset: 516 -- name: MLEM Story & Tooling Anecdotes - startOffset: 516 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=516 - endOffset: 701 -- name: Kaggle Beginnings & Local Community Influence - startOffset: 701 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=701 - endOffset: 887 -- name: Balancing Competitions with University Studies - startOffset: 887 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=887 - endOffset: 956 -- name: Time Investment & Learning Curve on Competitions - startOffset: 956 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=956 - endOffset: 1030 -- name: Kaggle for Skill Broadening, Domain Exposure & Interviews - startOffset: 1030 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1030 - endOffset: 1302 -- name: 'Competition Preparation: Iteration, Baselines & Infrastructure' - startOffset: 1302 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1302 - endOffset: 1365 -- name: Applying Competition Experience to Production 
ML - startOffset: 1365 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1365 - endOffset: 1578 -- name: Regional Differences in Kaggle’s Career Value - startOffset: 1578 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1578 - endOffset: 1748 -- name: 'Collaboration Strategies: Teamwork vs. Solo Competitions' - startOffset: 1748 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1748 - endOffset: 1890 -- name: Teaching Teens & Participation in AI Olympiads - startOffset: 1890 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1890 - endOffset: 2005 -- name: 'Transition to Teaching: From Competitor to Instructor' - startOffset: 2005 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2005 - endOffset: 2277 -- name: 'Practical Curriculum Design: Production ML & MLOps' - startOffset: 2277 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2277 - endOffset: 2470 -- name: Machine Learning System Design Projects & Real-World Work - startOffset: 2470 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2470 - endOffset: 2810 -- name: 'Problem-Centered Assignments: Bot Detection Case Study' - startOffset: 2810 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2810 - endOffset: 3010 -- name: Teamwork, Communication & Dual Leaderboards (ML + Technical) - startOffset: 3010 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3010 - endOffset: 3250 -- name: 'Online Education at Scale: Coursera Course & 100k Students' - startOffset: 3250 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3250 - endOffset: 3382 -- name: Teaching Platform Development & Student-Built Software - startOffset: 3382 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3382 - endOffset: 3455 -- name: Documentation, Mentorship & Industry Partnerships - startOffset: 3455 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3455 - endOffset: 3591 -- name: Demonstrating Kaggle’s Business Value to Managers - startOffset: 3591 - url: 
https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3591 - endOffset: 3708 -- name: 'Competition Essentials: EDA, Validation & No Single Trick' - startOffset: 3708 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3708 - endOffset: 3791 -- name: 'Generative AI & AutoML: Productivity vs. Winning Solutions' - startOffset: 3791 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3791 - endOffset: 3913 -- name: 'Career Reflections: Current Activity and Kaggle Legacy' - startOffset: 3913 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3913 - endOffset: 4134 -- name: Closing Remarks & Episode Wrap-Up - startOffset: 4134 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=4134 - endOffset: 3909 --- Links: diff --git a/_podcast/s20e03-trends-in-data-engineering.md b/_podcast/to-update/s20e03-trends-in-data-engineering.md similarity index 91% rename from _podcast/s20e03-trends-in-data-engineering.md rename to _podcast/to-update/s20e03-trends-in-data-engineering.md index 91eb8204..49735877 100644 --- a/_podcast/s20e03-trends-in-data-engineering.md +++ b/_podcast/to-update/s20e03-trends-in-data-engineering.md @@ -1,19 +1,140 @@ --- +title: "Context — This episode traces the practical and technological shifts shaping data engineering today: the rise of open-source, standards-driven building blocks (table formats like Iceberg/Delta, catalogs, DuckDB), new orchestration and workflow patterns, AI-driven tooling, specialization in governance/quality/streaming, and emerging marketplaces and platforms (like DLT Plus) that package reusable data products. 
+ +Core theme — Data engineering is transitioning from monolithic, vendor-locked stacks to a composable, metadata-first ecosystem: teams win by adopting open, portable standards (headless table formats, catalogs, embeddable query engines), designing interoperable, cost-efficient pipelines, and packaging reusable data products—while practitioners pivot toward specialization and metadata-aware, SQL/Python-first skills to build and govern interoperable, AI-enabled data workflows." +short: Trends in Data Engineering +season: 20 episode: 3 guests: - adrianbrudaru +image: images/podcast/s20e03-trends-in-data-engineering.jpg ids: anchor: atalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae youtube: AlCFKbFIEM8 -image: images/podcast/s20e03-trends-in-data-engineering.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae apple: https://podcasts.apple.com/us/podcast/trends-in-data-engineering-adrian-brudaru/id1541710331?i=1000698294801 spotify: https://open.spotify.com/episode/35QbCW6Evqk1EPMKUDGGdv youtube: https://www.youtube.com/watch?v=AlCFKbFIEM8 -season: 20 -short: Trends in Data Engineering -title: 'Future-Proof Data Engineering: Adopt Apache Iceberg, DuckDB & AI-Powered Pipelines' + +description: Discover Apache Iceberg, DuckDB & AI-powered pipelines - learn cost-efficient table formats, orchestration tactics and a career roadmap for data engineers +intro: How do you future‑proof data engineering against vendor lock‑in, rising AI demand, and exploding metadata complexity? In this episode, Adrian Brudaru — a former business analyst turned freelancer and co‑founder of DLT — walks through practical choices for building resilient, cost‑efficient pipelines. Adrian explains DLT as a Python‑based ingestion standard, the DLT Plus vision and marketplace for reusable data products, and why the industry is shifting toward specialization in governance, data quality, and streaming.

Key topics include adopting Apache Iceberg as a table format (Parquet storage and reduced vendor lock‑in), the role of data catalogs and metadata tooling (AWS Glue and peers), and how DuckDB enables embeddable local OLAP and portable query execution. We cover cost‑efficient patterns—DuckDB with GitHub Actions and headless table formats—dbt’s influence and alternatives like SQLMesh, orchestration choices (Airflow, Prefect, Dagster, GitHub Actions), and the 2025 trend of AI integration in pipelines and AI agents. Listeners will get actionable guidance on tool selection, beginner and transition roadmaps, and when Iceberg, DuckDB, or AI‑powered approaches make sense for their data engineering stack +dateadded: 2025-03-14 + +duration: PT01H02M16S + +quotableClips: +- name: Episode opening & guest introduction + startOffset: 1 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1 + endOffset: 143 +- name: Perspective on evolving data engineering challenges + startOffset: 143 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=143 + endOffset: 190 +- name: 'Career journey: startups, freelancing, founding DLT' + startOffset: 190 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=190 + endOffset: 243 +- name: DLT as a Python-based ingestion standard and market impact + startOffset: 243 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=243 + endOffset: 465 +- name: DLT Plus vision and partnership outreach for freelancers + startOffset: 465 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=465 + endOffset: 663 +- name: 'Industry shift toward specialization: governance, data quality, streaming' + startOffset: 663 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=663 + endOffset: 757 +- name: 'Early-career opportunities: AI projects and startup hiring' + startOffset: 757 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=757 + endOffset: 872 +- name: Modern data stack critique and open-source "postmodern" alternatives + startOffset: 872 + url: 
https://www.youtube.com/watch?v=AlCFKbFIEM8&t=872 + endOffset: 1000 +- name: '2025 trends: AI integration in data engineering and Apache Iceberg adoption' + startOffset: 1000 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1000 + endOffset: 1097 +- name: 'Apache Iceberg explained: table format, Parquet storage, vendor lock-in reduction' + startOffset: 1097 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1097 + endOffset: 1287 +- name: 'Database layers and catalog role: storage, compute, access, metadata & lineage' + startOffset: 1287 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1287 + endOffset: 1421 +- name: Metadata and catalog tooling overview (AWS Glue and peers) + startOffset: 1421 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1421 + endOffset: 1558 +- name: 'DuckDB impact: embeddable local OLAP and portable query engine' + startOffset: 1558 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1558 + endOffset: 1660 +- name: 'Cost-efficient pipelines: DuckDB with GitHub Actions and headless table formats' + startOffset: 1660 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1660 + endOffset: 1831 +- name: Headless table formats and DLT support for Delta Lake and Iceberg + startOffset: 1831 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1831 + endOffset: 1889 +- name: dbt's influence on engineering workflows and alternatives like SQLMesh + startOffset: 1889 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1889 + endOffset: 2137 +- name: 'Workflow orchestration options in 2025: Airflow, Prefect, Dagster, GitHub + Actions' + startOffset: 2137 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2137 + endOffset: 2282 +- name: 'AI engineering convergence: data engineers building AI agents' + startOffset: 2282 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2282 + endOffset: 2466 +- name: 'Beginner roadmap: SQL, Python, capturing business requirements, building + a portfolio' + startOffset: 2466 + url: 
https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2466 + endOffset: 2682 +- name: Tool selection guidance and vendor caution for modern data stacks + startOffset: 2682 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2682 + endOffset: 2756 +- name: 'Transition paths: senior backend engineers moving into data engineering' + startOffset: 2756 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2756 + endOffset: 2884 +- name: 'Job market outlook: senior vs junior data engineering opportunities' + startOffset: 2884 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2884 + endOffset: 2982 +- name: 'Table format comparisons: Delta, Hudi, and Iceberg differences' + startOffset: 2982 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2982 + endOffset: 3079 +- name: 'Streaming architectures and tools: micro-batching, Kafka, SQS, Flink' + startOffset: 3079 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3079 + endOffset: 3375 +- name: AI-driven commoditization and code generation in data engineering + startOffset: 3375 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3375 + endOffset: 3582 +- name: 'DLT roadmap: DLT Plus and a marketplace for reusable data products' + startOffset: 3582 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3582 + endOffset: 3679 +- name: Episode wrap-up and key takeaways + startOffset: 3679 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3679 + endOffset: 3736 + transcript: - header: Episode opening & guest introduction - line: This week, we’ll talk about trends in data engineering. Our special guest @@ -517,137 +638,6 @@ transcript: sec: 3737 time: '1:02:17' who: Alexey -description: Discover Apache Iceberg, DuckDB & AI-powered pipelines - learn cost-efficient - table formats, orchestration tactics and a career roadmap for data engineers. -intro: How do you future‑proof data engineering against vendor lock‑in, rising AI - demand, and exploding metadata complexity? 
In this episode, Adrian Brudaru — a former - business analyst turned freelancer and co‑founder of DLT — walks through practical - choices for building resilient, cost‑efficient pipelines. Adrian explains DLT as - a Python‑based ingestion standard, the DLT Plus vision and marketplace for reusable - data products, and why the industry is shifting toward specialization in governance, - data quality, and streaming.

Key topics include adopting Apache Iceberg - as a table format (Parquet storage and reduced vendor lock‑in), the role of data - catalogs and metadata tooling (AWS Glue and peers), and how DuckDB enables embeddable - local OLAP and portable query execution. We cover cost‑efficient patterns—DuckDB - with GitHub Actions and headless table formats—dbt’s influence and alternatives - like SQLMesh, orchestration choices (Airflow, Prefect, Dagster, GitHub Actions), - and the 2025 trend of AI integration in pipelines and AI agents. Listeners will - get actionable guidance on tool selection, beginner and transition roadmaps, and - when Iceberg, DuckDB, or AI‑powered approaches make sense for their data engineering - stack. -dateadded: '2025-03-14' -duration: PT01H02M16S -quotableClips: -- name: Episode opening & guest introduction - startOffset: 1 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1 - endOffset: 143 -- name: Perspective on evolving data engineering challenges - startOffset: 143 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=143 - endOffset: 190 -- name: 'Career journey: startups, freelancing, founding DLT' - startOffset: 190 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=190 - endOffset: 243 -- name: DLT as a Python-based ingestion standard and market impact - startOffset: 243 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=243 - endOffset: 465 -- name: DLT Plus vision and partnership outreach for freelancers - startOffset: 465 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=465 - endOffset: 663 -- name: 'Industry shift toward specialization: governance, data quality, streaming' - startOffset: 663 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=663 - endOffset: 757 -- name: 'Early-career opportunities: AI projects and startup hiring' - startOffset: 757 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=757 - endOffset: 872 -- name: Modern data stack critique and open-source "postmodern" alternatives - startOffset: 872 - url: 
https://www.youtube.com/watch?v=AlCFKbFIEM8&t=872 - endOffset: 1000 -- name: '2025 trends: AI integration in data engineering and Apache Iceberg adoption' - startOffset: 1000 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1000 - endOffset: 1097 -- name: 'Apache Iceberg explained: table format, Parquet storage, vendor lock-in reduction' - startOffset: 1097 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1097 - endOffset: 1287 -- name: 'Database layers and catalog role: storage, compute, access, metadata & lineage' - startOffset: 1287 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1287 - endOffset: 1421 -- name: Metadata and catalog tooling overview (AWS Glue and peers) - startOffset: 1421 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1421 - endOffset: 1558 -- name: 'DuckDB impact: embeddable local OLAP and portable query engine' - startOffset: 1558 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1558 - endOffset: 1660 -- name: 'Cost-efficient pipelines: DuckDB with GitHub Actions and headless table formats' - startOffset: 1660 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1660 - endOffset: 1831 -- name: Headless table formats and DLT support for Delta Lake and Iceberg - startOffset: 1831 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1831 - endOffset: 1889 -- name: dbt's influence on engineering workflows and alternatives like SQLMesh - startOffset: 1889 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1889 - endOffset: 2137 -- name: 'Workflow orchestration options in 2025: Airflow, Prefect, Dagster, GitHub - Actions' - startOffset: 2137 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2137 - endOffset: 2282 -- name: 'AI engineering convergence: data engineers building AI agents' - startOffset: 2282 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2282 - endOffset: 2466 -- name: 'Beginner roadmap: SQL, Python, capturing business requirements, building - a portfolio' - startOffset: 2466 - url: 
https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2466 - endOffset: 2682 -- name: Tool selection guidance and vendor caution for modern data stacks - startOffset: 2682 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2682 - endOffset: 2756 -- name: 'Transition paths: senior backend engineers moving into data engineering' - startOffset: 2756 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2756 - endOffset: 2884 -- name: 'Job market outlook: senior vs junior data engineering opportunities' - startOffset: 2884 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2884 - endOffset: 2982 -- name: 'Table format comparisons: Delta, Hudi, and Iceberg differences' - startOffset: 2982 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2982 - endOffset: 3079 -- name: 'Streaming architectures and tools: micro-batching, Kafka, SQS, Flink' - startOffset: 3079 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3079 - endOffset: 3375 -- name: AI-driven commoditization and code generation in data engineering - startOffset: 3375 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3375 - endOffset: 3582 -- name: 'DLT roadmap: DLT Plus and a marketplace for reusable data products' - startOffset: 3582 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3582 - endOffset: 3679 -- name: Episode wrap-up and key takeaways - startOffset: 3679 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3679 - endOffset: 3736 --- Links: diff --git a/_podcast/s20e04-mlops-in-corporations-and-startups.md b/_podcast/to-update/s20e04-mlops-in-corporations-and-startups.md similarity index 96% rename from _podcast/s20e04-mlops-in-corporations-and-startups.md rename to _podcast/to-update/s20e04-mlops-in-corporations-and-startups.md index e227ade0..3ab82409 100644 --- a/_podcast/s20e04-mlops-in-corporations-and-startups.md +++ b/_podcast/to-update/s20e04-mlops-in-corporations-and-startups.md @@ -1,20 +1,140 @@ --- +title: "The episode’s single unifying idea is pragmatic trade‑offs: how to move fast 
and deliver value in ML-driven products and careers while deliberately managing the risks that speed introduces—technical debt, vendor lock‑in, operational overhead, and team burnout. Every segment circles back to the same decision framework: choose lean, observable, portable primitives and SaaS or managed services pragmatically to ship quickly; invest in minimal, automatable MLOps and instrumentation so you can iterate safely; and prioritize foundational skills, mentorship, and ownership to sustain learning and long‑term flexibility. In short, be intentional about early architectural, tooling, and career choices—opt for simplicity and visibility to accelerate outcomes today while preserving the ability to evolve, scale, and de‑risk tomorrow." +short: MLOps in Corporations and Startups +season: 20 episode: 4 guests: - nemanjaradojkovic +image: images/podcast/s20e04-mlops-in-corporations-and-startups.jpg ids: anchor: atalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 youtube: DX9c__a4jzg -image: images/podcast/s20e04-mlops-in-corporations-and-startups.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 apple: https://podcasts.apple.com/us/podcast/mlops-in-corporations-and-startups-nemanja-radojkovic/id1541710331?i=1000699195928 spotify: https://open.spotify.com/episode/6V8gkTSz7LuPjQYC4rO019 youtube: https://www.youtube.com/watch?v=DX9c__a4jzg -season: 20 -short: MLOps in Corporations and Startups -title: 'Lean MLOps for Startups: SaaS-First MVP Stack, Avoid Vendor Lock-In & Manage - Tech Debt' + +description: 'Learn Lean MLOps strategies for startups: build a SaaS-first MVP stack, avoid vendor lock-in, and manage technical debt for faster, portable ML launches.' +intro: How can an early-stage startup ship ML features fast without getting locked into cloud vendors or drowning in technical debt? 
In this episode, Nemanja Radojkovic—an electrical engineer turned data scientist and MLOps engineer, DataCamp instructor, and long-time practitioner—walks through pragmatic, lean MLOps strategies for startups.

We cover shoestring tactics for rapid prototyping, a SaaS‑first MVP stack and its trade‑offs, cloud credits versus migration friction, and how to avoid vendor lock‑in with managed services like Vertex AI or SageMaker. Nemanja unpacks priorities for an MVP stack, low‑code speed versus future flexibility, minimal stacks (Python, CI/CD orchestration, Dagster), and observability options (Logfire, Prometheus/Grafana, Streamlit). The conversation also addresses technical debt management, data engineering reliability, on‑premise vs cloud decisions, and distributed compute choices (Dask, Spark).

Listen to learn concrete frameworks for choosing tools, balancing portability and managed services, and practical steps to manage tech debt while moving quickly. This episode is for startup engineers and founders who need actionable guidance on lean MLOps, SaaS‑first approaches, vendor lock‑in avoidance, and building a resilient MVP stack +dateadded: 2025-03-15 + +duration: PT01H01M06S + +quotableClips: +- name: Episode Introduction & Topic Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=0 + endOffset: 135 +- name: 'Career Journey: Academia → Consulting → Finance Machine Learning Engineering' + startOffset: 135 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=135 + endOffset: 363 +- name: 'Startup Pace: Agility, Speed, and Managerial Insights' + startOffset: 363 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=363 + endOffset: 474 +- name: 'Lean MLOps: Shoestring Strategies for Early-Stage Companies' + startOffset: 474 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=474 + endOffset: 714 +- name: 'SaaS-First Approach: Vendor Solutions for Small Teams' + startOffset: 714 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=714 + endOffset: 774 +- name: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock‑in' + startOffset: 774 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=774 + endOffset: 906 +- name: 'Cloud Complexity: Infrastructure as Code and Operational Overhead' + startOffset: 906 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=906 + endOffset: 1058 +- name: 'MVP Stack: Prioritizing Tools for Rapid Prototyping and Launch' + startOffset: 1058 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1058 + endOffset: 1159 +- name: 'Portability vs Managed Services: Avoiding Vendor Lock‑In (Vertex AI, SageMaker)' + startOffset: 1159 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1159 + endOffset: 1295 +- name: 'Low‑Code Trade-offs: Speed vs Future Flexibility' + startOffset: 1295 + url: 
https://www.youtube.com/watch?v=DX9c__a4jzg&t=1295 + endOffset: 1342 +- name: 'Career Decision Framework: Choosing Startups or Corporations' + startOffset: 1342 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1342 + endOffset: 1650 +- name: 'End‑to‑End Ownership: Multidisciplinary Work in Startups' + startOffset: 1650 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1650 + endOffset: 1777 +- name: 'Corporate Processes: "Agile" vs Bureaucratic Planning Cycles' + startOffset: 1777 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1777 + endOffset: 1997 +- name: 'Platform & Frameworks: Automating Developer Workflows' + startOffset: 1997 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1997 + endOffset: 2072 +- name: 'Team Scale Advantages: Redundancy, Support, and Internal Mobility' + startOffset: 2072 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2072 + endOffset: 2148 +- name: 'Startup Intensity: Learning Curve, Burnout Risk, and Rewards' + startOffset: 2148 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2148 + endOffset: 2274 +- name: 'AI‑Assisted Coding: Productivity Gains and Technical Debt Risks' + startOffset: 2274 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2274 + endOffset: 2401 +- name: 'Technical Debt Management: Notes, Awareness, and Security Implications' + startOffset: 2401 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2401 + endOffset: 2592 +- name: 'Early‑Career Advice: Mentorship, Pairing, and Role Selection' + startOffset: 2592 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2592 + endOffset: 2650 +- name: 'Minimal MLOps Stack: Python, CI/CD Orchestration, and Dagster' + startOffset: 2650 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2650 + endOffset: 2755 +- name: 'Observability Choices: Logfire, Prometheus/Grafana, and Streamlit' + startOffset: 2755 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2755 + endOffset: 2891 +- name: 'Product Modularity: Desire for Standalone Model Registries 
& Observability' + startOffset: 2891 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2891 + endOffset: 2940 +- name: 'Skill Investment: Foundational Tools (Linux, Python, Bash) vs New Tech' + startOffset: 2940 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2940 + endOffset: 3087 +- name: 'Market Signals for Learning: Job Postings, Airflow, and Targeted Skills' + startOffset: 3087 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3087 + endOffset: 3343 +- name: 'Data Engineering Reliability: Quality, Lineage, and LLM Unpredictability' + startOffset: 3343 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3343 + endOffset: 3429 +- name: 'On‑Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' + startOffset: 3429 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3429 + endOffset: 3609 +- name: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade‑offs' + startOffset: 3609 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3609 + endOffset: 3701 +- name: Closing Remarks and Next Steps + startOffset: 3701 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3701 + endOffset: 3666 + transcript: - header: Episode Introduction & Topic Overview - line: This week, we’ll talk about MLOps in corporations versus startups. Our special @@ -1229,139 +1349,6 @@ transcript: sec: 3726 time: '1:02:06' who: Nemanja -description: 'Learn Lean MLOps strategies for startups: build a SaaS-first MVP stack, - avoid vendor lock-in, and manage technical debt for faster, portable ML launches.' -intro: How can an early-stage startup ship ML features fast without getting locked - into cloud vendors or drowning in technical debt? In this episode, Nemanja Radojkovic—an - electrical engineer turned data scientist and MLOps engineer, DataCamp instructor, - and long-time practitioner—walks through pragmatic, lean MLOps strategies for startups. -

We cover shoestring tactics for rapid prototyping, a SaaS‑first MVP stack - and its trade‑offs, cloud credits versus migration friction, and how to avoid vendor - lock‑in with managed services like Vertex AI or SageMaker. Nemanja unpacks priorities - for an MVP stack, low‑code speed versus future flexibility, minimal stacks (Python, - CI/CD orchestration, Dagster), and observability options (Logfire, Prometheus/Grafana, - Streamlit). The conversation also addresses technical debt management, data engineering - reliability, on‑premise vs cloud decisions, and distributed compute choices (Dask, - Spark).

Listen to learn concrete frameworks for choosing tools, balancing - portability and managed services, and practical steps to manage tech debt while - moving quickly. This episode is for startup engineers and founders who need actionable - guidance on lean MLOps, SaaS‑first approaches, vendor lock‑in avoidance, and building - a resilient MVP stack. -dateadded: '2025-03-15' -duration: PT01H01M06S -quotableClips: -- name: Episode Introduction & Topic Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=0 - endOffset: 135 -- name: 'Career Journey: Academia → Consulting → Finance Machine Learning Engineering' - startOffset: 135 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=135 - endOffset: 363 -- name: 'Startup Pace: Agility, Speed, and Managerial Insights' - startOffset: 363 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=363 - endOffset: 474 -- name: 'Lean MLOps: Shoestring Strategies for Early-Stage Companies' - startOffset: 474 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=474 - endOffset: 714 -- name: 'SaaS-First Approach: Vendor Solutions for Small Teams' - startOffset: 714 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=714 - endOffset: 774 -- name: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock‑in' - startOffset: 774 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=774 - endOffset: 906 -- name: 'Cloud Complexity: Infrastructure as Code and Operational Overhead' - startOffset: 906 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=906 - endOffset: 1058 -- name: 'MVP Stack: Prioritizing Tools for Rapid Prototyping and Launch' - startOffset: 1058 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1058 - endOffset: 1159 -- name: 'Portability vs Managed Services: Avoiding Vendor Lock‑In (Vertex AI, SageMaker)' - startOffset: 1159 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1159 - endOffset: 1295 -- name: 'Low‑Code Trade-offs: Speed vs Future Flexibility' - startOffset: 1295 - url: 
https://www.youtube.com/watch?v=DX9c__a4jzg&t=1295 - endOffset: 1342 -- name: 'Career Decision Framework: Choosing Startups or Corporations' - startOffset: 1342 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1342 - endOffset: 1650 -- name: 'End‑to‑End Ownership: Multidisciplinary Work in Startups' - startOffset: 1650 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1650 - endOffset: 1777 -- name: 'Corporate Processes: "Agile" vs Bureaucratic Planning Cycles' - startOffset: 1777 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1777 - endOffset: 1997 -- name: 'Platform & Frameworks: Automating Developer Workflows' - startOffset: 1997 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1997 - endOffset: 2072 -- name: 'Team Scale Advantages: Redundancy, Support, and Internal Mobility' - startOffset: 2072 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2072 - endOffset: 2148 -- name: 'Startup Intensity: Learning Curve, Burnout Risk, and Rewards' - startOffset: 2148 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2148 - endOffset: 2274 -- name: 'AI‑Assisted Coding: Productivity Gains and Technical Debt Risks' - startOffset: 2274 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2274 - endOffset: 2401 -- name: 'Technical Debt Management: Notes, Awareness, and Security Implications' - startOffset: 2401 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2401 - endOffset: 2592 -- name: 'Early‑Career Advice: Mentorship, Pairing, and Role Selection' - startOffset: 2592 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2592 - endOffset: 2650 -- name: 'Minimal MLOps Stack: Python, CI/CD Orchestration, and Dagster' - startOffset: 2650 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2650 - endOffset: 2755 -- name: 'Observability Choices: Logfire, Prometheus/Grafana, and Streamlit' - startOffset: 2755 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2755 - endOffset: 2891 -- name: 'Product Modularity: Desire for Standalone Model Registries 
& Observability' - startOffset: 2891 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2891 - endOffset: 2940 -- name: 'Skill Investment: Foundational Tools (Linux, Python, Bash) vs New Tech' - startOffset: 2940 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2940 - endOffset: 3087 -- name: 'Market Signals for Learning: Job Postings, Airflow, and Targeted Skills' - startOffset: 3087 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3087 - endOffset: 3343 -- name: 'Data Engineering Reliability: Quality, Lineage, and LLM Unpredictability' - startOffset: 3343 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3343 - endOffset: 3429 -- name: 'On‑Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' - startOffset: 3429 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3429 - endOffset: 3609 -- name: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade‑offs' - startOffset: 3609 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3609 - endOffset: 3701 -- name: Closing Remarks and Next Steps - startOffset: 3701 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3701 - endOffset: 3666 --- Links: diff --git a/_podcast/s20e05-data-intensive-ai.md b/_podcast/to-update/s20e05-data-intensive-ai.md similarity index 95% rename from _podcast/s20e05-data-intensive-ai.md rename to _podcast/to-update/s20e05-data-intensive-ai.md index d5b494fa..e4b50df0 100644 --- a/_podcast/s20e05-data-intensive-ai.md +++ b/_podcast/to-update/s20e05-data-intensive-ai.md @@ -1,20 +1,126 @@ --- +title: "Context: a practitioner’s tour through the end-to-end work of turning data and models into reliable, efficient products—from Java and data engineering foundations to AI fine-tuning, prompt craft, tooling choices, and developer workflows. 
+ +Core: the episode’s through-line is a data‑centric engineering mindset for trustworthy, production-ready AI: rigorous testing and pipeline design to ensure data trust, deliberate choices about models and tools for cost and performance, prompt and token-efficiency techniques to make inference practical, and pragmatic engineering patterns (architecture, caching, assistants) that let teams ship AI features and sustain them—while using content and teaching as a way to refine thinking and capture business value." +short: Data Intensive AI +season: 20 episode: 5 guests: - bartoszmikulski +image: images/podcast/s20e05-data-intensive-ai.jpg ids: anchor: atalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi youtube: BP6w_vKySN0 -image: images/podcast/s20e05-data-intensive-ai.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi apple: https://podcasts.apple.com/us/podcast/data-intensive-ai-bartosz-mikulski/id1541710331?i=1000700288876 spotify: https://open.spotify.com/episode/0nFSU92IQDbM4C9FLvdn4z youtube: https://www.youtube.com/watch?v=BP6w_vKySN0 -season: 20 -short: Data Intensive AI -title: Build Trustworthy AI with Data Pipeline Testing & Prompt Engineering (Caching, - Compression & Tools) + +description: Master data pipeline testing and prompt engineering—learn snapshot tests, prompt compression & caching to ensure data trust and cut model costs +intro: How do you turn prototype AI into reliable production systems that stakeholders can trust? In this episode, Bartosz Mikulski — an AI and data engineer who helps move projects from demo to production, builds testing infrastructure, and teaches practitioners — walks through practical approaches to building trustworthy AI through data pipeline testing and prompt engineering.

We dig into testing strategies for data pipelines (snapshot and integration testing), tools like Great Expectations, Soda, SQL vs Spark tests, and guidance on when to use Apache Spark. Bartosz explains the data engineering role in preprocessing and fine-tuning, plus “invisible” AI use cases like augmented generation and review analysis. On the prompt side, he covers in-context learning, prompt evaluation and formatting tradeoffs, token optimization with prompt compression, and prompt caching and model efficiency (attention caching, Claude). He also discusses open-source tools (DeepSeek, Perplexity), AI-driven product patterns (lead scoring, Chrome extension architectures), and coding assistants like Cursor versus GitHub Copilot.

Listen for concrete testing practices, prompt optimization techniques (caching and compression), and tool recommendations you can apply to increase model reliability and reduce production risk +dateadded: 2025-03-26 + +duration: PT01H01M37S + +quotableClips: +- name: Episode Opening & Guest Overview (Data Intensive AI) + startOffset: 0 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=0 + endOffset: 122 +- name: Book Contribution Clarified & Testing Focus + startOffset: 122 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=122 + endOffset: 240 +- name: 'Career Path: Java → Data Engineering → AI Engineering' + startOffset: 240 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=240 + endOffset: 364 +- name: 'Publishing Routine: Blogging Frequency & Content Practice' + startOffset: 364 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=364 + endOffset: 545 +- name: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' + startOffset: 545 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=545 + endOffset: 707 +- name: 'Test Strategy for Data Pipelines: Snapshot & Integration Testing' + startOffset: 707 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=707 + endOffset: 794 +- name: 'Testing Tools: Great Expectations, Soda, SQL Tests vs Spark Tests' + startOffset: 794 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=794 + endOffset: 1030 +- name: 'Technology Choice: When to Use Apache Spark' + startOffset: 1030 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1030 + endOffset: 1118 +- name: 'Data Engineering’s Role in AI: Preprocessing & Fine-Tuning Data' + startOffset: 1118 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1118 + endOffset: 1306 +- name: 'Invisible AI Use Cases: Augmented Generation & Review Analysis' + startOffset: 1306 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1306 + endOffset: 1513 +- name: 'Prompt Engineering Basics: In-Context Learning & Examples' + startOffset: 1513 + url: 
https://www.youtube.com/watch?v=BP6w_vKySN0&t=1513 + endOffset: 1696 +- name: 'Prompt Evaluation: Formatting, Examples, and Cost Tradeoffs' + startOffset: 1696 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1696 + endOffset: 1800 +- name: 'Prompt Compression: Token Optimization Techniques' + startOffset: 1800 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1800 + endOffset: 1905 +- name: Prompt Caching & Model Efficiency (attention caching, Claude) + startOffset: 1905 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1905 + endOffset: 2022 +- name: Open-Source Models & Tools Experience (DeepSeek, Perplexity) + startOffset: 2022 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2022 + endOffset: 2154 +- name: 'AI for Lead Scoring: LinkedIn Automation & Qualification' + startOffset: 2154 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2154 + endOffset: 2464 +- name: 'Chrome Extension Architecture: Backend AI Integration Pattern' + startOffset: 2464 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2464 + endOffset: 2525 +- name: 'Coding Assistants: Cursor Workflow & Productivity Boosts' + startOffset: 2525 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2525 + endOffset: 2678 +- name: 'Code AI Comparison: Cursor vs GitHub Copilot & Alternatives' + startOffset: 2678 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2678 + endOffset: 2839 +- name: 'Search-Focused Assistants: Using Perplexity & Tool Selection' + startOffset: 2839 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2839 + endOffset: 3129 +- name: 'Website Hosting: Static Site Generators & GitHub Pages' + startOffset: 3129 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3129 + endOffset: 3190 +- name: 'Blogging as Business: Attracting Clients & Teaching Workshops' + startOffset: 3190 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3190 + endOffset: 3377 +- name: 'AI-Assisted Writing: Drafting, Rewriting, and Maintaining Voice' + startOffset: 3377 + url: 
https://www.youtube.com/watch?v=BP6w_vKySN0&t=3377 + endOffset: 3621 +- name: Episode Wrap-Up & Guest Resources (blog link invitation) + startOffset: 3621 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3621 + endOffset: 3697 + transcript: - header: Episode Opening & Guest Overview (Data Intensive AI) - line: This week, we’ll talk about Data Intensive AI. Our special guest today is @@ -1028,123 +1134,6 @@ transcript: sec: 3697 time: '1:01:37' who: Alexey -description: Master data pipeline testing and prompt engineering—learn snapshot tests, - prompt compression & caching to ensure data trust and cut model costs. -intro: How do you turn prototype AI into reliable production systems that stakeholders - can trust? In this episode, Bartosz Mikulski — an AI and data engineer who helps - move projects from demo to production, builds testing infrastructure, and teaches - practitioners — walks through practical approaches to building trustworthy AI through - data pipeline testing and prompt engineering.

We dig into testing strategies - for data pipelines (snapshot and integration testing), tools like Great Expectations, - Soda, SQL vs Spark tests, and guidance on when to use Apache Spark. Bartosz explains - the data engineering role in preprocessing and fine-tuning, plus “invisible” AI - use cases like augmented generation and review analysis. On the prompt side, he - covers in-context learning, prompt evaluation and formatting tradeoffs, token optimization - with prompt compression, and prompt caching and model efficiency (attention caching, - Claude). He also discusses open-source tools (DeepSeek, Perplexity), AI-driven product - patterns (lead scoring, Chrome extension architectures), and coding assistants like - Cursor versus GitHub Copilot.

Listen for concrete testing practices, prompt - optimization techniques (caching and compression), and tool recommendations you - can apply to increase model reliability and reduce production risk. -dateadded: '2025-03-26' -duration: PT01H01M37S -quotableClips: -- name: Episode Opening & Guest Overview (Data Intensive AI) - startOffset: 0 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=0 - endOffset: 122 -- name: Book Contribution Clarified & Testing Focus - startOffset: 122 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=122 - endOffset: 240 -- name: 'Career Path: Java → Data Engineering → AI Engineering' - startOffset: 240 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=240 - endOffset: 364 -- name: 'Publishing Routine: Blogging Frequency & Content Practice' - startOffset: 364 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=364 - endOffset: 545 -- name: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' - startOffset: 545 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=545 - endOffset: 707 -- name: 'Test Strategy for Data Pipelines: Snapshot & Integration Testing' - startOffset: 707 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=707 - endOffset: 794 -- name: 'Testing Tools: Great Expectations, Soda, SQL Tests vs Spark Tests' - startOffset: 794 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=794 - endOffset: 1030 -- name: 'Technology Choice: When to Use Apache Spark' - startOffset: 1030 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1030 - endOffset: 1118 -- name: 'Data Engineering’s Role in AI: Preprocessing & Fine-Tuning Data' - startOffset: 1118 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1118 - endOffset: 1306 -- name: 'Invisible AI Use Cases: Augmented Generation & Review Analysis' - startOffset: 1306 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1306 - endOffset: 1513 -- name: 'Prompt Engineering Basics: In-Context Learning & Examples' - startOffset: 1513 - url: 
https://www.youtube.com/watch?v=BP6w_vKySN0&t=1513 - endOffset: 1696 -- name: 'Prompt Evaluation: Formatting, Examples, and Cost Tradeoffs' - startOffset: 1696 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1696 - endOffset: 1800 -- name: 'Prompt Compression: Token Optimization Techniques' - startOffset: 1800 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1800 - endOffset: 1905 -- name: Prompt Caching & Model Efficiency (attention caching, Claude) - startOffset: 1905 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1905 - endOffset: 2022 -- name: Open-Source Models & Tools Experience (DeepSeek, Perplexity) - startOffset: 2022 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2022 - endOffset: 2154 -- name: 'AI for Lead Scoring: LinkedIn Automation & Qualification' - startOffset: 2154 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2154 - endOffset: 2464 -- name: 'Chrome Extension Architecture: Backend AI Integration Pattern' - startOffset: 2464 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2464 - endOffset: 2525 -- name: 'Coding Assistants: Cursor Workflow & Productivity Boosts' - startOffset: 2525 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2525 - endOffset: 2678 -- name: 'Code AI Comparison: Cursor vs GitHub Copilot & Alternatives' - startOffset: 2678 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2678 - endOffset: 2839 -- name: 'Search-Focused Assistants: Using Perplexity & Tool Selection' - startOffset: 2839 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2839 - endOffset: 3129 -- name: 'Website Hosting: Static Site Generators & GitHub Pages' - startOffset: 3129 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3129 - endOffset: 3190 -- name: 'Blogging as Business: Attracting Clients & Teaching Workshops' - startOffset: 3190 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3190 - endOffset: 3377 -- name: 'AI-Assisted Writing: Drafting, Rewriting, and Maintaining Voice' - startOffset: 3377 - url: 
https://www.youtube.com/watch?v=BP6w_vKySN0&t=3377 - endOffset: 3621 -- name: Episode Wrap-Up & Guest Resources (blog link invitation) - startOffset: 3621 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3621 - endOffset: 3697 --- Links: diff --git a/_podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md b/_podcast/to-update/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md similarity index 96% rename from _podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md rename to _podcast/to-update/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md index 8125f1d0..7c53361c 100644 --- a/_podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md +++ b/_podcast/to-update/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md @@ -1,20 +1,125 @@ --- +title: "Context: Eddy’s journey from industrial engineering and analyst tools to staff data engineer frames conversations about modern data stacks, digital warehouses, and FinOps as practical responses to real business problems. +Core: The episode’s unifying idea is that building impactful data systems requires translating domain and analyst expertise into operational, scalable, and cost-conscious engineering—combining the right tools (ELT, dbt, cloud platforms), disciplined practices (testing, monitoring, CI/CD), and FinOps accountability—to deliver trusted metrics, align technical work with business value, and enable continuous learning and adaptation." 
+short: From Supply Chain Management to Digital Warehousing and FinOps +season: 20 episode: 6 guests: - eddyzulkifly +image: images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg ids: anchor: datatalksclub/episodes/From-Supply-Chain-Management-to-Digital-Warehousing-and-FinOps---Eddy-Zulkifly-e313t7b youtube: 7ePp6wuxM5s -image: images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Supply-Chain-Management-to-Digital-Warehousing-and-FinOps---Eddy-Zulkifly-e313t7b apple: https://podcasts.apple.com/us/podcast/from-supply-chain-management-to-digital-warehousing/id1541710331?i=1000702233986 spotify: https://open.spotify.com/episode/33YZpX7zE6YcBGbQK9Iclp youtube: https://www.youtube.com/watch?v=7ePp6wuxM5s -season: 20 -short: From Supply Chain Management to Digital Warehousing and FinOps -title: 'FinOps for Data Engineers: Optimize Cloud Costs with dbt, BigQuery & Modern - Data Stack' + +description: Master FinOps for data engineers—optimize BigQuery costs with dbt, cloud cost modeling, tagging and forecasting to cut spend and boost pipeline efficiency +intro: How can data teams optimize cloud costs for analytics without slowing down delivery? In this episode, Eddy Zulkifly, Staff Data Engineer at Kinaxis, walks through practical FinOps strategies for data engineers working with the modern data stack. Eddy brings a decade of experience across Google Cloud, Azure, and AWS, plus prior roles at Home Depot and ongoing graduate studies at Georgia Tech, and explains how his background in supply chain and analytics shapes cost-aware engineering.

We cover building a digital data warehouse using ELT, dbt, BigQuery, and orchestration; operational differences like change velocity, monitoring, and tests; and translating business needs into metric trees and data specs for FinOps. Eddy breaks down cloud cost modeling—VM sizing, storage tiers, reserved instances, and multi-cloud comparisons—alongside cost-tagging, OUCS and standardized reporting across AWS/GCP/Azure. He also shares vendor negotiation tactics, demand-forecasting analogies for capacity planning, and the strategic responsibilities of senior data engineers.

Listen to learn actionable approaches to cloud cost optimization, practical dbt and BigQuery patterns, and how to embed FinOps practices into your data platform and team workflows +dateadded: 2025-04-30 + +duration: PT00H59M54S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=0 + endOffset: 95 +- name: 'Guest Introduction: Eddy Zulkifly, Staff Data Engineer at Kinaxis' + startOffset: 95 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=95 + endOffset: 134 +- name: 'Career Origins: Industrial Engineering, Supply Chain & Excel Macros' + startOffset: 134 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=134 + endOffset: 380 +- name: 'Career Pivot: From Business Analyst to Data Engineering' + startOffset: 380 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=380 + endOffset: 468 +- name: Analyst Skills as a Foundation for Data Engineering + startOffset: 468 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=468 + endOffset: 486 +- name: 'Docker & Terraform: Learning Curve for Data Practitioners' + startOffset: 486 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=486 + endOffset: 498 +- name: 'Tools & Low-Code Beginnings: Excel, Alteryx, Tableau' + startOffset: 498 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=498 + endOffset: 673 +- name: 'Retail & Warehousing Experience: Forecasting, Preload Optimization' + startOffset: 673 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=673 + endOffset: 1317 +- name: 'Digital Data Warehousing: Data as Inventory and Pipelines' + startOffset: 1317 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1317 + endOffset: 1356 +- name: 'Modern Data Stack: ELT, dbt, BigQuery and Orchestration' + startOffset: 1356 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1356 + endOffset: 1474 +- name: 'Operational Differences: Change Velocity, Monitoring, and Tests' + startOffset: 1474 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1474 + 
endOffset: 1670 +- name: 'Metric Trees & Data Specs: Translating Business Requirements for FinOps' + startOffset: 1670 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1670 + endOffset: 1796 +- name: 'Building a Digital Warehouse: Stack Choices and Open-Source Tools' + startOffset: 1796 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1796 + endOffset: 1900 +- name: 'FinOps Overview: Cloud Cost Optimization for SaaS Platforms' + startOffset: 1900 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1900 + endOffset: 2055 +- name: Vendor Negotiations & Reservation Instances for Cost Savings + startOffset: 2055 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2055 + endOffset: 2171 +- name: 'Cloud Cost Modeling: VM Sizing, Storage Tiers and Multi-Cloud Comparison' + startOffset: 2171 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2171 + endOffset: 2343 +- name: 'Demand Forecasting Analogy: Inventory Planning Applied to Cloud Capacity' + startOffset: 2343 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2343 + endOffset: 2418 +- name: FinOps Foundation, Cost Tagging & Accountability Best Practices + startOffset: 2418 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2418 + endOffset: 2681 +- name: OUCS & Standardizing Cloud Cost Reporting Across AWS/GCP/Azure + startOffset: 2681 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2681 + endOffset: 2777 +- name: 'FinOps Processes: Parallels with DevOps, DataOps and CI/CD' + startOffset: 2777 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2777 + endOffset: 2881 +- name: 'Staff Data Engineer Responsibilities: Technical & Strategic FinOps Work' + startOffset: 2881 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2881 + endOffset: 3025 +- name: 'Continuous Learning: Georgia Tech Master’s, dbt, Python and Applied Analytics' + startOffset: 3025 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3025 + endOffset: 3365 +- name: 'Career Advice: Certifications, Mentorship, Community and Time 
Management' + startOffset: 3365 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3365 + endOffset: 3572 +- name: Closing Remarks & Key Takeaways + startOffset: 3572 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3572 + endOffset: 3594 + transcript: - header: Podcast Introduction - line: Let’s get started. This week, we’ll discuss Digital Data Warehousing and FinOps. @@ -971,122 +1076,6 @@ transcript: sec: 3594 time: '59:54' who: Eddy -description: Master FinOps for data engineers—optimize BigQuery costs with dbt, cloud - cost modeling, tagging and forecasting to cut spend and boost pipeline efficiency. -intro: How can data teams optimize cloud costs for analytics without slowing down - delivery? In this episode, Eddy Zulkifly, Staff Data Engineer at Kinaxis, walks through - practical FinOps strategies for data engineers working with the modern data stack. - Eddy brings a decade of experience across Google Cloud, Azure, and AWS, plus prior - roles at Home Depot and ongoing graduate studies at Georgia Tech, and explains how - his background in supply chain and analytics shapes cost-aware engineering.

- We cover building a digital data warehouse using ELT, dbt, BigQuery and orchestration; - operational differences like change velocity, monitoring, and tests; and translating - business needs into metric trees and data specs for FinOps. Eddy breaks down cloud - cost modeling—VM sizing, storage tiers, reservation instances, and multi-cloud comparisons—alongside - cost-tagging, OUCS and standardized reporting across AWS/GCP/Azure. He also shares - vendor negotiation tactics, demand-forecasting analogies for capacity planning, - and the strategic responsibilities of senior data engineers.

Listen to - learn actionable approaches to cloud cost optimization, practical dbt and BigQuery - patterns, and how to embed FinOps practices into your data platform and team workflows. -dateadded: '2025-04-30' -duration: PT00H59M54S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=0 - endOffset: 95 -- name: 'Guest Introduction: Eddy Zulkifly, Staff Data Engineer at Kinaxis' - startOffset: 95 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=95 - endOffset: 134 -- name: 'Career Origins: Industrial Engineering, Supply Chain & Excel Macros' - startOffset: 134 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=134 - endOffset: 380 -- name: 'Career Pivot: From Business Analyst to Data Engineering' - startOffset: 380 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=380 - endOffset: 468 -- name: Analyst Skills as a Foundation for Data Engineering - startOffset: 468 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=468 - endOffset: 486 -- name: 'Docker & Terraform: Learning Curve for Data Practitioners' - startOffset: 486 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=486 - endOffset: 498 -- name: 'Tools & Low-Code Beginnings: Excel, Alteryx, Tableau' - startOffset: 498 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=498 - endOffset: 673 -- name: 'Retail & Warehousing Experience: Forecasting, Preload Optimization' - startOffset: 673 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=673 - endOffset: 1317 -- name: 'Digital Data Warehousing: Data as Inventory and Pipelines' - startOffset: 1317 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1317 - endOffset: 1356 -- name: 'Modern Data Stack: ELT, dbt, BigQuery and Orchestration' - startOffset: 1356 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1356 - endOffset: 1474 -- name: 'Operational Differences: Change Velocity, Monitoring, and Tests' - startOffset: 1474 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1474 - 
endOffset: 1670 -- name: 'Metric Trees & Data Specs: Translating Business Requirements for FinOps' - startOffset: 1670 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1670 - endOffset: 1796 -- name: 'Building a Digital Warehouse: Stack Choices and Open-Source Tools' - startOffset: 1796 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1796 - endOffset: 1900 -- name: 'FinOps Overview: Cloud Cost Optimization for SaaS Platforms' - startOffset: 1900 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1900 - endOffset: 2055 -- name: Vendor Negotiations & Reservation Instances for Cost Savings - startOffset: 2055 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2055 - endOffset: 2171 -- name: 'Cloud Cost Modeling: VM Sizing, Storage Tiers and Multi-Cloud Comparison' - startOffset: 2171 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2171 - endOffset: 2343 -- name: 'Demand Forecasting Analogy: Inventory Planning Applied to Cloud Capacity' - startOffset: 2343 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2343 - endOffset: 2418 -- name: FinOps Foundation, Cost Tagging & Accountability Best Practices - startOffset: 2418 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2418 - endOffset: 2681 -- name: OUCS & Standardizing Cloud Cost Reporting Across AWS/GCP/Azure - startOffset: 2681 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2681 - endOffset: 2777 -- name: 'FinOps Processes: Parallels with DevOps, DataOps and CI/CD' - startOffset: 2777 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2777 - endOffset: 2881 -- name: 'Staff Data Engineer Responsibilities: Technical & Strategic FinOps Work' - startOffset: 2881 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2881 - endOffset: 3025 -- name: 'Continuous Learning: Georgia Tech Master’s, dbt, Python and Applied Analytics' - startOffset: 3025 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3025 - endOffset: 3365 -- name: 'Career Advice: Certifications, Mentorship, Community and Time 
Management' - startOffset: 3365 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3365 - endOffset: 3572 -- name: Closing Remarks & Key Takeaways - startOffset: 3572 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3572 - endOffset: 3594 --- Links: diff --git a/_podcast/s20e07-build-strong-career-in-data.md b/_podcast/to-update/s20e07-build-strong-career-in-data.md similarity index 96% rename from _podcast/s20e07-build-strong-career-in-data.md rename to _podcast/to-update/s20e07-build-strong-career-in-data.md index c12e1cca..91a7fcff 100644 --- a/_podcast/s20e07-build-strong-career-in-data.md +++ b/_podcast/to-update/s20e07-build-strong-career-in-data.md @@ -1,20 +1,120 @@ --- +title: "A practical, curiosity-driven bridge between research and engineering: relentlessly iterate with hands‑on prototyping, rigorous evaluation, and open dissemination to solve real-world ML problems (ex: long‑context LLMs), while leveraging community, mentorship, and strategic projects to accelerate career growth and drive measurable impact." 
+short: Build a Strong Career in Data +season: 20 episode: 7 guests: - lavanyagupta +image: images/podcast/s20e07-build-strong-career-in-data.jpg ids: anchor: atalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p youtube: ekG5zJioyFs -image: images/podcast/s20e07-build-strong-career-in-data.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p apple: https://podcasts.apple.com/us/podcast/build-a-strong-career-in-data-lavanya-gupta/id1541710331?i=1000706988972 spotify: https://open.spotify.com/episode/2mJXd0lSZFPKJA0ZrG9iS2 youtube: https://www.youtube.com/watch?v=ekG5zJioyFs -season: 20 -short: Build a Strong Career in Data -title: 'Benchmarking Long-Context LLMs for Finance: Chunking, Retrieval, Summarization - & Career Tips' + +description: Discover long-context LLMs, chunking and retrieval for finance benchmarking—learn 32k–64k context limits, summarization tips, prototyping & career advice. +intro: How do you evaluate and deploy long-context LLMs for real-world financial documents—when context windows stretch into tens of thousands of tokens? In this episode, Lavanya Gupta, a CMU LTI alum and Sr. AI/ML Applied Scientist at JPMorgan Chase’s MLCOE, walks through practical benchmarking and production strategies for long-context LLMs in finance. Drawing on her published work "Long Context LLMs on Financial Concepts" (EMNLP) and 5+ years of industrial research, Lavanya explains empirical findings around context-window performance (a notable drop around 32k–64k), and outlines the pragmatic trio of chunking, retrieval, and summarization for processing large documents.
She also discusses industry research practices—publishing from corporate teams, dissemination via arXiv and endorsements—and rapid prototyping techniques like Streamlit for demos and feedback. Listeners will get concrete guidance on LLM benchmarking, context window trade-offs, dataset and licensing lessons from a Kaggle success, and actionable career advice on transitioning into ML roles, networking, portfolios, and interview prep. Tune in to learn how to benchmark long-context LLMs for financial NLP and translate research into production-ready workflows. +dateadded: 2025-05-12 + +duration: PT00H58M10S + +quotableClips: +- name: Episode Introduction & Topic Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=0 + endOffset: 122 +- name: 'Career Overview: From Software Engineering to ML & Master''s' + startOffset: 122 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=122 + endOffset: 205 +- name: 'Origin of ML Interest: Hackathons and Computer Vision' + startOffset: 205 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=205 + endOffset: 295 +- name: 'Early Project Case Study: OCR for Organization Charts' + startOffset: 295 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=295 + endOffset: 523 +- name: 'Role Snapshot: LLM Benchmarking at a Financial Institution' + startOffset: 523 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=523 + endOffset: 615 +- name: 'Research Focus: Evaluating Long-Context LLMs' + startOffset: 615 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=615 + endOffset: 756 +- name: 'Empirical Findings: Context Window Performance Droparound 32k–64k' + startOffset: 756 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=756 + endOffset: 894 +- name: 'Practical Approach: Chunking, Retrieval, and Summarization for Large Docs' + startOffset: 894 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=894 + endOffset: 928 +- name: 'Published Work: "Long Context LLMs on Financial Concepts" (EMNLP)' + startOffset: 928
+ url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=928 + endOffset: 1048 +- name: 'Industry Research Practices: Publishing from Corporate Teams' + startOffset: 1048 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1048 + endOffset: 1185 +- name: 'Motivation for Publications: Manager Support and Community Sharing' + startOffset: 1185 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1185 + endOffset: 1330 +- name: 'Dissemination Paths: arXiv, Endorsement, and Early Publications' + startOffset: 1330 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1330 + endOffset: 1501 +- name: 'Self-Learning & MLOps: Zoom Camps, Tutorials, and Mentoring' + startOffset: 1501 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1501 + endOffset: 1814 +- name: 'Rapid Prototyping Tools: Streamlit for Demos and Feedback' + startOffset: 1814 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1814 + endOffset: 2004 +- name: 'Kaggle Success Story: Building and Licensing a High-Impact Dataset' + startOffset: 2004 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2004 + endOffset: 2252 +- name: 'Community Contribution: Women in Data Science and Open Mentoring' + startOffset: 2252 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2252 + endOffset: 2473 +- name: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' + startOffset: 2473 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2473 + endOffset: 2724 +- name: 'Career Pivot Guidance: Non-CS Backgrounds Entering Data Roles' + startOffset: 2724 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2724 + endOffset: 2908 +- name: 'Networking & Mentorship: Cold Outreach and Building Rapport' + startOffset: 2908 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2908 + endOffset: 3088 +- name: 'Portfolio Strategy: Community Visibility vs. 
Job-Targeted Projects' + startOffset: 3088 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3088 + endOffset: 3273 +- name: 'Interview Preparation: LeetCode, Conceptual Mastery, and Mock Interviews' + startOffset: 3273 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3273 + endOffset: 3416 +- name: 'Project Selection: Industry-Backed Work for Real-World Impact' + startOffset: 3416 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3416 + endOffset: 3466 +- name: Episode Wrap-Up & Final Career Advice + startOffset: 3466 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3466 + endOffset: 3490 + transcript: - header: Episode Introduction & Topic Overview - line: This week we'll talk about building a strong career in data and we have a @@ -1055,118 +1155,6 @@ transcript: sec: 3490 time: '58:10' who: Alexey -description: Discover long-context LLMs, chunking and retrieval for finance benchmarking—learn - 32k–64k context limits, summarization tips, prototyping & career advice. -intro: How do you evaluate and deploy long-context LLMs for real-world financial documents—when - context windows stretch into tens of thousands of tokens? In this episode, Lavanya - Gupta, a CMU LTI alum and Sr. AI/ML Applied Scientist at JPMorgan Chase’s MLCOE, - walks through practical benchmarking and production strategies for long-context - LLMs in finance. Drawing on her published work "Long Context LLMs on Financial Concepts" - (EMNLP) and 5+ years of industrial research, Lavanya explains empirical findings - around context-window performance (a notable droparound 32k–64k), and outlines the - pragmatic trio of chunking, retrieval, and summarization for processing large documents. - She also discusses industry research practices—publishing from corporate teams, - dissemination via arXiv and endorsements—and rapid prototyping techniques like Streamlit - for demos and feedback. 
Listeners will get concrete guidance on LLM benchmarking, - context window trade-offs, dataset and licensing lessons from a Kaggle success, - and actionable career advice on transitioning into ML roles, networking, portfolios, - and interview prep. Tune in to learn how to benchmark long-context LLMs for financial - NLP and translate research into production-ready workflows. -dateadded: '2025-05-12' -duration: PT00H58M10S -quotableClips: -- name: Episode Introduction & Topic Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=0 - endOffset: 122 -- name: 'Career Overview: From Software Engineering to ML & Master''s' - startOffset: 122 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=122 - endOffset: 205 -- name: 'Origin of ML Interest: Hackathons and Computer Vision' - startOffset: 205 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=205 - endOffset: 295 -- name: 'Early Project Case Study: OCR for Organization Charts' - startOffset: 295 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=295 - endOffset: 523 -- name: 'Role Snapshot: LLM Benchmarking at a Financial Institution' - startOffset: 523 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=523 - endOffset: 615 -- name: 'Research Focus: Evaluating Long-Context LLMs' - startOffset: 615 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=615 - endOffset: 756 -- name: 'Empirical Findings: Context Window Performance Droparound 32k–64k' - startOffset: 756 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=756 - endOffset: 894 -- name: 'Practical Approach: Chunking, Retrieval, and Summarization for Large Docs' - startOffset: 894 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=894 - endOffset: 928 -- name: 'Published Work: "Long Context LLMs on Financial Concepts" (EMNLP)' - startOffset: 928 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=928 - endOffset: 1048 -- name: 'Industry Research Practices: Publishing from Corporate Teams' - startOffset: 1048 - url: 
https://www.youtube.com/watch?v=ekG5zJioyFs&t=1048 - endOffset: 1185 -- name: 'Motivation for Publications: Manager Support and Community Sharing' - startOffset: 1185 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1185 - endOffset: 1330 -- name: 'Dissemination Paths: arXiv, Endorsement, and Early Publications' - startOffset: 1330 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1330 - endOffset: 1501 -- name: 'Self-Learning & MLOps: Zoom Camps, Tutorials, and Mentoring' - startOffset: 1501 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1501 - endOffset: 1814 -- name: 'Rapid Prototyping Tools: Streamlit for Demos and Feedback' - startOffset: 1814 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1814 - endOffset: 2004 -- name: 'Kaggle Success Story: Building and Licensing a High-Impact Dataset' - startOffset: 2004 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2004 - endOffset: 2252 -- name: 'Community Contribution: Women in Data Science and Open Mentoring' - startOffset: 2252 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2252 - endOffset: 2473 -- name: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' - startOffset: 2473 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2473 - endOffset: 2724 -- name: 'Career Pivot Guidance: Non-CS Backgrounds Entering Data Roles' - startOffset: 2724 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2724 - endOffset: 2908 -- name: 'Networking & Mentorship: Cold Outreach and Building Rapport' - startOffset: 2908 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2908 - endOffset: 3088 -- name: 'Portfolio Strategy: Community Visibility vs. 
Job-Targeted Projects' - startOffset: 3088 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3088 - endOffset: 3273 -- name: 'Interview Preparation: LeetCode, Conceptual Mastery, and Mock Interviews' - startOffset: 3273 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3273 - endOffset: 3416 -- name: 'Project Selection: Industry-Backed Work for Real-World Impact' - startOffset: 3416 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3416 - endOffset: 3466 -- name: Episode Wrap-Up & Final Career Advice - startOffset: 3466 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3466 - endOffset: 3490 --- Links: diff --git a/_podcast/s20e08-from-hackathons-to-developer-advocacy.md b/_podcast/to-update/s20e08-from-hackathons-to-developer-advocacy.md similarity index 96% rename from _podcast/s20e08-from-hackathons-to-developer-advocacy.md rename to _podcast/to-update/s20e08-from-hackathons-to-developer-advocacy.md index 89077127..5710505b 100644 --- a/_podcast/s20e08-from-hackathons-to-developer-advocacy.md +++ b/_podcast/to-update/s20e08-from-hackathons-to-developer-advocacy.md @@ -1,20 +1,142 @@ --- +title: "Context: This episode weaves together practical production tips (video, audio, lighting), hands-on developer programs (hackathons, MLH fellowship), contribution and onboarding best practices, content/demo strategies, and leadership for scaling community initiatives. + +Core narrative: Empowering developer growth by building repeatable, hands-on learning and contribution pathways—well-run hackathons, mentorship-driven fellowships, clear onboarding and demo workflows, and scalable program design—so more people can learn by doing, successfully contribute to real projects, and transition into lasting technical roles." 
+short: From Hackathons To Developer Advocacy +season: 20 episode: 8 guests: - willrussell +image: images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg ids: anchor: datatalksclub/episodes/From-Hackathons-to-Developer-Advocacy---Will-Russel-e339a5f youtube: vXbMUfHE1OE -image: images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Hackathons-to-Developer-Advocacy---Will-Russel-e339a5f apple: https://podcasts.apple.com/us/podcast/from-hackathons-to-developer-advocacy-will-russel/id1541710331?i=1000709634418 spotify: https://open.spotify.com/episode/4Lt785S38GuK0W2m7naRKt youtube: https://www.youtube.com/watch?v=vXbMUfHE1OE -season: 20 -short: From Hackathons To Developer Advocacy -title: 'Run Hackathons & Scale MLH-Style Open Source Fellowships: Organize, Onboard, - Judge' + +description: 'Master hackathons, MLH Fellowship & onboarding: organize events, judge with scoring matrices, scale open-source mentorships to onboard hireable contributors.' +intro: How do you run hackathons and scale MLH‑style open source fellowships while actually getting contributors onboarded, mentored, and judged fairly? In this episode Will Russell — Developer Advocate at Kestra who previously built open source education programs — walks through practical approaches to organizing hackathons and fellowship programs that move people from first contribution to sustained involvement.

We cover formats and online tools for running events, leadership and soft skills for coordination, judging strategies (scoring matrices, categories, tie‑breakers), and sponsor‑driven challenges. Will shares the MLH Fellowship mentorship model, contribution best practices (PR quality, Git skills), and concrete onboarding tactics for complex repos — including environment setup, maintainer collaboration, and cloud workarounds like Colab and VMs. A Willmojis case study highlights image recognition and demo workflow ideas. The conversation also addresses program scalability, budgets, accessibility for students and career changers, and how developer advocacy, documentation, and video demos support adoption.

Listen to learn actionable frameworks for organizing hackathons, onboarding contributors, and scaling MLH‑style open source fellowships so your program produces real contributions and sustainable community growth. +dateadded: 2025-05-26 + +duration: PT01H01M29S + +quotableClips: +- name: Episode Opening & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=0 + endOffset: 201 +- name: 'Video Production Setup: Camera, Lens & Webcam Workflow' + startOffset: 201 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=201 + endOffset: 343 +- name: 'Audio Setup: Microphone, Preamp & Pop Filtering' + startOffset: 343 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=343 + endOffset: 415 +- name: 'Lighting Strategy: 45° Key Light & Background Separation' + startOffset: 415 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=415 + endOffset: 641 +- name: Early Career & Hackathon Discovery + startOffset: 641 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=641 + endOffset: 706 +- name: 'Hackathons as Learning: Git, Teamwork & Building Projects' + startOffset: 706 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=706 + endOffset: 736 +- name: 'Open Source Education Programs: Path from Contract to Full-Time' + startOffset: 736 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=736 + endOffset: 964 +- name: 'Organizing Hackathons: Leadership, Coordination & Soft Skills' + startOffset: 964 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=964 + endOffset: 1207 +- name: 'Career Trade-offs: Maintaining Technical Depth vs. Community Work' + startOffset: 1207 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1207 + endOffset: 1322 +- name: Role Variety at Small Companies vs.
Specialized Teams + startOffset: 1322 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1322 + endOffset: 1398 +- name: 'Running Hackathons: Format, Online Tools & Office Hours' + startOffset: 1398 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1398 + endOffset: 1526 +- name: 'Judging Strategies: Scoring Matrices, Categories & Tie-Breakers' + startOffset: 1526 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1526 + endOffset: 1574 +- name: Sponsor-Driven Challenges & Themed Categories + startOffset: 1574 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1574 + endOffset: 1784 +- name: 'Case Study — Willmojis: Image Recognition, Font Generation & Demo' + startOffset: 1784 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1784 + endOffset: 2019 +- name: 'Major League Hacking (MLH): Community Support & Rep Experience' + startOffset: 2019 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2019 + endOffset: 2143 +- name: 'MLH Fellowship: Mentorship Model & Contributing to Large Repos' + startOffset: 2143 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2143 + endOffset: 2342 +- name: 'Contribution Best Practices: PR Quality, Git Skills & Onboarding' + startOffset: 2342 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2342 + endOffset: 2476 +- name: 'Onboarding Complex Projects: Environment Setup & Maintainer Collaboration' + startOffset: 2476 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2476 + endOffset: 2620 +- name: Hardware Constraints & Cloud Workarounds (Colab, VMs) + startOffset: 2620 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2620 + endOffset: 2762 +- name: 'Program Scalability: Repeatability, Budgets & AI Opportunities' + startOffset: 2762 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2762 + endOffset: 2890 +- name: 'Fellowship Accessibility: Students, Career Changers & Motivation' + startOffset: 2890 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2890 + endOffset: 2954 +- name: 'Developer Advocacy at 
Kestra: Documentation, Demos & Outreach' + startOffset: 2954 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2954 + endOffset: 3109 +- name: 'Content Workflow: Bullet Points, Demos & Collaboration with Writers' + startOffset: 3109 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3109 + endOffset: 3220 +- name: 'Video Strategy: Define Goal, Maintain Pace & Full Walkthroughs' + startOffset: 3220 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3220 + endOffset: 3270 +- name: 'Feature Demo Example: "After Execution" Notifications in Workflows' + startOffset: 3270 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3270 + endOffset: 3442 +- name: 'Learn with Kestra Series: Tool Tutorials (Docker, Postgres, Git)' + startOffset: 3442 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3442 + endOffset: 3599 +- name: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' + startOffset: 3599 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3599 + endOffset: 3689 +- name: Episode Closing & Call to Explore Kestra Content + startOffset: 3689 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3689 + endOffset: 3689 + transcript: - header: Episode Opening & Guest Overview - line: This week we’ll discuss many topics—developer advocacy, organizing hackathons, @@ -1451,140 +1573,6 @@ transcript: sec: 3689 time: '1:01:29' who: Alexey -description: 'Master hackathons, MLH Fellowship & onboarding: organize events, judge - with scoring matrices, scale open-source mentorships to onboard hireable contributors.' -intro: How do you run hackathons and scale MLH‑style open source fellowships while - actually getting contributors onboarded, mentored, and judged fairly? In this episode - Will Russell — Developer Advocate at Kestra who previously built open source education - programs — walks through practical approaches to organizing hackathons and fellowship - programs that move people from first contribution to sustained involvement.

- We cover formats and online tools for running events, leadership and soft skills - for coordination, judging strategies (scoring matrices, categories, tie‑breakers), - and sponsor‑driven challenges. Will shares the MLH Fellowship mentorship model, - contribution best practices (PR quality, Git skills), and concrete onboarding tactics - for complex repos — including environment setup, maintainer collaboration, and cloud - workarounds like Colab and VMs. A Willmojis case study highlights image recognition - and demo workflow ideas. The conversation also addresses program scalability, budgets, - accessibility for students and career changers, and how developer advocacy, documentation, - and video demos support adoption.

Listen to learn actionable frameworks - for organizing hackathons, onboarding contributors, and scaling MLH‑style open source - fellowships so your program produces real contributions and sustainable community - growth. -dateadded: '2025-05-26' -duration: PT01H01M29S -quotableClips: -- name: Episode Opening & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=0 - endOffset: 201 -- name: 'Video Production Setup: Camera, Lens & Webcam Workflow' - startOffset: 201 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=201 - endOffset: 343 -- name: 'Audio Setup: Microphone, Preamp & Pop Filtering' - startOffset: 343 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=343 - endOffset: 415 -- name: 'Lighting Strategy: 45° Key Light & Background Separation' - startOffset: 415 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=415 - endOffset: 641 -- name: Early Career & Hackathon Discovery - startOffset: 641 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=641 - endOffset: 706 -- name: 'Hackathons as Learning: Git, Teamwork & Building Projects' - startOffset: 706 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=706 - endOffset: 736 -- name: 'Open Source Education Programs: Path from Contract to Full-Time' - startOffset: 736 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=736 - endOffset: 964 -- name: 'Organizing Hackathons: Leadership, Coordination & Soft Skills' - startOffset: 964 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=964 - endOffset: 1207 -- name: 'Career Trade-offs: Maintaining Technical Depth vs. Community Work' - startOffset: 1207 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1207 - endOffset: 1322 -- name: Role Variety at Small Companies vs. 
Specialized Teams - startOffset: 1322 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1322 - endOffset: 1398 -- name: 'Running Hackathons: Format, Online Tools & Office Hours' - startOffset: 1398 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1398 - endOffset: 1526 -- name: 'Judging Strategies: Scoring Matrices, Categories & Tie-Breakers' - startOffset: 1526 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1526 - endOffset: 1574 -- name: Sponsor-Driven Challenges & Themed Categories - startOffset: 1574 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1574 - endOffset: 1784 -- name: 'Case Study — Willmojis: Image Recognition, Font Generation & Demo' - startOffset: 1784 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1784 - endOffset: 2019 -- name: 'Major League Hacking (MLH): Community Support & Rep Experience' - startOffset: 2019 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2019 - endOffset: 2143 -- name: 'MLH Fellowship: Mentorship Model & Contributing to Large Repos' - startOffset: 2143 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2143 - endOffset: 2342 -- name: 'Contribution Best Practices: PR Quality, Git Skills & Onboarding' - startOffset: 2342 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2342 - endOffset: 2476 -- name: 'Onboarding Complex Projects: Environment Setup & Maintainer Collaboration' - startOffset: 2476 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2476 - endOffset: 2620 -- name: Hardware Constraints & Cloud Workarounds (Colab, VMs) - startOffset: 2620 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2620 - endOffset: 2762 -- name: 'Program Scalability: Repeatability, Budgets & AI Opportunities' - startOffset: 2762 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2762 - endOffset: 2890 -- name: 'Fellowship Accessibility: Students, Career Changers & Motivation' - startOffset: 2890 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2890 - endOffset: 2954 -- name: 'Developer Advocacy at 
Kestra: Documentation, Demos & Outreach' - startOffset: 2954 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2954 - endOffset: 3109 -- name: 'Content Workflow: Bullet Points, Demos & Collaboration with Writers' - startOffset: 3109 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3109 - endOffset: 3220 -- name: 'Video Strategy: Define Goal, Maintain Pace & Full Walkthroughs' - startOffset: 3220 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3220 - endOffset: 3270 -- name: 'Feature Demo Example: "After Execution" Notifications in Workflows' - startOffset: 3270 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3270 - endOffset: 3442 -- name: 'Learn with Kestra Series: Tool Tutorials (Docker, Postgres, Git)' - startOffset: 3442 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3442 - endOffset: 3599 -- name: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' - startOffset: 3599 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3599 - endOffset: 3689 -- name: Episode Closing & Call to Explore Kestra Content - startOffset: 3689 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3689 - endOffset: 3689 --- Links: diff --git a/_podcast/s20e09-taking-your-freelance-career-to-next-level.md b/_podcast/to-update/s20e09-taking-your-freelance-career-to-next-level.md similarity index 93% rename from _podcast/s20e09-taking-your-freelance-career-to-next-level.md rename to _podcast/to-update/s20e09-taking-your-freelance-career-to-next-level.md index 6b59556f..ae6ac6b7 100644 --- a/_podcast/s20e09-taking-your-freelance-career-to-next-level.md +++ b/_podcast/to-update/s20e09-taking-your-freelance-career-to-next-level.md @@ -1,28 +1,103 @@ --- +title: "Context: Through Dimitri’s journey and practical segments on job data, client acquisition, pricing, AI tools, and scaling, the episode maps the real-world mechanics of going freelance in data. 
+ +Core narrative: The unifying idea is that a sustainable, scalable freelance data career is built by starting with market demand—validate financial targets, specialize around high‑impact problems, productize repeatable analytics offerings, leverage productivity tools (including AI) to deliver efficiently, and adopt deliberate pricing and client‑retention models (projects, subscriptions, or agency paths) so you can reliably land clients, capture value, and grow on your own terms." +short: Taking your Freelance Career to the Next Level +season: 20 episode: 9 guests: - dimitrivisnadi -date: 2025-11-07 -topics: -- Freelance -- Career Growth -- Consulting -- Personal Branding -- Entrepreneurship -- Remote Work -- Business Development +image: images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg ids: anchor: datatalksclub/episodes/Can-You-Quit-Your-Job-and-Still-Succeed-as-a-Data-Freelancer-e360j7e youtube: S93V8RgwBig -image: images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Can-You-Quit-Your-Job-and-Still-Succeed-as-a-Data-Freelancer-e360j7e apple: https://podcasts.apple.com/us/podcast/can-you-quit-your-job-and-still-succeed-as-a-data-freelancer/id1541710331?i=1000718997257 spotify: https://open.spotify.com/episode/3BknrKqhLggx1G5ZbrfgFc youtube: https://www.youtube.com/watch?v=S93V8RgwBig -season: 20 -short: Taking your Freelance Career to the Next Level -title: 'Data Freelancer Playbook: Land Clients, Price Services & Use AI for Productivity' + +description: Master data freelancer tactics, pricing strategies and AI tools to land clients, price services confidently, and boost productivity for higher income +intro: How do you move from employed data pro to a sustainable data freelancer who consistently lands clients, prices services well, and uses AI to boost productivity? 
In this episode, Dimitri Visnadi — an independent data consultant focused on data strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles at HP and a Google-partnered firm, and holds a Master’s in Business Analytics & Computer Science from UCL — walks through a practical playbook for data freelancers.

Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus problem-solving, and how to validate freelance viability with financial targets. He explains how to land initial clients through recruiters and LinkedIn, the idea behind a data-freelancer job board, market-driven specialization, and insights on rates, top skills and data management. You’ll hear about scaling choices (lifestyle business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course and community approaches for branding and marketing, subscription models and client relationship management, high-impact small analyses, pricing strategies (hourly vs packages), and transition planning.

Listen to get concrete guidance on landing clients, setting prices, structuring offers, and using AI tools to increase productivity as a freelance data consultant +topics: +- Freelance +- Career Growth +- Consulting +- Personal Branding +- Entrepreneurship +- Remote Work +- Business Development +dateadded: 2025-07-28 +date: 2025-11-07 + +duration: PT01H05M29S + +quotableClips: +- name: Episode Opening & Dimitri’s Data Journey + startOffset: 0 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=0 + endOffset: 341 +- name: Job Tenure Trends & Freelancer Types + startOffset: 341 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=341 + endOffset: 650 +- name: Expertise vs Problem-Solving in Freelance Work + startOffset: 650 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=650 + endOffset: 853 +- name: 'Validating Freelance Viability: Financial Targets' + startOffset: 853 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=853 + endOffset: 968 +- name: 'Landing Initial Clients: Recruiters & LinkedIn' + startOffset: 968 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=968 + endOffset: 1053 +- name: Market Trends & Building a Data-Freelancer Job Board + startOffset: 1053 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1053 + endOffset: 1431 +- name: Market-Driven Specialization & Starting Paths + startOffset: 1431 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1431 + endOffset: 1508 +- name: 'Job Board Insights: Rates, Top Skills & "Data Management"' + startOffset: 1508 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1508 + endOffset: 1968 +- name: 'Lifestyle Business vs Agency: Scaling Choices' + startOffset: 1968 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1968 + endOffset: 2281 +- name: 'AI Tools for Productivity: Claude, ChatGPT, Cursor' + startOffset: 2281 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2281 + endOffset: 2730 +- name: 'Course & Community: Branding, Marketing, and Support' + startOffset: 2730 + url: 
https://www.youtube.com/watch?v=S93V8RgwBig&t=2730 + endOffset: 2913 +- name: Subscription Model & Client Relationship Management + startOffset: 2913 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2913 + endOffset: 3140 +- name: 'High-Impact Analytics: Small Analyses, Big Returns' + startOffset: 3140 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3140 + endOffset: 3407 +- name: 'Pricing Strategies: Hourly, Project Packages, and Transitioning' + startOffset: 3407 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3407 + endOffset: 3662 +- name: Notice Periods & Transition Planning for Freelancers + startOffset: 3662 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3662 + endOffset: 3929 +- name: Episode Wrap-up & Final Advice + startOffset: 3929 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3929 + endOffset: 3929 + transcript: - header: Episode Opening & Dimitri’s Data Journey - header: Episode Opening & Dimitri’s Data Journey @@ -590,92 +665,6 @@ transcript: sec: 3929 time: '1:05:29' who: Alexey -intro: How do you move from employed data pro to a sustainable data freelancer who - consistently lands clients, prices services well, and uses AI to boost productivity? - In this episode, Dimitri Visnadi — an independent data consultant focused on data - strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles - at HP and a Google-partnered firm, and holds a Masters in Business Analytics & Computer - Science from UCL — walks through a practical playbook for data freelancers.

- Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus - problem-solving, and how to validate freelance viability with financial targets. - He explains how to land initial clients through recruiters and LinkedIn, the idea - behind a data-freelancer job board, market-driven specialization, and insights on - rates, top skills and data management. You’ll hear about scaling choices (lifestyle - business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course - and community approaches for branding and marketing, subscription models and client - relationship management, high-impact small analyses, pricing strategies (hourly - vs packages), and transition planning.

Listen to get concrete guidance - on landing clients, setting prices, structuring offers, and using AI tools to increase - productivity as a freelance data consultant. -description: Master data freelancer tactics, pricing strategies and AI tools to land - clients, price services confidently, and boost productivity for higher income. -dateadded: '2025-07-28' -duration: PT01H05M29S -quotableClips: -- name: Episode Opening & Dimitri’s Data Journey - startOffset: 0 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=0 - endOffset: 341 -- name: Job Tenure Trends & Freelancer Types - startOffset: 341 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=341 - endOffset: 650 -- name: Expertise vs Problem-Solving in Freelance Work - startOffset: 650 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=650 - endOffset: 853 -- name: 'Validating Freelance Viability: Financial Targets' - startOffset: 853 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=853 - endOffset: 968 -- name: 'Landing Initial Clients: Recruiters & LinkedIn' - startOffset: 968 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=968 - endOffset: 1053 -- name: Market Trends & Building a Data-Freelancer Job Board - startOffset: 1053 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1053 - endOffset: 1431 -- name: Market-Driven Specialization & Starting Paths - startOffset: 1431 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1431 - endOffset: 1508 -- name: 'Job Board Insights: Rates, Top Skills & "Data Management"' - startOffset: 1508 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1508 - endOffset: 1968 -- name: 'Lifestyle Business vs Agency: Scaling Choices' - startOffset: 1968 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1968 - endOffset: 2281 -- name: 'AI Tools for Productivity: Claude, ChatGPT, Cursor' - startOffset: 2281 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2281 - endOffset: 2730 -- name: 'Course & Community: Branding, Marketing, and Support' - startOffset: 
2730 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2730 - endOffset: 2913 -- name: Subscription Model & Client Relationship Management - startOffset: 2913 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2913 - endOffset: 3140 -- name: 'High-Impact Analytics: Small Analyses, Big Returns' - startOffset: 3140 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3140 - endOffset: 3407 -- name: 'Pricing Strategies: Hourly, Project Packages, and Transitioning' - startOffset: 3407 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3407 - endOffset: 3662 -- name: Notice Periods & Transition Planning for Freelancers - startOffset: 3662 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3662 - endOffset: 3929 -- name: Episode Wrap-up & Final Advice - startOffset: 3929 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3929 - endOffset: 3929 --- Links: diff --git a/_podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md b/_podcast/to-update/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md similarity index 94% rename from _podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md rename to _podcast/to-update/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md index d95104a2..0d8bc1f8 100644 --- a/_podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md +++ b/_podcast/to-update/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md @@ -1,20 +1,118 @@ --- +title: "Context: An electrical-engineering researcher turned founder and freelancer describes moving from simulation-driven academia into startups and consulting, recounting a pivot to synthetic medical imaging, building IoT/data prototypes, client acquisition, and practical tooling and workflows while balancing technical depth, cashflow risks, and continuous learning. 
+ +Core: The episode’s unifying idea is a scientific, problem-first approach to data engineering and product development—validate hypotheses quickly with minimal viable (often manual) solutions, iterate fast using domain specialization and systems thinking, and pragmatically balance technical rigor with business constraints to turn research into real, sustainable products and freelance work." +short: From Simulation Algorithms to Production-Grade Data Systems +season: 21 episode: 1 guests: - orellgarten +image: images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg ids: anchor: datatalksclub/episodes/From-Simulations-to-Freelance-Data-Engineering-Orells-Journey-Out-of-Academia-and-Into-Consulting---Orell-Garten-e369a6b youtube: pkcpH5N-GP8 -image: images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Simulations-to-Freelance-Data-Engineering-Orells-Journey-Out-of-Academia-and-Into-Consulting---Orell-Garten-e369a6b apple: https://podcasts.apple.com/us/podcast/from-simulations-to-freelance-data-engineering-orells/id1541710331?i=1000720245457 spotify: https://open.spotify.com/episode/5HCSIO0mO8Pr5Yv9puZ72R youtube: https://www.youtube.com/watch?v=pkcpH5N-GP8 -season: 21 -short: From Simulation Algorithms to Production-Grade Data Systems -title: 'Synthetic Medical Imaging Data for AI: Startup Data Engineering, MVPs & Freelance - Transition' + +description: 'Learn synthetic medical imaging & data engineering: build MVPs, integrate simulation-HPC, optimize ETL, and shift to freelance with client-acquisition tactics.' +intro: 'How do you turn simulation research into usable synthetic medical imaging data for AI, build a minimal viable data pipeline, and pivot into freelance consulting? 
In this episode, Orell Garten — an electrical engineer trained in simulation algorithms who left a PhD during COVID and explored productization through a government-funded startup program — walks through that journey. We cover his simulation work in RF and wave propagation, the startup pivot to synthetic medical imaging data for AI, and the go-to-market lesson of problem-first versus technology-first.

Listen for practical data engineering guidance: minimal viable data work, simulation–HPC integration, secure data management, and an MVP workflow built on manual extraction, CSVs, and local analysis. Orell also discusses scientific-method product discovery, preventing overengineering with weekly feedback, and tool choices (Python, C++, DBT, Docker, DuckDB). He explains launching a freelance practice via LinkedIn, prototype delivery for IoT data engineering, client acquisition, and managing runway and cashflow. If you’re building synthetic data pipelines, medical imaging datasets, or transitioning to freelance data engineering, this episode delivers concrete tactics, risks to plan for, and hands-on techniques you can apply immediately.' +dateadded: 2025-08-05 + +duration: PT01H03M31S + +quotableClips: +- name: Episode Introduction & Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=0 + endOffset: 139 +- name: 'Career Background: Electrical Engineering and Simulation Algorithms' + startOffset: 139 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=139 + endOffset: 196 +- name: Transition Out of Academia During COVID + startOffset: 196 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=196 + endOffset: 282 +- name: 'Simulation Research: RF and Wave Propagation Modeling' + startOffset: 282 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=282 + endOffset: 544 +- name: 'Startup Pivot: Synthetic Medical Imaging Data for AI' + startOffset: 544 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=544 + endOffset: 582 +- name: 'Go-to-Market Lesson: Problem-First vs Technology-First' + startOffset: 582 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=582 + endOffset: 800 +- name: 'Early Data Engineering Practice: Minimal Viable Data Work' + startOffset: 800 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=800 + endOffset: 861 +- name: Simulation-HPC Integration and Secure Data Management + startOffset: 861 + url: 
https://www.youtube.com/watch?v=pkcpH5N-GP8&t=861 + endOffset: 965 +- name: 'Iteration Differences: Academia vs. Startup Timelines' + startOffset: 965 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=965 + endOffset: 1075 +- name: Scientific Method in Product Discovery and Hypothesis Testing + startOffset: 1075 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1075 + endOffset: 1174 +- name: 'Freelance Launch: From CTO Role to Consulting via LinkedIn' + startOffset: 1174 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1174 + endOffset: 1379 +- name: 'Prototype Delivery: IoT Data Engineering Proof of Concept' + startOffset: 1379 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1379 + endOffset: 1533 +- name: 'Freelance Risks: Runway, Cashflow, and Operating Expenses' + startOffset: 1533 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1533 + endOffset: 1850 +- name: 'Client Acquisition: Networking, Recruiters, and Referrals' + startOffset: 1850 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1850 + endOffset: 2062 +- name: 'Specialization: Industrial Data Integration and Custom ETL' + startOffset: 2062 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2062 + endOffset: 2340 +- name: 'MVP Workflow: Manual Extraction, CSVs, and Local Analysis' + startOffset: 2340 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2340 + endOffset: 2607 +- name: 'Preventing Overengineering: Weekly Feedback and Iteration' + startOffset: 2607 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2607 + endOffset: 2999 +- name: 'Continuous Learning: Practical Experiments and DuckDB' + startOffset: 2999 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2999 + endOffset: 3222 +- name: 'LLMs for Data Cleaning: Domain Knowledge Limitations' + startOffset: 3222 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3222 + endOffset: 3509 +- name: 'Tech Stack & Systems Thinking: Python, C++, DBT, Docker' + startOffset: 3509 + url: 
https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3509 + endOffset: 3653 +- name: 'Manual Data Exploration: Handling Edge Cases Before Automation' + startOffset: 3653 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3653 + endOffset: 3811 +- name: Closing Remarks and Freelancing Advice + startOffset: 3811 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3811 + endOffset: 3811 + transcript: - header: Episode Introduction & Overview - line: This week, we'll talk about many different things. We will discuss our guest’s @@ -783,115 +881,6 @@ transcript: sec: 3811 time: '1:03:31' who: Alexey -description: 'Learn synthetic medical imaging & data engineering: build MVPs, integrate - simulation-HPC, optimize ETL, and shift to freelance with client-acquisition tactics.' -intro: 'How do you turn simulation research into usable synthetic medical imaging - data for AI, build a minimal viable data pipeline, and pivot into freelance consulting? - In this episode, Orell Garten — an electrical engineer trained in simulation algorithms - who left a PhD during COVID and explored productization through a government-funded - startup program — walks through that journey. We cover his simulation work in RF - and wave propagation, the startup pivot to synthetic medical imaging data for AI, - and the go-to-market lesson of problem-first versus technology-first.

Listen - for practical data engineering guidance: minimal viable data work, simulation–HPC - integration, secure data management, and an MVP workflow built on manual extraction, - CSVs, and local analysis. Orell also discusses scientific-method product discovery, - preventing overengineering with weekly feedback, and tool choices (Python, C++, - DBT, Docker, DuckDB). He explains launching a freelance practice via LinkedIn, prototype - delivery for IoT data engineering, client acquisition, and managing runway and cashflow. - If you’re building synthetic data pipelines, medical imaging datasets, or transitioning - to freelance data engineering, this episode delivers concrete tactics, risks to - plan for, and hands-on techniques you can apply immediately.' -dateadded: '2025-08-05' -duration: PT01H03M31S -quotableClips: -- name: Episode Introduction & Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=0 - endOffset: 139 -- name: 'Career Background: Electrical Engineering and Simulation Algorithms' - startOffset: 139 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=139 - endOffset: 196 -- name: Transition Out of Academia During COVID - startOffset: 196 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=196 - endOffset: 282 -- name: 'Simulation Research: RF and Wave Propagation Modeling' - startOffset: 282 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=282 - endOffset: 544 -- name: 'Startup Pivot: Synthetic Medical Imaging Data for AI' - startOffset: 544 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=544 - endOffset: 582 -- name: 'Go-to-Market Lesson: Problem-First vs Technology-First' - startOffset: 582 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=582 - endOffset: 800 -- name: 'Early Data Engineering Practice: Minimal Viable Data Work' - startOffset: 800 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=800 - endOffset: 861 -- name: Simulation-HPC Integration and Secure Data Management - startOffset: 861 - url: 
https://www.youtube.com/watch?v=pkcpH5N-GP8&t=861 - endOffset: 965 -- name: 'Iteration Differences: Academia vs. Startup Timelines' - startOffset: 965 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=965 - endOffset: 1075 -- name: Scientific Method in Product Discovery and Hypothesis Testing - startOffset: 1075 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1075 - endOffset: 1174 -- name: 'Freelance Launch: From CTO Role to Consulting via LinkedIn' - startOffset: 1174 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1174 - endOffset: 1379 -- name: 'Prototype Delivery: IoT Data Engineering Proof of Concept' - startOffset: 1379 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1379 - endOffset: 1533 -- name: 'Freelance Risks: Runway, Cashflow, and Operating Expenses' - startOffset: 1533 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1533 - endOffset: 1850 -- name: 'Client Acquisition: Networking, Recruiters, and Referrals' - startOffset: 1850 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1850 - endOffset: 2062 -- name: 'Specialization: Industrial Data Integration and Custom ETL' - startOffset: 2062 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2062 - endOffset: 2340 -- name: 'MVP Workflow: Manual Extraction, CSVs, and Local Analysis' - startOffset: 2340 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2340 - endOffset: 2607 -- name: 'Preventing Overengineering: Weekly Feedback and Iteration' - startOffset: 2607 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2607 - endOffset: 2999 -- name: 'Continuous Learning: Practical Experiments and DuckDB' - startOffset: 2999 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2999 - endOffset: 3222 -- name: 'LLMs for Data Cleaning: Domain Knowledge Limitations' - startOffset: 3222 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3222 - endOffset: 3509 -- name: 'Tech Stack & Systems Thinking: Python, C++, DBT, Docker' - startOffset: 3509 - url: 
https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3509 - endOffset: 3653 -- name: 'Manual Data Exploration: Handling Edge Cases Before Automation' - startOffset: 3653 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3653 - endOffset: 3811 -- name: Closing Remarks and Freelancing Advice - startOffset: 3811 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3811 - endOffset: 3811 --- Links: diff --git a/_podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md b/_podcast/to-update/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md similarity index 96% rename from _podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md rename to _podcast/to-update/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md index 5fbb9020..9073d00b 100644 --- a/_podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md +++ b/_podcast/to-update/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md @@ -1,20 +1,142 @@ --- +title: "Context: The episode examines a practical approach to data work—moving from engineering to product thinking, accepting imperfect data (wabi‑sabi), diagnosing trust failures, prioritizing maintenance/rollout/innovation, using simple reliability signals and feedback loops, and aligning team time and processes to measurable business impact—especially as generative AI raises readiness demands and legacy systems require pragmatic replacement. + +Core: Adopt a mindful, impact‑first data strategy that accepts and communicates inevitable imperfection, prioritizes process and measurable business outcomes over perfect tooling, and restores trust through clear signals, closed feedback loops, and disciplined allocation of maintenance, rollout, and innovation effort so data products remain reliable, scalable, and ready to deliver real ROI." 
+short: 'Mindful Data Strategy: From Pipelines to Business Impact' +season: 21 episode: 2 guests: - liorbarak +image: images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg ids: anchor: datatalksclub/episodes/How-to-Rebuild-Data-Trust--Mindful-Data-Strategy-and-Maintenance-vs-Innovation---Lior-Barak-e36obcs youtube: B76J4QkZPWs -image: images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/How-to-Rebuild-Data-Trust--Mindful-Data-Strategy-and-Maintenance-vs-Innovation---Lior-Barak-e36obcs apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: https://open.spotify.com/episode/54B0xvUI1eQjXW0s1eqgbI youtube: https://www.youtube.com/watch?v=B76J4QkZPWs -season: 21 -short: 'Mindful Data Strategy: From Pipelines to Business Impact' -title: 'Restore Data Trust: Practical Data Quality, Prioritization & Generative AI - Readiness' + +description: Discover how to restore data trust with data quality fixes, prioritization and generative AI readiness—KPI diagnosis, incident-driven roadmaps and rollout tips +intro: How do you restore data trust and make your organization ready for generative AI without drowning in tools or endless cleanup? In this episode, Lior Barak — author of Data is Like a Plate of Hummus, co‑host of WHAT the Data?!, and founder of Tale About Data — walks through practical approaches to data quality, prioritization, and generative AI readiness from his 12+ years building data teams.

We cover a mindful data strategy that accepts imperfection (Wabi‑sabi), the shift from engineering to product thinking, and automating data infrastructure. Lior explains common data trust failures and hallucination risks with generative models, offers diagnostic tactics for core KPI and dashboard inaccuracies, and pinpoints pipeline failure modes (ingestion, SQL logic, lineage). You’ll hear a trust‑restoration framework focused on maintenance, rollouts, and innovation, a traffic‑light dashboard for reliability, incident analysis to find recurring causes, and practical work allocation and team stress benchmarks (≈45% maintenance).

Listen to learn concrete steps to prioritize data work by business impact, measure readiness for AI by ROI and product signals, and manage legacy systems and executive ad‑hoc requests with intent and impact in mind +dateadded: 2025-08-18 + +duration: PT01H06M05S + +quotableClips: +- name: Podcast Introduction and Episode Overview (mindful data strategy) + startOffset: 0 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=0 + endOffset: 144 +- name: 'Lior Barak: Background and shift from engineering to product' + startOffset: 144 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=144 + endOffset: 246 +- name: 'Startup and platform experience: automating data infrastructure' + startOffset: 246 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=246 + endOffset: 385 +- name: Product management learning paths for engineers and data scientists + startOffset: 385 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=385 + endOffset: 500 +- name: 'Wabi‑sabi applied to data: accepting imperfection and communicating it' + startOffset: 500 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=500 + endOffset: 588 +- name: 'Data trust crisis: industry stats and common trust failures' + startOffset: 588 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=588 + endOffset: 707 +- name: 'Generative AI and hallucinations: managing expectations for models' + startOffset: 707 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=707 + endOffset: 849 +- name: 'Data quality metaphor: Lego bricks and pragmatic trade‑offs' + startOffset: 849 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=849 + endOffset: 1052 +- name: 'Prioritization vs. 
tooling: translating data work into business impact' + startOffset: 1052 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1052 + endOffset: 1250 +- name: 'Core KPI diagnosis: investigating dashboard inaccuracies' + startOffset: 1250 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1250 + endOffset: 1322 +- name: 'Pipeline failure points: ingestion, SQL logic, and lineage checks' + startOffset: 1322 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1322 + endOffset: 1406 +- name: 'Process failures over tool fixes: focusing on root causes' + startOffset: 1406 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1406 + endOffset: 1692 +- name: 'Trust restoration framework: maintenance, rollouts, and innovation' + startOffset: 1692 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1692 + endOffset: 1756 +- name: 'Incident analysis: using incidents to identify recurring problems' + startOffset: 1756 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1756 + endOffset: 1847 +- name: Dashboard traffic‑light system for data reliability (green/yellow/red) + startOffset: 1847 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1847 + endOffset: 1998 +- name: 'Analyst feedback and automation: closing the communication loop' + startOffset: 1998 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1998 + endOffset: 2299 +- name: 'Work allocation: tracking maintenance, rollout, and innovation time' + startOffset: 2299 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2299 + endOffset: 2481 +- name: 'Team stress index and guideline: ~45% maintenance as healthy baseline' + startOffset: 2481 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2481 + endOffset: 2592 +- name: 'Data product lifecycle: development, rollout, maturity, and decline' + startOffset: 2592 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2592 + endOffset: 2747 +- name: 'Zen practices for data teams: mindfulness, acceptance, and planning' + startOffset: 2747 + url: 
https://www.youtube.com/watch?v=B76J4QkZPWs&t=2747 + endOffset: 3014 +- name: 'Generative AI demand: why data readiness matters now' + startOffset: 3014 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3014 + endOffset: 3101 +- name: 'Measuring readiness by impact: ROI and product success signals' + startOffset: 3101 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3101 + endOffset: 3379 +- name: 'Legacy systems strategy: minimal maintenance and planned replacement' + startOffset: 3379 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3379 + endOffset: 3551 +- name: 'Replacing legacy: selling the change through user impact' + startOffset: 3551 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3551 + endOffset: 3623 +- name: 'Executive ad‑hoc requests: elicit intent and quantify expected impact' + startOffset: 3623 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3623 + endOffset: 3753 +- name: 'Career guidance: choosing analytics, engineering, or product paths' + startOffset: 3753 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3753 + endOffset: 3876 +- name: Closing reflections, resources, and suggested next steps + startOffset: 3876 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3876 + endOffset: 3965 +- name: Episode Outro and Hummus Banter + startOffset: 3965 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3965 + endOffset: 3965 + transcript: - header: Podcast Introduction and Episode Overview (mindful data strategy) - line: This week we'll talk about mindful data strategy and how teams can shift from @@ -1269,140 +1391,6 @@ transcript: sec: 3965 time: '1:06:05' who: Alexey -description: Discover how to restore data trust with data quality fixes, prioritization - and generative AI readiness—KPI diagnosis, incident-driven roadmaps and rollout - tips. -intro: How do you restore data trust and make your organization ready for generative - AI without drowning in tools or endless cleanup? 
In this episode, Lior Barak — author - of Data is Like a Plate of Hummus, co‑host of WHAT the Data?!, and founder of Tale - About Data — walks through practical approaches to data quality, prioritization, - and generative AI readiness from his 12+ years building data teams.

We - cover a mindful data strategy that accepts imperfection (Wabi‑sabi), the shift from - engineering to product thinking, and automating data infrastructure. Lior explains - common data trust failures and hallucination risks with generative models, offers - diagnostic tactics for core KPI and dashboard inaccuracies, and pinpoints pipeline - failure modes (ingestion, SQL logic, lineage). You’ll hear a trust‑restoration framework - focused on maintenance, rollouts, and innovation, a traffic‑light dashboard for - reliability, incident analysis to find recurring causes, and practical work allocation - and team stress benchmarks (≈45% maintenance).

Listen to learn concrete - steps to prioritize data work by business impact, measure readiness for AI by ROI - and product signals, and manage legacy systems and executive ad‑hoc requests with - intent and impact in mind. -dateadded: '2025-08-18' -duration: PT01H06M05S -quotableClips: -- name: Podcast Introduction and Episode Overview (mindful data strategy) - startOffset: 0 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=0 - endOffset: 144 -- name: 'Lior Barak: Background and shift from engineering to product' - startOffset: 144 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=144 - endOffset: 246 -- name: 'Startup and platform experience: automating data infrastructure' - startOffset: 246 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=246 - endOffset: 385 -- name: Product management learning paths for engineers and data scientists - startOffset: 385 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=385 - endOffset: 500 -- name: 'Wabi‑sabi applied to data: accepting imperfection and communicating it' - startOffset: 500 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=500 - endOffset: 588 -- name: 'Data trust crisis: industry stats and common trust failures' - startOffset: 588 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=588 - endOffset: 707 -- name: 'Generative AI and hallucinations: managing expectations for models' - startOffset: 707 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=707 - endOffset: 849 -- name: 'Data quality metaphor: Lego bricks and pragmatic trade‑offs' - startOffset: 849 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=849 - endOffset: 1052 -- name: 'Prioritization vs. 
tooling: translating data work into business impact' - startOffset: 1052 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1052 - endOffset: 1250 -- name: 'Core KPI diagnosis: investigating dashboard inaccuracies' - startOffset: 1250 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1250 - endOffset: 1322 -- name: 'Pipeline failure points: ingestion, SQL logic, and lineage checks' - startOffset: 1322 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1322 - endOffset: 1406 -- name: 'Process failures over tool fixes: focusing on root causes' - startOffset: 1406 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1406 - endOffset: 1692 -- name: 'Trust restoration framework: maintenance, rollouts, and innovation' - startOffset: 1692 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1692 - endOffset: 1756 -- name: 'Incident analysis: using incidents to identify recurring problems' - startOffset: 1756 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1756 - endOffset: 1847 -- name: Dashboard traffic‑light system for data reliability (green/yellow/red) - startOffset: 1847 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1847 - endOffset: 1998 -- name: 'Analyst feedback and automation: closing the communication loop' - startOffset: 1998 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1998 - endOffset: 2299 -- name: 'Work allocation: tracking maintenance, rollout, and innovation time' - startOffset: 2299 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2299 - endOffset: 2481 -- name: 'Team stress index and guideline: ~45% maintenance as healthy baseline' - startOffset: 2481 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2481 - endOffset: 2592 -- name: 'Data product lifecycle: development, rollout, maturity, and decline' - startOffset: 2592 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2592 - endOffset: 2747 -- name: 'Zen practices for data teams: mindfulness, acceptance, and planning' - startOffset: 2747 - url: 
https://www.youtube.com/watch?v=B76J4QkZPWs&t=2747 - endOffset: 3014 -- name: 'Generative AI demand: why data readiness matters now' - startOffset: 3014 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3014 - endOffset: 3101 -- name: 'Measuring readiness by impact: ROI and product success signals' - startOffset: 3101 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3101 - endOffset: 3379 -- name: 'Legacy systems strategy: minimal maintenance and planned replacement' - startOffset: 3379 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3379 - endOffset: 3551 -- name: 'Replacing legacy: selling the change through user impact' - startOffset: 3551 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3551 - endOffset: 3623 -- name: 'Executive ad‑hoc requests: elicit intent and quantify expected impact' - startOffset: 3623 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3623 - endOffset: 3753 -- name: 'Career guidance: choosing analytics, engineering, or product paths' - startOffset: 3753 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3753 - endOffset: 3876 -- name: Closing reflections, resources, and suggested next steps - startOffset: 3876 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3876 - endOffset: 3965 -- name: Episode Outro and Hummus Banter - startOffset: 3965 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3965 - endOffset: 3965 --- Links: diff --git a/_podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md b/_podcast/to-update/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md similarity index 96% rename from _podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md rename to _podcast/to-update/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md index 0ee2b285..7174dc12 100644 --- a/_podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md +++ 
b/_podcast/to-update/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md @@ -1,20 +1,106 @@ --- +title: "A deliberate, project-first career pivot: leveraging medical and statistical domain knowledge while learning by doing—through freelance projects, structured cohorts, public-facing content, and portfolio/dev-ops work—to build practical ML skills, visibility, and job readiness while balancing time and commitments." +short: 'From Medicine to Machine Learning: How Public Learning Turned into a Career' +season: 21 episode: 3 guests: - pastorsoto +image: images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg ids: anchor: datatalksclub/episodes/From-Medicine-to-Machine-Learning-How-Public-Learning-Turned-into-a-Career---Pastor-Soto-e376e66 youtube: 5km62e4nDaw -image: images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Medicine-to-Machine-Learning-How-Public-Learning-Turned-into-a-Career---Pastor-Soto-e376e66 apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: https://open.spotify.com/episode/22Gc1bDecKA33KHAaSF9fx youtube: https://www.youtube.com/watch?v=5km62e4nDaw -season: 21 -short: 'From Medicine to Machine Learning: How Public Learning Turned into a Career' -title: 'Medicine to Machine Learning: Build Healthcare ML Portfolio, Freelance on - Upwork & Deploy to Cloud' + +description: Learn how to build a healthcare ML portfolio, land Upwork freelance gigs and deploy Dockerized models to AWS—practical tips, capstones, and career strategies +intro: How do you go from medical school to shipping production-ready healthcare ML—and get paid for it on platforms like Upwork? 
In this episode, Pastor Soto, a machine learning engineer and mentor who transitioned from medicine and criminology into production ML, walks through the practical steps he used to build a healthcare ML portfolio and freelance career.

We cover his career trajectory (statistician → data analyst → data engineer), the skill progression from SPSS and R to Python, and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp and public learning—publishing exercises, leaderboards, and focused content—attracted interviews and opportunities. He also breaks down portfolio tactics (Notion notes, capstone projects using healthcare datasets), production topics (Dockerized models, AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and soft skills like English communication and handling critique.

Listeners will come away with concrete, repeatable strategies for building a healthcare machine learning portfolio, landing freelance work, and deploying models to the cloud—plus time-management and mentoring practices that make it sustainable +dateadded: 2025-08-22 + +duration: PT01H01M07S + +quotableClips: +- name: Podcast Introduction & Event Announcements + startOffset: 0 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=0 + endOffset: 94 +- name: 'Guest Overview: Transition from Medicine and Criminology to Machine Learning + Career' + startOffset: 94 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=94 + endOffset: 201 +- name: 'Career Trajectory: Statistician → Data Analyst → Data Engineer' + startOffset: 201 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=201 + endOffset: 351 +- name: 'Skill Progression: SPSS, Excel, R, and Transition to Python' + startOffset: 351 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=351 + endOffset: 365 +- name: 'Freelancing Beginnings: First Upwork Gigs and Early Projects' + startOffset: 365 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=365 + endOffset: 548 +- name: 'Learning-by-Doing: Accepting Unknown Projects to Build Skills' + startOffset: 548 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=548 + endOffset: 704 +- name: 'Balancing Dual Paths: Medical School and Data Work' + startOffset: 704 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=704 + endOffset: 828 +- name: 'Medical Reasoning in Data Science: Probability, Reranking, and Triage' + startOffset: 828 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=828 + endOffset: 869 +- name: 'Communication Skills: Improving English for Remote Work' + startOffset: 869 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=869 + endOffset: 1443 +- name: 'Live Cohorts & ML Zoom Camp: Benefits of Structured, Hands-On Learning' + startOffset: 1443 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1443 + endOffset: 1647 +- name: 'Public Learning 
Strategy: Leaderboards, Posting, and Personal Branding' + startOffset: 1647 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1647 + endOffset: 1820 +- name: 'Content Framing: Owning Topics (ROC, Classifier Evaluation)' + startOffset: 1820 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1820 + endOffset: 1970 +- name: 'Recruiter Outreach: LinkedIn Visibility and Meta Interview Experience' + startOffset: 1970 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1970 + endOffset: 2116 +- name: 'Handling Critique: Social Media Feedback and Community Engagement' + startOffset: 2116 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2116 + endOffset: 2463 +- name: 'Portfolio Building: Notes, Notion, and Structured Content Workflow' + startOffset: 2463 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2463 + endOffset: 2868 +- name: 'Capstone Projects: Healthcare Datasets, Dockerized Models, and AWS Deployment' + startOffset: 2868 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2868 + endOffset: 3053 +- name: 'Community Contribution: Mentoring with DeepLearning.AI and Stanford Coding + Place' + startOffset: 3053 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3053 + endOffset: 3420 +- name: 'Time Management: Productivity Strategies for Medical Students and ML Learners' + startOffset: 3420 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3420 + endOffset: 3600 +- name: 'Final Reflections: Consistency, Career Next Steps, and Motivation' + startOffset: 3600 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3600 + endOffset: 3667 + transcript: - header: Podcast Introduction & Event Announcements - line: Hi everyone, welcome to our event. This event is presented by Redox Club, @@ -859,104 +945,6 @@ transcript: sec: 3667 time: '1:01:07' who: Alexey -description: Learn how to build a healthcare ML portfolio, land Upwork freelance gigs - and deploy Dockerized models to AWS—practical tips, capstones, and career strategies. 
-intro: How do you go from medical school to shipping production-ready healthcare ML—and - get paid for it on platforms like Upwork? In this episode, Pastor Soto, a machine - learning engineer and mentor who transitioned from medicine and criminology into - production ML, walks through the practical steps he used to build a healthcare ML - portfolio and freelance career.

We cover his career trajectory (statistician - → data analyst → data engineer), the skill progression from SPSS and R to Python, - and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp - and public learning—publishing exercises, leaderboards, and focused content—attracted - interviews and opportunities. He also breaks down portfolio tactics (Notion notes, - capstone projects using healthcare datasets), production topics (Dockerized models, - AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and - soft skills like English communication and handling critique.

Listeners - will come away with concrete, repeatable strategies for building a healthcare machine - learning portfolio, landing freelance work, and deploying models to the cloud—plus - time-management and mentoring practices that make it sustainable. -dateadded: '2025-08-22' -duration: PT01H01M07S -quotableClips: -- name: Podcast Introduction & Event Announcements - startOffset: 0 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=0 - endOffset: 94 -- name: 'Guest Overview: Transition from Medicine and Criminology to Machine Learning - Career' - startOffset: 94 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=94 - endOffset: 201 -- name: 'Career Trajectory: Statistician → Data Analyst → Data Engineer' - startOffset: 201 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=201 - endOffset: 351 -- name: 'Skill Progression: SPSS, Excel, R, and Transition to Python' - startOffset: 351 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=351 - endOffset: 365 -- name: 'Freelancing Beginnings: First Upwork Gigs and Early Projects' - startOffset: 365 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=365 - endOffset: 548 -- name: 'Learning-by-Doing: Accepting Unknown Projects to Build Skills' - startOffset: 548 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=548 - endOffset: 704 -- name: 'Balancing Dual Paths: Medical School and Data Work' - startOffset: 704 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=704 - endOffset: 828 -- name: 'Medical Reasoning in Data Science: Probability, Reranking, and Triage' - startOffset: 828 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=828 - endOffset: 869 -- name: 'Communication Skills: Improving English for Remote Work' - startOffset: 869 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=869 - endOffset: 1443 -- name: 'Live Cohorts & ML Zoom Camp: Benefits of Structured, Hands-On Learning' - startOffset: 1443 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1443 - endOffset: 1647 -- name: 'Public Learning 
Strategy: Leaderboards, Posting, and Personal Branding' - startOffset: 1647 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1647 - endOffset: 1820 -- name: 'Content Framing: Owning Topics (ROC, Classifier Evaluation)' - startOffset: 1820 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1820 - endOffset: 1970 -- name: 'Recruiter Outreach: LinkedIn Visibility and Meta Interview Experience' - startOffset: 1970 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1970 - endOffset: 2116 -- name: 'Handling Critique: Social Media Feedback and Community Engagement' - startOffset: 2116 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2116 - endOffset: 2463 -- name: 'Portfolio Building: Notes, Notion, and Structured Content Workflow' - startOffset: 2463 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2463 - endOffset: 2868 -- name: 'Capstone Projects: Healthcare Datasets, Dockerized Models, and AWS Deployment' - startOffset: 2868 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2868 - endOffset: 3053 -- name: 'Community Contribution: Mentoring with DeepLearning.AI and Stanford Coding - Place' - startOffset: 3053 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3053 - endOffset: 3420 -- name: 'Time Management: Productivity Strategies for Medical Students and ML Learners' - startOffset: 3420 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3420 - endOffset: 3600 -- name: 'Final Reflections: Consistency, Career Next Steps, and Motivation' - startOffset: 3600 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3600 - endOffset: 3667 --- Links: diff --git a/_podcast/s21e05-from-astronomy-to-applied-ml.md b/_podcast/to-update/s21e05-from-astronomy-to-applied-ml.md similarity index 96% rename from _podcast/s21e05-from-astronomy-to-applied-ml.md rename to _podcast/to-update/s21e05-from-astronomy-to-applied-ml.md index b9839ef3..f95e9852 100644 --- a/_podcast/s21e05-from-astronomy-to-applied-ml.md +++ 
b/_podcast/to-update/s21e05-from-astronomy-to-applied-ml.md @@ -1,19 +1,148 @@ --- +title: "Modern astrophysical discovery—illustrated by the challenge of finding rare radio-emitting stars—depends on the seamless integration of domain knowledge, careful physics-informed data curation, and production-grade, scalable data/ML workflows: from telescope instrumentation and multi-wavelength cross-matching through positional-uncertainty analysis, to cloud-native pipelines, reproducible tooling, and deployment. The episode’s through-line is that building curated, interpretable datasets and end-to-end infrastructure (not just models) is the essential bridge that turns complex observational data into reliable science, practical education, and transferable career skills." +short: From Astronomy to Applied ML +season: 21 episode: 5 guests: - danielegbo +image: images/podcast/s21e05-from-astronomy-to-applied-ml.jpg ids: anchor: datatalksclub/episodes/From-Astronomy-to-Applied-ML---Daniel-Egbo-e38ha20 youtube: b92gwrsVQtg -image: images/podcast/s21e05-from-astronomy-to-applied-ml.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Astronomy-to-Applied-ML---Daniel-Egbo-e38ha20 apple: https://podcasts.apple.com/us/podcast/from-astronomy-to-applied-ml-daniel-egbo/id1541710331?i=1000728601772 spotify: https://open.spotify.com/episode/0hV7d1zSKO7ykGDZxjXyJ8 youtube: https://www.youtube.com/watch?v=b92gwrsVQtg -season: 21 -short: From Astronomy to Applied ML -title: 'Detecting Radio-Emitting Stars with MEERKAT: Building ML & Cloud Data Pipelines' + +description: Discover MEERKAT radio-emitting stars using ML & cloud pipelines — learn Astropy tools, catalog cross-matching, and production deployment at scale +intro: 'How do you find rare radio-emitting stars in massive MEERKAT datasets—and turn that search into reliable machine learning and cloud data pipelines? 
In this episode Daniel Egbo, an astrophysicist turned ML engineer and PhD candidate at the University of Cape Town, walks through the practical intersection of astronomy, ML, and cloud engineering. We cover MEERKAT and SKA context, the electromagnetic spectrum, and the core research goal: detecting point sources in radio images and confirming them via multi-wavelength cross-matching and physics-based verification. Daniel explains positional uncertainty, foreground/background confusion, and why curated datasets are essential for future ML. He also shares tooling and infrastructure practices—Astropy, NumPy/SciPy, JupyterHub, cloud compute, orchestration with Airflow/Kestra, MinIO and Spark—and outlines an end-to-end pipeline pattern (MySQL → MinIO → Spark → warehouse). Listeners will come away with concrete methods for building reproducible astronomical data workflows, practical machine learning readiness steps, and resources for learning and deployment (edge testing, LLMs, and community courses) to apply to radio telescope and astronomical data projects.' 
+dateadded: 2025-09-30 + +duration: PT01H04M35S + +quotableClips: +- name: Podcast Introduction & Lunar Eclipse Anecdote + startOffset: 0 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=0 + endOffset: 73 +- name: 'Career Overview: From Nigeria to PhD in Cape Town' + startOffset: 73 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=73 + endOffset: 252 +- name: 'MEERKAT and SKA: Radio Telescope Project Overview' + startOffset: 252 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=252 + endOffset: 289 +- name: 'Electromagnetic Spectrum: Radio to Gamma Explained' + startOffset: 289 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=289 + endOffset: 379 +- name: 'Research Goal: Identifying Radio-Emitting Stars in MEERKAT Data' + startOffset: 379 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=379 + endOffset: 405 +- name: Telescope Types and Observing Constraints (Optical, Infrared, X-ray) + startOffset: 405 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=405 + endOffset: 480 +- name: Radio Telescope Site Requirements and Space-based X-ray Observatories + startOffset: 480 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=480 + endOffset: 639 +- name: 'Data Workflow: Detecting Point Sources in Radio Images' + startOffset: 639 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=639 + endOffset: 710 +- name: Cross-matching Multi-wavelength Catalogs and Positional Astronomy + startOffset: 710 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=710 + endOffset: 815 +- name: 'Positional Uncertainty: 2D Projection, Foreground/Background Confusion' + startOffset: 815 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=815 + endOffset: 930 +- name: 'Physics-based Verification: Using Prior Observations to Confirm Sources' + startOffset: 930 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=930 + endOffset: 995 +- name: Radio Stars Rarity and Sensitivity Improvements with New Telescopes + startOffset: 995 + url: 
https://www.youtube.com/watch?v=b92gwrsVQtg&t=995 + endOffset: 1074 +- name: Building Curated Datasets as Foundation for Future Machine Learning + startOffset: 1074 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1074 + endOffset: 1291 +- name: 'Early ML Journey: Dataset Scale, Cloud Needs, and Inspiration' + startOffset: 1291 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1291 + endOffset: 1473 +- name: 'Python Astronomy Tooling: Astropy, NumPy, SciPy for Big Data' + startOffset: 1473 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1473 + endOffset: 1547 +- name: 'Cloud Computing Practices: JupyterHub and Remote Analysis' + startOffset: 1547 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1547 + endOffset: 1618 +- name: 'ML ZoomCamp Impact: Transitioning to Reusable Code and Production Practices' + startOffset: 1618 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1618 + endOffset: 1886 +- name: 'Edge Deployment Internship: Testing Models on Intel Hardware' + startOffset: 1886 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1886 + endOffset: 2018 +- name: 'LLM Exploration: LangChain, Hugging Face, RAG and Vector Databases' + startOffset: 2018 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2018 + endOffset: 2568 +- name: 'Course Projects: Orchestration with Kestra, Airflow, MinIO and Spark' + startOffset: 2568 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2568 + endOffset: 2648 +- name: Airflow 3.0 Setup Experience and Astronomer CLI Learnings + startOffset: 2648 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2648 + endOffset: 2715 +- name: 'End-to-End Pipeline Example: MySQL → MinIO → Spark → Warehouse (dbt next)' + startOffset: 2715 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2715 + endOffset: 2859 +- name: 'AI Training Ecosystem: LangChain Academy, Arize, NVIDIA Deep Learning Institute' + startOffset: 2859 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2859 + endOffset: 3020 +- name: 'Student 
Benefits: Free NVIDIA Courses and Deploying on GPUs (A100/H100)' + startOffset: 3020 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3020 + endOffset: 3121 +- name: 'BRICS Astronomy Bootcamp: Beginner-Friendly Data Analytics Program' + startOffset: 3121 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3121 + endOffset: 3312 +- name: 'Sharing Projects: Colab Notebooks, Public Portfolios and GitHub Visibility' + startOffset: 3312 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3312 + endOffset: 3479 +- name: 'Career Advice: Learn Python, Do Structured Projects, Leverage Domain Knowledge' + startOffset: 3479 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3479 + endOffset: 3621 +- name: 'Tools & Sponsors: Data Load Tool for Pipelines and Community Support' + startOffset: 3621 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3621 + endOffset: 3669 +- name: 'Learning Resources: Astropy Tutorials, Course GitHub and YouTube Archive' + startOffset: 3669 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3669 + endOffset: 3742 +- name: 'Closing Remarks: Encouragement to Share Progress and Course Availability' + startOffset: 3742 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3742 + endOffset: 3875 + transcript: - header: Podcast Introduction & Lunar Eclipse Anecdote - line: Hi everyone, welcome to our event. This event is brought to you by Data Talks @@ -1153,146 +1282,6 @@ transcript: sec: 3875 time: '1:04:35' who: Alexey -description: Discover MEERKAT radio-emitting stars using ML & cloud pipelines — learn - Astropy tools, catalog cross-matching, and production deployment at scale. -intro: 'How do you find rare radio-emitting stars in massive MEERKAT datasets—and - turn that search into reliable machine learning and cloud data pipelines? In this - episode Daniel Egbo, an astrophysicist turned ML engineer and PhD candidate at the - University of Cape Town, walks through the practical intersection of astronomy, - ML, and cloud engineering. 
We cover MEERKAT and SKA context, the electromagnetic - spectrum, and the core research goal: detecting point sources in radio images and - confirming them via multi-wavelength cross-matching and physics-based verification. - Daniel explains positional uncertainty, foreground/background confusion, and why - curated datasets are essential for future ML. He also shares tooling and infrastructure - practices—Astropy, NumPy/SciPy, JupyterHub, cloud compute, orchestration with Airflow/Kestra, - MinIO and Spark—and outlines an end-to-end pipeline pattern (MySQL → MinIO → Spark - → warehouse). Listeners will come away with concrete methods for building reproducible - astronomical data workflows, practical machine learning readiness steps, and resources - for learning and deployment (edge testing, LLMs, and community courses) to apply - to radio telescope and astronomical data projects.' -dateadded: '2025-09-30' -duration: PT01H04M35S -quotableClips: -- name: Podcast Introduction & Lunar Eclipse Anecdote - startOffset: 0 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=0 - endOffset: 73 -- name: 'Career Overview: From Nigeria to PhD in Cape Town' - startOffset: 73 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=73 - endOffset: 252 -- name: 'MEERKAT and SKA: Radio Telescope Project Overview' - startOffset: 252 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=252 - endOffset: 289 -- name: 'Electromagnetic Spectrum: Radio to Gamma Explained' - startOffset: 289 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=289 - endOffset: 379 -- name: 'Research Goal: Identifying Radio-Emitting Stars in MEERKAT Data' - startOffset: 379 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=379 - endOffset: 405 -- name: Telescope Types and Observing Constraints (Optical, Infrared, X-ray) - startOffset: 405 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=405 - endOffset: 480 -- name: Radio Telescope Site Requirements and Space-based X-ray Observatories - startOffset: 480 - 
url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=480 - endOffset: 639 -- name: 'Data Workflow: Detecting Point Sources in Radio Images' - startOffset: 639 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=639 - endOffset: 710 -- name: Cross-matching Multi-wavelength Catalogs and Positional Astronomy - startOffset: 710 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=710 - endOffset: 815 -- name: 'Positional Uncertainty: 2D Projection, Foreground/Background Confusion' - startOffset: 815 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=815 - endOffset: 930 -- name: 'Physics-based Verification: Using Prior Observations to Confirm Sources' - startOffset: 930 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=930 - endOffset: 995 -- name: Radio Stars Rarity and Sensitivity Improvements with New Telescopes - startOffset: 995 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=995 - endOffset: 1074 -- name: Building Curated Datasets as Foundation for Future Machine Learning - startOffset: 1074 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1074 - endOffset: 1291 -- name: 'Early ML Journey: Dataset Scale, Cloud Needs, and Inspiration' - startOffset: 1291 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1291 - endOffset: 1473 -- name: 'Python Astronomy Tooling: Astropy, NumPy, SciPy for Big Data' - startOffset: 1473 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1473 - endOffset: 1547 -- name: 'Cloud Computing Practices: JupyterHub and Remote Analysis' - startOffset: 1547 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1547 - endOffset: 1618 -- name: 'ML ZoomCamp Impact: Transitioning to Reusable Code and Production Practices' - startOffset: 1618 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1618 - endOffset: 1886 -- name: 'Edge Deployment Internship: Testing Models on Intel Hardware' - startOffset: 1886 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1886 - endOffset: 2018 -- name: 'LLM Exploration: LangChain, Hugging Face, 
RAG and Vector Databases' - startOffset: 2018 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2018 - endOffset: 2568 -- name: 'Course Projects: Orchestration with Kestra, Airflow, MinIO and Spark' - startOffset: 2568 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2568 - endOffset: 2648 -- name: Airflow 3.0 Setup Experience and Astronomer CLI Learnings - startOffset: 2648 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2648 - endOffset: 2715 -- name: 'End-to-End Pipeline Example: MySQL → MinIO → Spark → Warehouse (dbt next)' - startOffset: 2715 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2715 - endOffset: 2859 -- name: 'AI Training Ecosystem: LangChain Academy, Arize, NVIDIA Deep Learning Institute' - startOffset: 2859 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2859 - endOffset: 3020 -- name: 'Student Benefits: Free NVIDIA Courses and Deploying on GPUs (A100/H100)' - startOffset: 3020 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3020 - endOffset: 3121 -- name: 'BRICS Astronomy Bootcamp: Beginner-Friendly Data Analytics Program' - startOffset: 3121 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3121 - endOffset: 3312 -- name: 'Sharing Projects: Colab Notebooks, Public Portfolios and GitHub Visibility' - startOffset: 3312 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3312 - endOffset: 3479 -- name: 'Career Advice: Learn Python, Do Structured Projects, Leverage Domain Knowledge' - startOffset: 3479 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3479 - endOffset: 3621 -- name: 'Tools & Sponsors: Data Load Tool for Pipelines and Community Support' - startOffset: 3621 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3621 - endOffset: 3669 -- name: 'Learning Resources: Astropy Tutorials, Course GitHub and YouTube Archive' - startOffset: 3669 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3669 - endOffset: 3742 -- name: 'Closing Remarks: Encouragement to Share Progress and Course Availability' - 
startOffset: 3742 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3742 - endOffset: 3875 --- Links: diff --git a/_podcast/s21e07-lessons-from-two-decades-of-ai.md b/_podcast/to-update/s21e07-lessons-from-two-decades-of-ai.md similarity index 94% rename from _podcast/s21e07-lessons-from-two-decades-of-ai.md rename to _podcast/to-update/s21e07-lessons-from-two-decades-of-ai.md index 24017ec1..36c3384b 100644 --- a/_podcast/s21e07-lessons-from-two-decades-of-ai.md +++ b/_podcast/to-update/s21e07-lessons-from-two-decades-of-ai.md @@ -1,20 +1,146 @@ --- +title: "Context: The episode follows a two-decade arc from game-AI research and evolutionary/RL methods through industry product leadership to present work on LLM-driven multi‑agent assistants—covering technical deep dives (prompt engineering, orchestration vs flow, sequential thinking servers, code generation, procedural content), tooling and deployment challenges (local models, model specialization, monitoring), and career/publishing lessons. + +Core narrative: The unifying idea is that practical, production‑ready AI agents are built by applying game‑AI engineering principles—minimal, modular task decomposition; evolutionary and learning‑based search; and clear orchestration patterns—to modern LLMs and multi‑agent systems, balancing creative capabilities with efficiency, tooling, and real‑world deployability." 
+short: Lessons from Two Decades of AI +season: 21 episode: 7 guests: - micheallanham +image: images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg ids: anchor: datatalksclub/episodes/Lessons-from-Two-Decades-of-AI---Micheal-Lanham-e38oarc youtube: DSxqUlumM3A -image: images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Lessons-from-Two-Decades-of-AI---Micheal-Lanham-e38oarc apple: https://podcasts.apple.com/us/podcast/lessons-from-two-decades-of-ai-micheal-lanham/id1541710331?i=1000728604349 spotify: https://open.spotify.com/episode/7uhe5ZysRi07S6mb14nnox youtube: https://www.youtube.com/watch?v=DSxqUlumM3A -season: 21 -short: Lessons from Two Decades of AI -title: 'Build Multi-Agent AI Assistants: Game AI Roots, Evolutionary Algorithms & - Practical LLM Tooling' + +description: Discover multi-agent AI, evolutionary algorithms and LLM tooling—learn agent workflows, prompt engineering, game AI examples, code patterns & career tips +intro: How do you design practical multi-agent AI assistants that scale from game AI experiments to real-world LLM tooling? In this episode, Micheal Lanham — best‑selling author and AI engineer with two decades of work across games, graphics, GIS and machine learning — traces the path from game AI and reinforcement learning to evolutionary algorithms and modern agent architectures. We explore his research on games for cognitive testing, evolutionary deep learning for hyperparameter and architecture search, and how those methods inform prompt engineering and multi-agent workflows.

Key topics include minimalist agent workflow design and task decomposition, flow versus orchestration, parallel collaboration patterns, agent tooling such as the OpenAI Agent SDK and MCP integration, sequential “thinking” servers and scratchpads, plus practical code examples from game development and GPT-5 Pro case studies. The conversation also covers generative AI in games, local and open‑source LLM trends, model specialization, and evaluation/monitoring pipelines. Whether you’re building AI assistants, experimenting with evolutionary algorithms, or integrating LLM tooling into products, this episode offers concrete techniques, tooling insights, and career guidance for AI engineers +dateadded: 2025-10-01 + +duration: PT01H48S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=0 + endOffset: 67 +- name: 'Career Snapshot: Two Decades from Game AI to AI Agents' + startOffset: 67 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=67 + endOffset: 156 +- name: 'Early Research: Games for Cognitive Testing & Neural Networks' + startOffset: 156 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=156 + endOffset: 195 +- name: 'Industry Experience: Consulting, Product Development, Leadership' + startOffset: 195 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=195 + endOffset: 259 +- name: Evolutionary Algorithms in Industry Optimization + startOffset: 259 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=259 + endOffset: 328 +- name: 'Current Focus: Multi-Agent AI Support Assistants' + startOffset: 328 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=328 + endOffset: 345 +- name: 'Publishing Breakthrough: Reverse-Engineering Pokémon Go & AR' + startOffset: 345 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=345 + endOffset: 456 +- name: Sound Design & Waveform Analysis Applied to Games + startOffset: 456 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=456 + endOffset: 481 +- name: 
Reinforcement Learning Roots and Alberta Research + startOffset: 481 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=481 + endOffset: 549 +- name: 'Evolutionary Deep Learning: Hyperparameter Search & Architecture Tuning' + startOffset: 549 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=549 + endOffset: 600 +- name: 'Move to NLP: Early LLM Work and Rise of AI Agents' + startOffset: 600 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=600 + endOffset: 849 +- name: Evolutionary Algorithms for Prompt Engineering + startOffset: 849 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=849 + endOffset: 1099 +- name: 'AI Agents Book: Editions, Teaching, and Vibe Coding for Games' + startOffset: 1099 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1099 + endOffset: 1257 +- name: 'Agent Workflow Design: Minimalism and Task Decomposition' + startOffset: 1257 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1257 + endOffset: 1428 +- name: 'Flow vs Orchestration: Sequential Pipelines and Manager Agents' + startOffset: 1428 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1428 + endOffset: 1585 +- name: 'Collaboration Patterns: Parallel Agent Interaction & Use Cases' + startOffset: 1585 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1585 + endOffset: 1891 +- name: 'Agent Tooling: OpenAI Agent SDK and MCP Integration' + startOffset: 1891 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1891 + endOffset: 2005 +- name: 'Sequential Thinking Servers: Internal Reasoning & Scratchpads' + startOffset: 2005 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2005 + endOffset: 2142 +- name: 'Coding Agents in Game Development: Practical Examples' + startOffset: 2142 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2142 + endOffset: 2218 +- name: 'End-to-End Code Generation: GPT-5 Pro Case Studies' + startOffset: 2218 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2218 + endOffset: 2337 +- name: 'Generative AI in Games: Procedural Content and 
Infinite Playability' + startOffset: 2337 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2337 + endOffset: 2502 +- name: 'Technical Challenges: Implementing Space Invaders with Agents' + startOffset: 2502 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2502 + endOffset: 2740 +- name: 'Local Model Trend: Running LLMs on Private GPUs' + startOffset: 2740 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2740 + endOffset: 2774 +- name: Open-Source Large Models and Low-Latency Providers + startOffset: 2774 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2774 + endOffset: 2920 +- name: 'Model Specialization: Smaller Task-Focused LLMs Emerging' + startOffset: 2920 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2920 + endOffset: 3316 +- name: 'Career Advice: Transitioning to AI Engineering & LLM Skills' + startOffset: 3316 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3316 + endOffset: 3459 +- name: 'Evaluation & Monitoring: Feedback Pipelines and Tools (Arize Phoenix)' + startOffset: 3459 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3459 + endOffset: 3530 +- name: 'Publishing Details: Second Edition and Availability' + startOffset: 3530 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3530 + endOffset: 3623 +- name: Closing Remarks and Links + startOffset: 3623 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3623 + endOffset: 3648 + transcript: - header: Podcast Introduction - line: Hi everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -909,143 +1035,6 @@ transcript: sec: 3648 time: '1:00:48' who: Michael -description: Discover multi-agent AI, evolutionary algorithms and LLM tooling—learn - agent workflows, prompt engineering, game AI examples, code patterns & career tips. -intro: How do you design practical multi-agent AI assistants that scale from game - AI experiments to real-world LLM tooling? 
In this episode, Micheal Lanham — best‑selling - author and AI engineer with two decades of work across games, graphics, GIS and - machine learning — traces the path from game AI and reinforcement learning to evolutionary - algorithms and modern agent architectures. We explore his research on games for - cognitive testing, evolutionary deep learning for hyperparameter and architecture - search, and how those methods inform prompt engineering and multi-agent workflows. -

Key topics include minimalist agent workflow design and task decomposition, - flow versus orchestration, parallel collaboration patterns, agent tooling such as - the OpenAI Agent SDK and MCP integration, sequential “thinking” servers and scratchpads, - plus practical code examples from game development and GPT-5 Pro case studies. The - conversation also covers generative AI in games, local and open‑source LLM trends, - model specialization, and evaluation/monitoring pipelines. Whether you’re building - AI assistants, experimenting with evolutionary algorithms, or integrating LLM tooling - into products, this episode offers concrete techniques, tooling insights, and career - guidance for AI engineers. -dateadded: '2025-10-01' -duration: PT01H48S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=0 - endOffset: 67 -- name: 'Career Snapshot: Two Decades from Game AI to AI Agents' - startOffset: 67 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=67 - endOffset: 156 -- name: 'Early Research: Games for Cognitive Testing & Neural Networks' - startOffset: 156 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=156 - endOffset: 195 -- name: 'Industry Experience: Consulting, Product Development, Leadership' - startOffset: 195 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=195 - endOffset: 259 -- name: Evolutionary Algorithms in Industry Optimization - startOffset: 259 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=259 - endOffset: 328 -- name: 'Current Focus: Multi-Agent AI Support Assistants' - startOffset: 328 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=328 - endOffset: 345 -- name: 'Publishing Breakthrough: Reverse-Engineering Pokémon Go & AR' - startOffset: 345 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=345 - endOffset: 456 -- name: Sound Design & Waveform Analysis Applied to Games - startOffset: 456 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=456 - endOffset: 
481 -- name: Reinforcement Learning Roots and Alberta Research - startOffset: 481 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=481 - endOffset: 549 -- name: 'Evolutionary Deep Learning: Hyperparameter Search & Architecture Tuning' - startOffset: 549 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=549 - endOffset: 600 -- name: 'Move to NLP: Early LLM Work and Rise of AI Agents' - startOffset: 600 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=600 - endOffset: 849 -- name: Evolutionary Algorithms for Prompt Engineering - startOffset: 849 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=849 - endOffset: 1099 -- name: 'AI Agents Book: Editions, Teaching, and Vibe Coding for Games' - startOffset: 1099 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1099 - endOffset: 1257 -- name: 'Agent Workflow Design: Minimalism and Task Decomposition' - startOffset: 1257 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1257 - endOffset: 1428 -- name: 'Flow vs Orchestration: Sequential Pipelines and Manager Agents' - startOffset: 1428 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1428 - endOffset: 1585 -- name: 'Collaboration Patterns: Parallel Agent Interaction & Use Cases' - startOffset: 1585 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1585 - endOffset: 1891 -- name: 'Agent Tooling: OpenAI Agent SDK and MCP Integration' - startOffset: 1891 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1891 - endOffset: 2005 -- name: 'Sequential Thinking Servers: Internal Reasoning & Scratchpads' - startOffset: 2005 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2005 - endOffset: 2142 -- name: 'Coding Agents in Game Development: Practical Examples' - startOffset: 2142 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2142 - endOffset: 2218 -- name: 'End-to-End Code Generation: GPT-5 Pro Case Studies' - startOffset: 2218 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2218 - endOffset: 2337 -- name: 'Generative AI in Games: 
Procedural Content and Infinite Playability' - startOffset: 2337 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2337 - endOffset: 2502 -- name: 'Technical Challenges: Implementing Space Invaders with Agents' - startOffset: 2502 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2502 - endOffset: 2740 -- name: 'Local Model Trend: Running LLMs on Private GPUs' - startOffset: 2740 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2740 - endOffset: 2774 -- name: Open-Source Large Models and Low-Latency Providers - startOffset: 2774 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2774 - endOffset: 2920 -- name: 'Model Specialization: Smaller Task-Focused LLMs Emerging' - startOffset: 2920 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2920 - endOffset: 3316 -- name: 'Career Advice: Transitioning to AI Engineering & LLM Skills' - startOffset: 3316 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3316 - endOffset: 3459 -- name: 'Evaluation & Monitoring: Feedback Pipelines and Tools (Arize Phoenix)' - startOffset: 3459 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3459 - endOffset: 3530 -- name: 'Publishing Details: Second Edition and Availability' - startOffset: 3530 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3530 - endOffset: 3623 -- name: Closing Remarks and Links - startOffset: 3623 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3623 - endOffset: 3648 --- Links: diff --git a/_podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md b/_podcast/to-update/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md similarity index 96% rename from _podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md rename to _podcast/to-update/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md index f4ae25f8..c3dd2044 100644 --- a/_podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md +++ 
b/_podcast/to-update/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md @@ -1,20 +1,140 @@ --- +title: "A single through-line: the episode is about a hands‑on, end‑to‑end journey into applied machine learning — a multidisciplinary career pivot powered by self‑education and cohort/community support that takes messy, high‑frequency industrial data through pragmatic tool‑building, model development, explainability tradeoffs, and MLOps (APIs, containers, Terraform, ONNX) into real production impact, with a commitment to teaching and scaling that practice to others." +short: 'From Semiconductors to Machine Learning: A Career in Data and Teaching' +season: 21 episode: 8 guests: - dashelruizperez +image: images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg ids: anchor: datatalksclub/episodes/From-Semiconductors-to-Machine-Learning-A-Career-in-Data-and-Teaching-e395t53 youtube: B2tzuUg5uZs -image: images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Semiconductors-to-Machine-Learning-A-Career-in-Data-and-Teaching-e395t53 apple: https://podcasts.apple.com/us/podcast/from-semiconductors-to-machine-learning-a-career-in/id1541710331?i=1000731197034 spotify: https://open.spotify.com/episode/1znRtNRf5IUYcBblJYH53r youtube: https://www.youtube.com/watch?v=B2tzuUg5uZs -season: 21 -short: 'From Semiconductors to Machine Learning: A Career in Data and Teaching' -title: 'Predictive Maintenance & Yield Analytics for Semiconductors: Deploy ML with - Flask, Docker & MLOps' + +description: 'Learn predictive maintenance & yield analytics for semiconductors: deploy ML with Flask, Docker & MLOps to boost yield, enable explainability, and ship APIs.' +intro: 'How do you move machine learning for predictive maintenance and yield analytics out of a notebook and into production on the fab floor? 
In this episode, Dashel Ruiz Perez—data analyst, ML engineer, and educator who spent nearly a decade at Microchip Technology—walks through practical steps for deploying ML to improve semiconductor yield. Drawing on millisecond tool logs, process telemetry, and a “Wafers at Risk” predictive model, Dashel explains how to build explainable yield analytics, iterate with Kaggle-style EDA and feature engineering, and ensure model portability with ONNX.

Listen for concrete implementation details: turning models into Flask REST APIs, containerizing with Docker, using Google Cloud and Terraform for infrastructure automation, and MLOps best practices for production monitoring. Dashel also covers hands-on learning paths from ML Zoomcamp—course deliverables beyond Jupyter notebooks, common roadblocks (Mac M1 issues, wide categorical data), and examples like a COVID comorbidity API demo and a TensorFlow computer vision project. If you’re responsible for semiconductor predictive maintenance, yield analytics, or ML deployment, this episode gives actionable guidance on tools, workflows, and learning strategies to get models reliably running in production.' +dateadded: 2025-10-21 + +duration: PT01H13M08S + +quotableClips: +- name: Podcast Introduction & Data Docs Club + startOffset: 0 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=0 + endOffset: 111 +- name: 'Guest Overview: Multidisciplinary Career Snapshot' + startOffset: 111 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=111 + endOffset: 178 +- name: 'Career Pivot: From Classical Guitarist to Tech in Portland' + startOffset: 178 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=178 + endOffset: 289 +- name: 'Semiconductor Onboarding: Expediter Role and Fab Floor Experience' + startOffset: 289 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=289 + endOffset: 349 +- name: 'Fab Data Exposure: Millisecond Tool Logs and Process Telemetry' + startOffset: 349 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=349 + endOffset: 376 +- name: 'Self-Education: Learning English and Computer Science' + startOffset: 376 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=376 + endOffset: 704 +- name: 'Automation Initiative: Building a Java Tool for CMP Calculations' + startOffset: 704 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=704 + endOffset: 923 +- name: 'Yield Analytics: JMP, Oracle, and Cross-Area Data Access' + startOffset: 923 + url: 
https://www.youtube.com/watch?v=B2tzuUg5uZs&t=923 + endOffset: 1262 +- name: 'ML Introduction: Academic AI Project and Predictive Interest' + startOffset: 1262 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1262 + endOffset: 1409 +- name: 'Predictive Maintenance: "Wafers at Risk" Model for Yield Improvement' + startOffset: 1409 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1409 + endOffset: 1516 +- name: 'Explainability Dilemma: Tweaking Models vs. Understanding Results' + startOffset: 1516 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1516 + endOffset: 1773 +- name: 'Course Selection: Choosing ML Zoomcamp Cohort Experience' + startOffset: 1773 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1773 + endOffset: 1942 +- name: 'Applied Curriculum: Deliverable ML Beyond Jupyter Notebooks' + startOffset: 1942 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1942 + endOffset: 2074 +- name: 'Learning Support: Slack Q&A, Cohorts, and Peer Study Groups' + startOffset: 2074 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2074 + endOffset: 2249 +- name: 'Production Focus: Flask REST API, Docker, and Google Cloud' + startOffset: 2249 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2249 + endOffset: 2392 +- name: 'Midterm Demo: COVID Comorbidity Model Deployed as an API' + startOffset: 2392 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2392 + endOffset: 2676 +- name: 'Infrastructure Automation: Terraform and MLOps Takeaways' + startOffset: 2676 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2676 + endOffset: 2904 +- name: 'Computer Vision Project: Butterfly Image Classification (TensorFlow)' + startOffset: 2904 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2904 + endOffset: 3070 +- name: 'Kaggle Workflow: EDA, Feature Engineering, and Model Iteration' + startOffset: 3070 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3070 + endOffset: 3113 +- name: 'Model Portability: ONNX for Framework Interoperability' + 
startOffset: 3113 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3113 + endOffset: 3203 +- name: 'Full-Stack ML Skills: Docker, VMs, Databases, and Deployment' + startOffset: 3203 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3203 + endOffset: 3265 +- name: 'Common Roadblocks: Mac M1 Issues and Wide Categorical Data' + startOffset: 3265 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3265 + endOffset: 3362 +- name: 'Time Commitment: Homework Strategy and Active Video Learning' + startOffset: 3362 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3362 + endOffset: 3487 +- name: 'Community Value: Rapid Help, Code Reviews, and Study Groups' + startOffset: 3487 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3487 + endOffset: 3631 +- name: 'Motivation Techniques: Public Learning and Project Accountability' + startOffset: 3631 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3631 + endOffset: 4056 +- name: 'Teaching Ambition: Creating High-Quality Spanish ML Content' + startOffset: 4056 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4056 + endOffset: 4228 +- name: 'Upcoming Offerings: AI-for-Developers, React, and LLM Coding' + startOffset: 4228 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4228 + endOffset: 4369 +- name: 'Closing Remarks: Course Endorsement and Next Steps' + startOffset: 4369 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4369 + endOffset: 4388 + transcript: - header: Podcast Introduction & Data Docs Club - line: Hi everyone, welcome to our event. This event is brought to you by the Data @@ -1026,139 +1146,6 @@ transcript: sec: 4388 time: '1:13:08' who: Dashel -description: 'Learn predictive maintenance & yield analytics for semiconductors: deploy - ML with Flask, Docker & MLOps to boost yield, enable explainability, and ship APIs.' -intro: 'How do you move machine learning for predictive maintenance and yield analytics - out of a notebook and into production on the fab floor? 
In this episode, Dashel Ruiz - Perez—data analyst, ML engineer, and educator who spent nearly a decade at Microchip - Technology—walks through practical steps for deploying ML to improve semiconductor - yield. Drawing on millisecond tool logs, process telemetry, and a “Wafers at Risk” - predictive model, Dashel explains how to build explainable yield analytics, iterate - with Kaggle-style EDA and feature engineering, and ensure model portability with - ONNX.

Listen for concrete implementation details: turning models into Flask - REST APIs, containerizing with Docker, using Google Cloud and Terraform for infrastructure - automation, and MLOps best practices for production monitoring. Dashel also covers - hands-on learning paths from ML Zoomcamp—course deliverables beyond Jupyter notebooks, - common roadblocks (Mac M1 issues, wide categorical data), and examples like a COVID - comorbidity API demo and a TensorFlow computer vision project. If you’re responsible - for semiconductor predictive maintenance, yield analytics, or ML deployment, this - episode gives actionable guidance on tools, workflows, and learning strategies to - get models reliably running in production.' -dateadded: '2025-10-21' -duration: PT01H13M08S -quotableClips: -- name: Podcast Introduction & Data Docs Club - startOffset: 0 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=0 - endOffset: 111 -- name: 'Guest Overview: Multidisciplinary Career Snapshot' - startOffset: 111 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=111 - endOffset: 178 -- name: 'Career Pivot: From Classical Guitarist to Tech in Portland' - startOffset: 178 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=178 - endOffset: 289 -- name: 'Semiconductor Onboarding: Expediter Role and Fab Floor Experience' - startOffset: 289 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=289 - endOffset: 349 -- name: 'Fab Data Exposure: Millisecond Tool Logs and Process Telemetry' - startOffset: 349 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=349 - endOffset: 376 -- name: 'Self-Education: Learning English and Computer Science' - startOffset: 376 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=376 - endOffset: 704 -- name: 'Automation Initiative: Building a Java Tool for CMP Calculations' - startOffset: 704 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=704 - endOffset: 923 -- name: 'Yield Analytics: JMP, Oracle, and Cross-Area Data Access' - startOffset: 923 - url: 
https://www.youtube.com/watch?v=B2tzuUg5uZs&t=923 - endOffset: 1262 -- name: 'ML Introduction: Academic AI Project and Predictive Interest' - startOffset: 1262 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1262 - endOffset: 1409 -- name: 'Predictive Maintenance: "Wafers at Risk" Model for Yield Improvement' - startOffset: 1409 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1409 - endOffset: 1516 -- name: 'Explainability Dilemma: Tweaking Models vs. Understanding Results' - startOffset: 1516 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1516 - endOffset: 1773 -- name: 'Course Selection: Choosing ML Zoomcamp Cohort Experience' - startOffset: 1773 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1773 - endOffset: 1942 -- name: 'Applied Curriculum: Deliverable ML Beyond Jupyter Notebooks' - startOffset: 1942 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1942 - endOffset: 2074 -- name: 'Learning Support: Slack Q&A, Cohorts, and Peer Study Groups' - startOffset: 2074 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2074 - endOffset: 2249 -- name: 'Production Focus: Flask REST API, Docker, and Google Cloud' - startOffset: 2249 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2249 - endOffset: 2392 -- name: 'Midterm Demo: COVID Comorbidity Model Deployed as an API' - startOffset: 2392 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2392 - endOffset: 2676 -- name: 'Infrastructure Automation: Terraform and MLOps Takeaways' - startOffset: 2676 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2676 - endOffset: 2904 -- name: 'Computer Vision Project: Butterfly Image Classification (TensorFlow)' - startOffset: 2904 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2904 - endOffset: 3070 -- name: 'Kaggle Workflow: EDA, Feature Engineering, and Model Iteration' - startOffset: 3070 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3070 - endOffset: 3113 -- name: 'Model Portability: ONNX for Framework Interoperability' - 
startOffset: 3113 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3113 - endOffset: 3203 -- name: 'Full-Stack ML Skills: Docker, VMs, Databases, and Deployment' - startOffset: 3203 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3203 - endOffset: 3265 -- name: 'Common Roadblocks: Mac M1 Issues and Wide Categorical Data' - startOffset: 3265 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3265 - endOffset: 3362 -- name: 'Time Commitment: Homework Strategy and Active Video Learning' - startOffset: 3362 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3362 - endOffset: 3487 -- name: 'Community Value: Rapid Help, Code Reviews, and Study Groups' - startOffset: 3487 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3487 - endOffset: 3631 -- name: 'Motivation Techniques: Public Learning and Project Accountability' - startOffset: 3631 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3631 - endOffset: 4056 -- name: 'Teaching Ambition: Creating High-Quality Spanish ML Content' - startOffset: 4056 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4056 - endOffset: 4228 -- name: 'Upcoming Offerings: AI-for-Developers, React, and LLM Coding' - startOffset: 4228 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4228 - endOffset: 4369 -- name: 'Closing Remarks: Course Endorsement and Next Steps' - startOffset: 4369 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4369 - endOffset: 4388 --- Links: diff --git a/_podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md b/_podcast/to-update/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md similarity index 96% rename from _podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md rename to _podcast/to-update/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md index 110d3518..15b3df23 100644 --- a/_podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md +++ 
b/_podcast/to-update/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md @@ -1,20 +1,113 @@ --- +title: "The episode’s central idea is the pragmatic, end-to-end translation of data science and ML research into real-world product impact: owning the full stack from instrumentation and data collection through real-time streaming inference, experimentation, and rollout, while making practical engineering trade-offs (hardware, platforms, team roles) and modeling human behavior and incentives to drive measurable engagement and business outcomes." +short: 'From Theme Parks to Tesla: Building Data Products That Work' +season: 21 episode: 9 guests: - abouzarabbaspour +image: images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg ids: anchor: datatalksclub/episodes/From-Theme-Parks-to-Tesla-Building-Data-Products-That-Work-e395qme youtube: gXvVMvhfrIY -image: images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Theme-Parks-to-Tesla-Building-Data-Products-That-Work-e395qme apple: https://podcasts.apple.com/us/podcast/from-theme-parks-to-tesla-building-data-products-that-work/id1541710331?i=1000731198436 spotify: https://open.spotify.com/episode/5dpBs4xr3zMkBDw6cTYHQE?si=pivilqeDTHOiNCBb1bFHdA youtube: https://www.youtube.com/watch?v=gXvVMvhfrIY -season: 21 -short: 'From Theme Parks to Tesla: Building Data Products That Work' -title: Optimize Visitor Flow with Theme Park Crowd Modeling, Queue Prediction & Real-Time - Recommendations + +description: Discover crowd modeling, queue prediction and real-time recommendations to optimize visitor flow, reduce wait times and boost engagement with smart routing +intro: 'How can theme parks use data to cut wait times and guide visitors in real time? 
In this episode, Abouzar Abbaspour — an EngD-trained machine learning and data engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) and automotive (Tesla) — walks through building systems that optimize visitor flow using crowd modeling, queue prediction and real-time recommendations.

We cover the core problems of modeling crowd dynamics and ride capacity, designing a next-best-action visitor routing engine, and using behavioral route modeling and probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: incentivizing app adoption to collect data, validating recommendations with employee swiping experiments and A/B tests, and running streaming pipelines for live experiments and rollout (engagement metrics and accuracy measurement). He also touches on deployment concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines — and how these engineering choices affect measurement and user experience.

Listen to learn concrete approaches for queue prediction, visitor routing, real-time processing, and experimentation so you can design and validate systems that improve throughput and guest satisfaction.' +dateadded: 2025-10-21 + +duration: PT01H35S + +quotableClips: +- name: Podcast Introduction & Event Info + startOffset: 0 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=0 + endOffset: 77 +- name: 'Early Career: Software Engineering to Data Science' + startOffset: 77 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=77 + endOffset: 126 +- name: 'Academic Path: Professional Doctorate & TU Berlin' + startOffset: 126 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=126 + endOffset: 288 +- name: 'Research Partnerships: Industry Projects and Applied Research' + startOffset: 288 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=288 + endOffset: 377 +- name: 'Efteling Insights: Theme Park Tech and Experience Design' + startOffset: 377 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=377 + endOffset: 456 +- name: 'Crowd Modeling: Queue Prediction and Ride Capacity' + startOffset: 456 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=456 + endOffset: 779 +- name: 'Visitor Routing: Next-Best-Action Recommendation System' + startOffset: 779 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=779 + endOffset: 890 +- name: App Adoption & Incentives for Data Collection + startOffset: 890 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=890 + endOffset: 1000 +- name: Behavioral Route Modeling & Probabilistic Recommendations + startOffset: 1000 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1000 + endOffset: 1109 +- name: 'E-commerce Recs: Bol.com Favorite-Brand Carousel' + startOffset: 1109 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1109 + endOffset: 1443 +- name: 'Recommendation Validation: Employee Swiping Experiment & A/B Testing' + startOffset: 1443 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1443 + endOffset: 1561 +- 
name: 'Real-time Processing: Streaming for Live Experiments' + startOffset: 1561 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1561 + endOffset: 1879 +- name: 'Measurement & Rollout: Engagement Metrics and Accuracy Results' + startOffset: 1879 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1879 + endOffset: 2001 +- name: 'Role at Tesla: Data Engineering vs. ML Engineering Responsibilities' + startOffset: 2001 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2001 + endOffset: 2061 +- name: 'Full-Stack Data Work: Building Apps, Instrumentation, and Deployment' + startOffset: 2061 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2061 + endOffset: 2503 +- name: 'LLMs & AI-Assisted Development: Productivity Gains and Risks' + startOffset: 2503 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2503 + endOffset: 2766 +- name: 'On-Prem Inference Hardware: Raspberry Pi, Jetson Orin, Mac Mini' + startOffset: 2766 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2766 + endOffset: 2993 +- name: 'Models & Platforms: LLaMA, Code Models, and Replicate' + startOffset: 2993 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2993 + endOffset: 3243 +- name: 'Interview Preparation: Tesla Data Engineering Expectations (Architecture, + ETL, Scripting)' + startOffset: 3243 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3243 + endOffset: 3463 +- name: 'Career Strategy: Prioritization, Learning Opportunities, Underpromise & Overdeliver' + startOffset: 3463 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3463 + endOffset: 3610 +- name: Episode Closing & Key Takeaways + startOffset: 3610 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3610 + endOffset: 3635 + transcript: - header: Podcast Introduction & Event Info - line: Hi everyone, welcome to our event. 
This event is brought to you by Data Docs @@ -1135,112 +1228,6 @@ transcript: sec: 3635 time: '1:00:35' who: Abouzar -description: Discover crowd modeling, queue prediction and real-time recommendations - to optimize visitor flow, reduce wait times and boost engagement with smart routing. -intro: 'How can theme parks use data to cut wait times and guide visitors in real - time? In this episode, Abouzar Abbaspour — an EngD-trained machine learning and data - engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) - and automotive (Tesla) — walks through building systems that optimize visitor flow - using crowd modeling, queue prediction and real-time recommendations.

We - cover the core problems of modeling crowd dynamics and ride capacity, designing - a next-best-action visitor routing engine, and using behavioral route modeling and - probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: - incentivizing app adoption to collect data, validating recommendations with employee - swiping experiments and A/B tests, and running streaming pipelines for live experiments - and rollout (engagement metrics and accuracy measurement). He also touches on deployment - concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines - — and how these engineering choices affect measurement and user experience.

- Listen to learn concrete approaches for queue prediction, visitor routing, real-time - processing, and experimentation so you can design and validate systems that improve - throughput and guest satisfaction.' -dateadded: '2025-10-21' -duration: PT01H35S -quotableClips: -- name: Podcast Introduction & Event Info - startOffset: 0 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=0 - endOffset: 77 -- name: 'Early Career: Software Engineering to Data Science' - startOffset: 77 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=77 - endOffset: 126 -- name: 'Academic Path: Professional Doctorate & TU Berlin' - startOffset: 126 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=126 - endOffset: 288 -- name: 'Research Partnerships: Industry Projects and Applied Research' - startOffset: 288 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=288 - endOffset: 377 -- name: 'Efteling Insights: Theme Park Tech and Experience Design' - startOffset: 377 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=377 - endOffset: 456 -- name: 'Crowd Modeling: Queue Prediction and Ride Capacity' - startOffset: 456 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=456 - endOffset: 779 -- name: 'Visitor Routing: Next-Best-Action Recommendation System' - startOffset: 779 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=779 - endOffset: 890 -- name: App Adoption & Incentives for Data Collection - startOffset: 890 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=890 - endOffset: 1000 -- name: Behavioral Route Modeling & Probabilistic Recommendations - startOffset: 1000 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1000 - endOffset: 1109 -- name: 'E-commerce Recs: Bol.com Favorite-Brand Carousel' - startOffset: 1109 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1109 - endOffset: 1443 -- name: 'Recommendation Validation: Employee Swiping Experiment & A/B Testing' - startOffset: 1443 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1443 - endOffset: 1561 -- 
name: 'Real-time Processing: Streaming for Live Experiments' - startOffset: 1561 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1561 - endOffset: 1879 -- name: 'Measurement & Rollout: Engagement Metrics and Accuracy Results' - startOffset: 1879 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1879 - endOffset: 2001 -- name: 'Role at Tesla: Data Engineering vs. ML Engineering Responsibilities' - startOffset: 2001 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2001 - endOffset: 2061 -- name: 'Full-Stack Data Work: Building Apps, Instrumentation, and Deployment' - startOffset: 2061 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2061 - endOffset: 2503 -- name: 'LLMs & AI-Assisted Development: Productivity Gains and Risks' - startOffset: 2503 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2503 - endOffset: 2766 -- name: 'On-Prem Inference Hardware: Raspberry Pi, Jetson Orin, Mac Mini' - startOffset: 2766 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2766 - endOffset: 2993 -- name: 'Models & Platforms: LLaMA, Code Models, and Replicate' - startOffset: 2993 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2993 - endOffset: 3243 -- name: 'Interview Preparation: Tesla Data Engineering Expectations (Architecture, - ETL, Scripting)' - startOffset: 3243 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3243 - endOffset: 3463 -- name: 'Career Strategy: Prioritization, Learning Opportunities, Underpromise & Overdeliver' - startOffset: 3463 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3463 - endOffset: 3610 -- name: Episode Closing & Key Takeaways - startOffset: 3610 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3610 - endOffset: 3635 --- Links: diff --git a/_podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md b/_podcast/to-update/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md similarity index 92% rename from 
_podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md rename to _podcast/to-update/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md index 5ffb3858..a2774148 100644 --- a/_podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md +++ b/_podcast/to-update/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md @@ -1,20 +1,160 @@ --- +title: "Context: +The episode traces a practitioner’s journey from early ML and language work into building agentic systems for real-world SRE and productivity problems—covering definitions, architectures, planning, retrieval, tooling, frameworks, testing, and evaluation. + +Core (single unifying idea): +Pragmatic agent engineering: turning LLMs into reliable, task‑oriented autonomous systems by engineering around their capabilities and limits—designing objectives, orchestration, context/retrieval, tooling integrations, planning strategies, and rigorous evaluation so agents can safely, efficiently, and predictably perform real operational and enterprise tasks. + +Why this unifies the episode: +- Defines what an “agent” means in practice (autonomy + objectives + LLMs) and why design choices matter. +- Shows orchestration needs (tools, memory, knowledge stores) to ground LLM reasoning in real data and actions. +- Contrasts planning styles (single‑step, multi‑pass, self‑reflection) and implementation tradeoffs (prompts vs SDKs, code vs natural‑language agents) as engineering choices, not academic ones. +- Treats retrieval/RAG as an engineering component with latency/cost/GIGO constraints and explores agentic RAG when RAG alone falls short. +- Emphasizes integration abstractions and framework tradeoffs for production deployment (from bespoke stacks to marketplaces and SDKs). +- Centers testing and evaluation—mocking tools, regression tests, goal‑based benchmarks—to ensure outcomes over narrative plausibility. 
+- Highlights specialization and domain constraints: generic agents struggle; practical value comes from adapting agents to workflows, data, and operational requirements. + +Bottom line: +The episode’s through‑line is that successful agent projects are not just about large models: they are systems engineering problems requiring explicit choices about autonomy, grounding, tooling, planning, and measurement to deliver dependable, useful automation." +short: Building reliable AI products in the era of Gen AI and Agents +season: 22 episode: 1 guests: - ranjithakulkarni +image: images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg ids: anchor: datatalksclub/episodes/Building-reliable-AI-products-in-the-era-of-Gen-AI-and-Agents---Ranjitha-Kulkarni-e396m2u youtube: x2AAjqz2XmM -image: images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Building-reliable-AI-products-in-the-era-of-Gen-AI-and-Agents---Ranjitha-Kulkarni-e396m2u apple: https://podcasts.apple.com/us/podcast/building-reliable-ai-products-in-the-era-of-gen/id1541710331?i=1000731199709 spotify: https://open.spotify.com/episode/7c22vqYNuNLKKYEfYGOos8?si=NBFT2e80S6WErW_tDDrijA youtube: https://www.youtube.com/watch?v=x2AAjqz2XmM -season: 22 -short: Building reliable AI products in the era of Gen AI and Agents -title: 'Build & Evaluate Autonomous LLM Agents: RAG, Orchestration, Context Engineering - & SRE' + +description: Build autonomous LLM agents with RAG, orchestration & context engineering - master SRE automation, testing, evaluation metrics and latency/cost tradeoffs +intro: 'How do you build and evaluate truly autonomous LLM agents that balance retrieval, orchestration, and real-world SRE needs? 
In this episode, Ranjitha Gurunath Kulkarni — Staff ML Engineer at NeuBird.ai with earlier LLM and assistant work at Dropbox and Microsoft and an LTI master’s from Carnegie Mellon — walks through practical engineering trade-offs for autonomous LLM agents and retrieval-augmented generation (RAG).

We cover a clear agent definition (autonomy, objectives, LLMs), agent orchestration tools and memory/knowledge stores, planning strategies from single-step to self-reflection, and implementation choices: prompts, SDKs, tool wrappers, and the code‑vs‑natural‑language agent trade-offs. Ranjitha digs into context engineering techniques (chunking, metadata, wrappers), RAG realities (latency, cost, GIGO), and when retrieval alone suffices versus when full agents are needed. She also maps SRE workflows to agents (logs, metrics, remediation), integration abstractions, framework trade-offs (LangChain, OpenAI Agents SDK, Small Agents), and evaluation strategy: custom datasets, mocking tools, regression tests, and goal‑based outcome assertions.

Listen to learn practical guidance for building, testing, and deploying autonomous LLM agents, and which architectures and evaluation approaches work best for production systems.' +dateadded: 2025-10-21 + +duration: PT00H59M23S + +quotableClips: +- name: Event Introduction & Community Links + startOffset: 0 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=0 + endOffset: 192 +- name: 'Early ML Projects: Image Search with OpenCV' + startOffset: 192 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=192 + endOffset: 265 +- name: Speech Recognition & Language Modeling Experience + startOffset: 265 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=265 + endOffset: 297 +- name: Transition to Recommendation Systems at Dropbox + startOffset: 297 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=297 + endOffset: 352 +- name: Question Answering & Early Agent Experiments + startOffset: 352 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=352 + endOffset: 464 +- name: 'Joining Noird.ai: Automating On‑call with Agents' + startOffset: 464 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=464 + endOffset: 660 +- name: 'Agent Definition: Autonomy, Objectives & LLMs' + startOffset: 660 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=660 + endOffset: 751 +- name: 'Agent Orchestration: Tools, Memory & Knowledge Stores' + startOffset: 751 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=751 + endOffset: 910 +- name: 'Planning Strategies: Single‑step, Multi‑pass & Self‑reflection' + startOffset: 910 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=910 + endOffset: 1103 +- name: 'Implementation Approaches: Prompts, SDKs & Tool Wrappers' + startOffset: 1103 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1103 + endOffset: 1198 +- name: 'Code Agents vs Natural‑Language Agents: Trade‑offs' + startOffset: 1198 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1198 + endOffset: 1281 +- name: 'Context Engineering: Designing Effective LLM Inputs' + 
startOffset: 1281 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1281 + endOffset: 1370 +- name: 'SRE Workflows Modeled by Agents: Logs, Metrics & Remediation' + startOffset: 1370 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1370 + endOffset: 1499 +- name: 'Integration Abstractions: Handling Diverse Tooling' + startOffset: 1499 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1499 + endOffset: 1770 +- name: 'RAG Reality Check: Latency, Cost & Garbage‑In/Garbage‑Out' + startOffset: 1770 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1770 + endOffset: 1898 +- name: 'Retrieval Limitations: Reworking Backends for LLM Context' + startOffset: 1898 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1898 + endOffset: 1968 +- name: 'Context Engineering Techniques: Chunking, Metadata & Wrappers' + startOffset: 1968 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1968 + endOffset: 2171 +- name: 'Agentic RAG: Using Retrieval as a Tool Within Agents' + startOffset: 2171 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2171 + endOffset: 2259 +- name: 'Use Cases: When RAG Is Enough vs When Agents Are Needed' + startOffset: 2259 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2259 + endOffset: 2430 +- name: 'Dynamic Planning Example: Calendar & Meeting Assistant' + startOffset: 2430 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2430 + endOffset: 2586 +- name: Dropbox Dash & AI Productivity Assistants for Enterprises + startOffset: 2586 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2586 + endOffset: 2648 +- name: 'Framework Choices: Build from Scratch vs Use Libraries' + startOffset: 2648 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2648 + endOffset: 2760 +- name: 'Framework Trade‑offs: LangChain, OpenAI Agents SDK, Small Agents' + startOffset: 2760 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2760 + endOffset: 2880 +- name: Agent Marketplaces & Tool Protocols (MCP) + startOffset: 2880 + url: 
https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2880 + endOffset: 3077 +- name: 'Evaluation Strategy: Custom Datasets & System Benchmarks' + startOffset: 3077 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3077 + endOffset: 3200 +- name: 'Testing Agents: Mocking Tools, Integration & Regression Tests' + startOffset: 3200 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3200 + endOffset: 3362 +- name: 'Goal‑based Evaluation: Outcome Assertions Over Exact Paths' + startOffset: 3362 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3362 + endOffset: 3491 +- name: 'Specialization Challenge: Why Generic Agent Solutions Lag' + startOffset: 3491 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3491 + endOffset: 3546 +- name: Closing Thoughts & Future Outlook for Agent Engineering + startOffset: 3546 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3546 + endOffset: 3563 + transcript: - header: Event Introduction & Community Links - line: Hi everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -851,144 +991,6 @@ transcript: sec: 3563 time: '59:23' who: Alexey -description: Build autonomous LLM agents with RAG, orchestration & context engineering - - master SRE automation, testing, evaluation metrics and latency/cost tradeoffs. -intro: 'How do you build and evaluate truly autonomous LLM agents that balance retrieval, - orchestration, and real-world SRE needs? In this episode, Ranjitha Gurunath Kulkarni - — Staff ML Engineer at NeuBird.ai with earlier LLM and assistant work at Dropbox - and Microsoft and an LTI master’s from Carnegie Mellon — walks through practical - engineering trade-offs for autonomous LLM agents and retrieval-augmented generation - (RAG).

We cover a clear agent definition (autonomy, objectives, LLMs), - agent orchestration tools and memory/knowledge stores, planning strategies from - single-step to self-reflection, and implementation choices: prompts, SDKs, tool - wrappers, and the code‑vs‑natural‑language agent trade-offs. Ranjitha digs into - context engineering techniques (chunking, metadata, wrappers), RAG realities (latency, - cost, GIGO), and when retrieval alone suffices versus when full agents are needed. - She also maps SRE workflows to agents (logs, metrics, remediation), integration - abstractions, framework trade-offs (LangChain, OpenAI Agents SDK, Small Agents), - and evaluation strategy: custom datasets, mocking tools, regression tests, and goal‑based - outcome assertions.

Listen to learn practical guidance for building, testing, - and deploying autonomous LLM agents, and which architectures and evaluation approaches - work best for production systems.' -dateadded: '2025-10-21' -duration: PT00H59M23S -quotableClips: -- name: Event Introduction & Community Links - startOffset: 0 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=0 - endOffset: 192 -- name: 'Early ML Projects: Image Search with OpenCV' - startOffset: 192 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=192 - endOffset: 265 -- name: Speech Recognition & Language Modeling Experience - startOffset: 265 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=265 - endOffset: 297 -- name: Transition to Recommendation Systems at Dropbox - startOffset: 297 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=297 - endOffset: 352 -- name: Question Answering & Early Agent Experiments - startOffset: 352 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=352 - endOffset: 464 -- name: 'Joining Noird.ai: Automating On‑call with Agents' - startOffset: 464 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=464 - endOffset: 660 -- name: 'Agent Definition: Autonomy, Objectives & LLMs' - startOffset: 660 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=660 - endOffset: 751 -- name: 'Agent Orchestration: Tools, Memory & Knowledge Stores' - startOffset: 751 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=751 - endOffset: 910 -- name: 'Planning Strategies: Single‑step, Multi‑pass & Self‑reflection' - startOffset: 910 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=910 - endOffset: 1103 -- name: 'Implementation Approaches: Prompts, SDKs & Tool Wrappers' - startOffset: 1103 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1103 - endOffset: 1198 -- name: 'Code Agents vs Natural‑Language Agents: Trade‑offs' - startOffset: 1198 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1198 - endOffset: 1281 -- name: 'Context Engineering: Designing Effective LLM Inputs' - 
startOffset: 1281 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1281 - endOffset: 1370 -- name: 'SRE Workflows Modeled by Agents: Logs, Metrics & Remediation' - startOffset: 1370 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1370 - endOffset: 1499 -- name: 'Integration Abstractions: Handling Diverse Tooling' - startOffset: 1499 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1499 - endOffset: 1770 -- name: 'RAG Reality Check: Latency, Cost & Garbage‑In/Garbage‑Out' - startOffset: 1770 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1770 - endOffset: 1898 -- name: 'Retrieval Limitations: Reworking Backends for LLM Context' - startOffset: 1898 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1898 - endOffset: 1968 -- name: 'Context Engineering Techniques: Chunking, Metadata & Wrappers' - startOffset: 1968 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1968 - endOffset: 2171 -- name: 'Agentic RAG: Using Retrieval as a Tool Within Agents' - startOffset: 2171 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2171 - endOffset: 2259 -- name: 'Use Cases: When RAG Is Enough vs When Agents Are Needed' - startOffset: 2259 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2259 - endOffset: 2430 -- name: 'Dynamic Planning Example: Calendar & Meeting Assistant' - startOffset: 2430 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2430 - endOffset: 2586 -- name: Dropbox Dash & AI Productivity Assistants for Enterprises - startOffset: 2586 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2586 - endOffset: 2648 -- name: 'Framework Choices: Build from Scratch vs Use Libraries' - startOffset: 2648 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2648 - endOffset: 2760 -- name: 'Framework Trade‑offs: LangChain, OpenAI Agents SDK, Small Agents' - startOffset: 2760 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2760 - endOffset: 2880 -- name: Agent Marketplaces & Tool Protocols (MCP) - startOffset: 2880 - url: 
https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2880 - endOffset: 3077 -- name: 'Evaluation Strategy: Custom Datasets & System Benchmarks' - startOffset: 3077 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3077 - endOffset: 3200 -- name: 'Testing Agents: Mocking Tools, Integration & Regression Tests' - startOffset: 3200 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3200 - endOffset: 3362 -- name: 'Goal‑based Evaluation: Outcome Assertions Over Exact Paths' - startOffset: 3362 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3362 - endOffset: 3491 -- name: 'Specialization Challenge: Why Generic Agent Solutions Lag' - startOffset: 3491 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3491 - endOffset: 3546 -- name: Closing Thoughts & Future Outlook for Agent Engineering - startOffset: 3546 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3546 - endOffset: 3563 --- Links: diff --git a/_podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md b/_podcast/to-update/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md similarity index 95% rename from _podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md rename to _podcast/to-update/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md index 2443fd3d..d46ea9ef 100644 --- a/_podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md +++ b/_podcast/to-update/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md @@ -1,19 +1,142 @@ --- +title: "Context — This episode moves from the guest’s finance-to-self-driving AI career and research in computer vision to concrete projects (AI Guide Dog, malaria mapping), deep dives on sensor and model tradeoffs (LiDAR vs cameras, on-vehicle inference, model compression), operational realities (data collection, labeling, validation pipelines, staged releases, edge cases), system-level questions (reinforcement learning vs perception, multimodal LLMs), and practical career/project advice. 
+ +Core — Building trustworthy, real‑world AI is an engineering-driven cycle that tightly couples pragmatic sensor and model choices, efficient on‑device inference, rigorous data and validation pipelines, staged safe deployment, and ethical/social purpose: the episode’s unifying idea is that successful AI systems aren’t just about better algorithms but about integrating perception, hardware constraints, data practices, testing, and human-centered impact into a continuous, safety‑first development process that scales across domains from autonomous vehicles to assistive tech and public‑health applications." +short: 'Lessons from Applied AI: Tesla, Waymo, and Beyond' +season: 22 episode: 2 guests: - aishwaryajadhav +image: images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg ids: anchor: datatalksclub/episodes/Lessons-from-Applied-AI-Tesla--Waymo--and-Beyond---Aishwarya-Jadhav-e39befu youtube: vK_SxyqIfwk -image: images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Lessons-from-Applied-AI-Tesla--Waymo--and-Beyond---Aishwarya-Jadhav-e39befu apple: https://podcasts.apple.com/us/podcast/lessons-from-applied-ai-tesla-waymo-and-beyond/id1541710331?i=1000731200298 spotify: https://open.spotify.com/episode/0h9eX7m6H2TPqOjUwb3Jw6?si=I4rKrHXpQTmS7cJBMJbUMA youtube: https://www.youtube.com/watch?v=vK_SxyqIfwk -season: 22 -short: 'Lessons from Applied AI: Tesla, Waymo, and Beyond' -title: 'Autonomous Driving AI: LiDAR vs Camera, On-Vehicle Inference & Model Compression' + +description: Discover LiDAR vs camera tradeoffs and model compression for on-vehicle inference in autonomous driving - learn quantization, edge speedups, testing tips +intro: How should self-driving systems balance LiDAR, cameras and edge compute to deliver safe, real-time perception? 
In this episode, Aishwarya Jadhav — a machine learning engineer with a Master's from Carnegie Mellon and four years deploying multimodal LLMs, generative AI and computer vision — walks through the practical tradeoffs in autonomous driving AI. Drawing on her assistive-tech work (AI Guide Dog) and research background, she explains LiDAR vs camera principles, radar and cost constraints, and Tesla’s camera-first approach for 360° vision.

We cover on-vehicle inference limits, model compression techniques like quantization and speedups for edge inference, plus validation pipelines from simulation to closed tracks and on-road testing. You’ll also hear about sensor data management, labeling strategies, multimodal LLM challenges in autonomy, gesture recognition for traffic control, and cross-domain transfer to robotics and drones. The conversation closes with real-world complexity, testing sensitive cases, and actionable career pathways and projects.

If you want concrete guidance on sensor fusion, model compression, and deployment-ready perception systems — plus practical testing and data strategies for self-driving AI — this episode delivers grounded, technical insight +dateadded: 2025-10-21 + +duration: PT00H59M01S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=0 + endOffset: 93 +- name: 'Guest Bio & Career Overview: Finance to Self-Driving AI' + startOffset: 93 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=93 + endOffset: 171 +- name: 'Morgan Stanley: Big Data Engineering & Transition to ML' + startOffset: 171 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=171 + endOffset: 235 +- name: 'Carnegie Mellon: Research Focus & Computer Vision Projects' + startOffset: 235 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=235 + endOffset: 339 +- name: 'AI Guide Dog: Mobile Navigation for the Visually Impaired' + startOffset: 339 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=339 + endOffset: 554 +- name: 'AI Guide Dog: Beta Testing, Iterative Development, Hardware Constraints' + startOffset: 554 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=554 + endOffset: 682 +- name: 'Sensor Tradeoffs: LiDAR, Radar, and Cost Considerations' + startOffset: 682 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=682 + endOffset: 718 +- name: 'LiDAR vs Cameras: Principles and Automotive Use Cases' + startOffset: 718 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=718 + endOffset: 885 +- name: 'Tesla''s Camera-First Perception: 360° Vision without LiDAR' + startOffset: 885 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=885 + endOffset: 966 +- name: 'Autopilot Use Cases: Driver Assistance vs Full Autonomy' + startOffset: 966 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=966 + endOffset: 1181 +- name: 'Waymo Ride-Hailing: App, Service Model, and Driverless Rides' + startOffset: 1181 + url: 
https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1181 + endOffset: 1197 +- name: 'Gesture Recognition for Traffic Control: Police & Construction Signals' + startOffset: 1197 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1197 + endOffset: 1337 +- name: 'On-Vehicle Inference: Performance Constraints and Optimization' + startOffset: 1337 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1337 + endOffset: 1408 +- name: 'Model Compression Techniques: Quantization and Speedups' + startOffset: 1408 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1408 + endOffset: 1445 +- name: 'Malaria Mapping: AI for Social Good Using Satellite & Topographic Data' + startOffset: 1445 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1445 + endOffset: 1623 +- name: 'Malaria Project Impact: Field Feedback and Resource Optimization' + startOffset: 1623 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1623 + endOffset: 1785 +- name: 'Validation Pipeline: Simulation, Closed Tracks, and On-Road Testing' + startOffset: 1785 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1785 + endOffset: 1862 +- name: 'Sensor Data Management: Collection, Privacy, and Scale' + startOffset: 1862 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1862 + endOffset: 1929 +- name: 'Labeling Strategy: Human Annotation and Automated Labeling' + startOffset: 1929 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1929 + endOffset: 1963 +- name: 'Model Release Cadence: Safety Checks and Staged Deployments' + startOffset: 1963 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1963 + endOffset: 2172 +- name: 'Cross-Domain Transfer: Perception Techniques for Robotics & Drones' + startOffset: 2172 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2172 + endOffset: 2238 +- name: 'Real-World Complexity: Edge Cases, Geography, and System Coordination' + startOffset: 2238 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2238 + endOffset: 2624 +- name: 'Reinforcement Learning vs Perception: 
Roles and Practical Constraints' + startOffset: 2624 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2624 + endOffset: 3088 +- name: 'Testing Sensitive Cases: Evaluation Stages and Inherited Tests' + startOffset: 3088 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3088 + endOffset: 3173 +- name: 'Multimodal LLMs in Autonomous Driving: Research and Practical Challenges' + startOffset: 3173 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3173 + endOffset: 3325 +- name: 'Career Pathways: Skills, Projects, and Entry Routes into Self-Driving AI' + startOffset: 3325 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3325 + endOffset: 3384 +- name: 'Practical Projects & Tools: Vision Apps, LLMs, and Coding Agents' + startOffset: 3384 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3384 + endOffset: 3515 +- name: Closing Remarks and Final Advice + startOffset: 3515 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3515 + endOffset: 3541 + transcript: - header: Podcast Introduction - line: Hey everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -1065,139 +1188,6 @@ transcript: sec: 3541 time: '59:01' who: Alexey -description: Discover LiDAR vs camera tradeoffs and model compression for on-vehicle - inference in autonomous driving - learn quantization, edge speedups, testing tips. -intro: How should self-driving systems balance LiDAR, cameras and edge compute to - deliver safe, real-time perception? In this episode, Aishwarya Jadhav — a machine - learning engineer with a Master's from Carnegie Mellon and four years deploying - multimodal LLMs, generative AI and computer vision — walks through the practical - tradeoffs in autonomous driving AI. Drawing on her assistive-tech work (AI Guide - Dog) and research background, she explains LiDAR vs camera principles, radar and - cost constraints, and Tesla’s camera-first approach for 360° vision.

We - cover on-vehicle inference limits, model compression techniques like quantization - and speedups for edge inference, plus validation pipelines from simulation to closed - tracks and on-road testing. You’ll also hear about sensor data management, labeling - strategies, multimodal LLM challenges in autonomy, gesture recognition for traffic - control, and cross-domain transfer to robotics and drones. The conversation closes - with real-world complexity, testing sensitive cases, and actionable career pathways - and projects.

If you want concrete guidance on sensor fusion, model compression, - and deployment-ready perception systems — plus practical testing and data strategies - for self-driving AI — this episode delivers grounded, technical insight. -dateadded: '2025-10-21' -duration: PT00H59M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=0 - endOffset: 93 -- name: 'Guest Bio & Career Overview: Finance to Self-Driving AI' - startOffset: 93 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=93 - endOffset: 171 -- name: 'Morgan Stanley: Big Data Engineering & Transition to ML' - startOffset: 171 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=171 - endOffset: 235 -- name: 'Carnegie Mellon: Research Focus & Computer Vision Projects' - startOffset: 235 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=235 - endOffset: 339 -- name: 'AI Guide Dog: Mobile Navigation for the Visually Impaired' - startOffset: 339 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=339 - endOffset: 554 -- name: 'AI Guide Dog: Beta Testing, Iterative Development, Hardware Constraints' - startOffset: 554 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=554 - endOffset: 682 -- name: 'Sensor Tradeoffs: LiDAR, Radar, and Cost Considerations' - startOffset: 682 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=682 - endOffset: 718 -- name: 'LiDAR vs Cameras: Principles and Automotive Use Cases' - startOffset: 718 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=718 - endOffset: 885 -- name: 'Tesla''s Camera-First Perception: 360° Vision without LiDAR' - startOffset: 885 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=885 - endOffset: 966 -- name: 'Autopilot Use Cases: Driver Assistance vs Full Autonomy' - startOffset: 966 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=966 - endOffset: 1181 -- name: 'Waymo Ride-Hailing: App, Service Model, and Driverless Rides' - startOffset: 1181 - url: 
https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1181 - endOffset: 1197 -- name: 'Gesture Recognition for Traffic Control: Police & Construction Signals' - startOffset: 1197 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1197 - endOffset: 1337 -- name: 'On-Vehicle Inference: Performance Constraints and Optimization' - startOffset: 1337 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1337 - endOffset: 1408 -- name: 'Model Compression Techniques: Quantization and Speedups' - startOffset: 1408 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1408 - endOffset: 1445 -- name: 'Malaria Mapping: AI for Social Good Using Satellite & Topographic Data' - startOffset: 1445 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1445 - endOffset: 1623 -- name: 'Malaria Project Impact: Field Feedback and Resource Optimization' - startOffset: 1623 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1623 - endOffset: 1785 -- name: 'Validation Pipeline: Simulation, Closed Tracks, and On-Road Testing' - startOffset: 1785 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1785 - endOffset: 1862 -- name: 'Sensor Data Management: Collection, Privacy, and Scale' - startOffset: 1862 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1862 - endOffset: 1929 -- name: 'Labeling Strategy: Human Annotation and Automated Labeling' - startOffset: 1929 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1929 - endOffset: 1963 -- name: 'Model Release Cadence: Safety Checks and Staged Deployments' - startOffset: 1963 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1963 - endOffset: 2172 -- name: 'Cross-Domain Transfer: Perception Techniques for Robotics & Drones' - startOffset: 2172 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2172 - endOffset: 2238 -- name: 'Real-World Complexity: Edge Cases, Geography, and System Coordination' - startOffset: 2238 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2238 - endOffset: 2624 -- name: 'Reinforcement Learning vs Perception: 
Roles and Practical Constraints' - startOffset: 2624 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2624 - endOffset: 3088 -- name: 'Testing Sensitive Cases: Evaluation Stages and Inherited Tests' - startOffset: 3088 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3088 - endOffset: 3173 -- name: 'Multimodal LLMs in Autonomous Driving: Research and Practical Challenges' - startOffset: 3173 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3173 - endOffset: 3325 -- name: 'Career Pathways: Skills, Projects, and Entry Routes into Self-Driving AI' - startOffset: 3325 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3325 - endOffset: 3384 -- name: 'Practical Projects & Tools: Vision Apps, LLMs, and Coding Agents' - startOffset: 3384 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3384 - endOffset: 3515 -- name: Closing Remarks and Final Advice - startOffset: 3515 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3515 - endOffset: 3541 --- Links: diff --git a/_podcast/s22e03-from-biotechnology-to-bioinformatics-software.md b/_podcast/to-update/s22e03-from-biotechnology-to-bioinformatics-software.md similarity index 95% rename from _podcast/s22e03-from-biotechnology-to-bioinformatics-software.md rename to _podcast/to-update/s22e03-from-biotechnology-to-bioinformatics-software.md index d239051e..77fe7287 100644 --- a/_podcast/s22e03-from-biotechnology-to-bioinformatics-software.md +++ b/_podcast/to-update/s22e03-from-biotechnology-to-bioinformatics-software.md @@ -1,20 +1,124 @@ --- +title: "At its core this episode is about how building open, reproducible computational infrastructure and workflows lets us translate messy biological data into scalable, actionable insight—bridging wet lab and dry lab work so researchers can ask better questions, run fewer experiments, and move faster. 
From genomics and metagenomics pipelines to network inference, molecular simulation, knowledge graphs, visualization, and AI assistants, the through‑line is empowering scientists with accessible tools, automation, and community-driven software that make complex biology interpretable, shareable, and useful in the real world." +short: From Biotechnology to Bioinformatics Software +season: 22 episode: 3 guests: - sebastianayalaruano +image: images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg ids: anchor: datatalksclub/episodes/From-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-RuanoFrom-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-Ruano-e39vsv6 youtube: ZFrcrTtnB1Q -image: images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-RuanoFrom-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-Ruano-e39vsv6 apple: https://podcasts.apple.com/us/podcast/from-biotechnology-to-bioinformatics-software-sebastian/id1541710331?i=1000733347636 spotify: https://open.spotify.com/episode/3CohNIXZdooLYoIyIbr6EF youtube: https://www.youtube.com/watch?v=ZFrcrTtnB1Q -season: 22 -short: From Biotechnology to Bioinformatics Software -title: 'Wastewater Metagenomics & Knowledge Graphs: Network Inference, AlphaFold & - Open-Source Tools' + +description: Discover wastewater metagenomics knowledge graphs & AlphaFold-driven network inference using open-source bioinformatics tools to map microbes and cut lab tests +intro: How can wastewater metagenomics and knowledge graphs reveal microbial interactions while reducing wet‑lab experiments? In this episode, Sebastian Ayala Ruano — a bioinformatics software developer and Master’s student in Systems Biology — walks through his wastewater microbiome knowledge graph thesis and open‑source tooling for multi‑omics analysis.

We cover metagenomics workflows from sequencing and abundance tables to building microbial networks with co‑abundance and association inference (CC Lasso, correlations, thresholding), plus network inference best practices. Sebastian also explains molecular simulations, protein–ligand dynamics and the practical impact of AlphaFold on structure prediction. Hear about MCW2 Graph, VueGen and VueCore, knowledge graph exploration with Neo4j and Streamlit, report automation (Quarto exports), and the bioinformatics package ecosystem (Bioconda, Bioconductor).

Listeners will gain actionable approaches for integrating wastewater metagenomics, network science, and knowledge graphs, practical open‑source tools to automate analysis and visualization, and guidance on project portfolios, language tradeoffs (R vs Python), and applying AI/LLMs in bioinformatics workflows. Ideal for researchers and engineers wanting to turn sequencing data into reproducible network models and automated reports +dateadded: 2025-10-27 + +duration: PT00H55M13S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=0 + endOffset: 69 +- name: 'Career Transition: Biotechnology to Bioinformatics Software' + startOffset: 69 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=69 + endOffset: 221 +- name: 'Master’s Thesis Overview: Wastewater Microbiome Knowledge Graph' + startOffset: 221 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=221 + endOffset: 387 +- name: 'Bioinformatics Role: Reducing Lab Experiments with Computational Analysis' + startOffset: 387 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=387 + endOffset: 503 +- name: 'Wet Lab vs Dry Lab: Experimental Work vs Computational Pipelines' + startOffset: 503 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=503 + endOffset: 681 +- name: 'Bioinformatics as Data Science: From Sequencing to Analysis' + startOffset: 681 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=681 + endOffset: 755 +- name: 'Genomic Data Basics: Nucleotides and DNA Sequences' + startOffset: 755 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=755 + endOffset: 930 +- name: DNA Sequencing Workflow and Reference Genomes + startOffset: 930 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=930 + endOffset: 1076 +- name: 'Metagenomics: Environmental Sampling and Abundance Tables' + startOffset: 1076 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1076 + endOffset: 1181 +- name: 'Building Microbial Networks: Co‑abundance and Association 
Inference' + startOffset: 1181 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1181 + endOffset: 1471 +- name: 'Network Inference Methodology: CC Lasso, Correlations, and Thresholding' + startOffset: 1471 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1471 + endOffset: 1626 +- name: 'Molecular Simulations: Protein–Ligand Dynamics and Water Boxes' + startOffset: 1626 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1626 + endOffset: 1798 +- name: 'Protein Folding Revolution: AlphaFold Impact on Structure Prediction' + startOffset: 1798 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1798 + endOffset: 2180 +- name: 'Open‑Source Projects Overview: MCW2 Graph, VueGen, and VueCore' + startOffset: 2180 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2180 + endOffset: 2311 +- name: 'Knowledge Graph Exploration: Neo4j, Streamlit, and Graph Algorithms' + startOffset: 2311 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2311 + endOffset: 2400 +- name: 'Report Automation with VueGen: Quarto, Streamlit, and Export Formats' + startOffset: 2400 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2400 + endOffset: 2549 +- name: 'Package Ecosystem: Bioconda, Bioconductor, and Bioinformatics Libraries' + startOffset: 2549 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2549 + endOffset: 2636 +- name: 'Omics Visualization: VueCore for Genomics, Proteomics, and Metabolomics' + startOffset: 2636 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2636 + endOffset: 2708 +- name: 'Portfolio Advice: Beginner Bioinformatics Projects and Tools to Showcase' + startOffset: 2708 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2708 + endOffset: 2870 +- name: 'AI & LLMs in Bioinformatics: Documentation, MLOps, and Coding Assistants' + startOffset: 2870 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2870 + endOffset: 3025 +- name: 'Language Tradeoffs: R vs Python and Scaling Scientific Tools' + startOffset: 3025 + url: 
https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3025 + endOffset: 3113 +- name: 'Visualization Workflows: Viewer and Supporting Plotting Libraries' + startOffset: 3113 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3113 + endOffset: 3197 +- name: 'Remote Work & Field Life: Working from Ecuador and Nature Notes' + startOffset: 3197 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3197 + endOffset: 3250 +- name: 'Episode Wrap‑up: Open‑Source Encouragement and Closing Remarks' + startOffset: 3250 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3250 + endOffset: 3313 + transcript: - header: Podcast Introduction - line: Hi everyone, welcome to our event. This event is brought to you by Data Talks @@ -962,125 +1066,6 @@ transcript: sec: 3313 time: '55:13' who: Sebastian -description: Discover wastewater metagenomics knowledge graphs & AlphaFold-driven - network inference using open-source bioinformatics tools to map microbes and cut - lab tests. -intro: How can wastewater metagenomics and knowledge graphs reveal microbial interactions - while reducing wet‑lab experiments? In this episode, Sebastian Ayala Ruano — a bioinformatics - software developer and Master’s student in Systems Biology — walks through his wastewater - microbiome knowledge graph thesis and open‑source tooling for multi‑omics analysis. -

We cover metagenomics workflows from sequencing and abundance tables to - building microbial networks with co‑abundance and association inference (CC Lasso, - correlations, thresholding), plus network inference best practices. Sebastian also - explains molecular simulations, protein–ligand dynamics and the practical impact - of AlphaFold on structure prediction. Hear about MCW2 Graph, VueGen and VueCore, - knowledge graph exploration with Neo4j and Streamlit, report automation (Quarto - exports), and the bioinformatics package ecosystem (Bioconda, Bioconductor).

- Listeners will gain actionable approaches for integrating wastewater metagenomics, - network science, and knowledge graphs, practical open‑source tools to automate analysis - and visualization, and guidance on project portfolios, language tradeoffs (R vs - Python), and applying AI/LLMs in bioinformatics workflows. Ideal for researchers - and engineers wanting to turn sequencing data into reproducible network models and - automated reports. -dateadded: '2025-10-27' -duration: PT00H55M13S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=0 - endOffset: 69 -- name: 'Career Transition: Biotechnology to Bioinformatics Software' - startOffset: 69 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=69 - endOffset: 221 -- name: 'Master’s Thesis Overview: Wastewater Microbiome Knowledge Graph' - startOffset: 221 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=221 - endOffset: 387 -- name: 'Bioinformatics Role: Reducing Lab Experiments with Computational Analysis' - startOffset: 387 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=387 - endOffset: 503 -- name: 'Wet Lab vs Dry Lab: Experimental Work vs Computational Pipelines' - startOffset: 503 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=503 - endOffset: 681 -- name: 'Bioinformatics as Data Science: From Sequencing to Analysis' - startOffset: 681 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=681 - endOffset: 755 -- name: 'Genomic Data Basics: Nucleotides and DNA Sequences' - startOffset: 755 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=755 - endOffset: 930 -- name: DNA Sequencing Workflow and Reference Genomes - startOffset: 930 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=930 - endOffset: 1076 -- name: 'Metagenomics: Environmental Sampling and Abundance Tables' - startOffset: 1076 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1076 - endOffset: 1181 -- name: 'Building Microbial Networks: Co‑abundance and 
Association Inference' - startOffset: 1181 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1181 - endOffset: 1471 -- name: 'Network Inference Methodology: CC Lasso, Correlations, and Thresholding' - startOffset: 1471 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1471 - endOffset: 1626 -- name: 'Molecular Simulations: Protein–Ligand Dynamics and Water Boxes' - startOffset: 1626 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1626 - endOffset: 1798 -- name: 'Protein Folding Revolution: AlphaFold Impact on Structure Prediction' - startOffset: 1798 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1798 - endOffset: 2180 -- name: 'Open‑Source Projects Overview: MCW2 Graph, VueGen, and VueCore' - startOffset: 2180 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2180 - endOffset: 2311 -- name: 'Knowledge Graph Exploration: Neo4j, Streamlit, and Graph Algorithms' - startOffset: 2311 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2311 - endOffset: 2400 -- name: 'Report Automation with VueGen: Quarto, Streamlit, and Export Formats' - startOffset: 2400 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2400 - endOffset: 2549 -- name: 'Package Ecosystem: Bioconda, Bioconductor, and Bioinformatics Libraries' - startOffset: 2549 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2549 - endOffset: 2636 -- name: 'Omics Visualization: VueCore for Genomics, Proteomics, and Metabolomics' - startOffset: 2636 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2636 - endOffset: 2708 -- name: 'Portfolio Advice: Beginner Bioinformatics Projects and Tools to Showcase' - startOffset: 2708 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2708 - endOffset: 2870 -- name: 'AI & LLMs in Bioinformatics: Documentation, MLOps, and Coding Assistants' - startOffset: 2870 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2870 - endOffset: 3025 -- name: 'Language Tradeoffs: R vs Python and Scaling Scientific Tools' - startOffset: 3025 - url: 
https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3025 - endOffset: 3113 -- name: 'Visualization Workflows: Viewer and Supporting Plotting Libraries' - startOffset: 3113 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3113 - endOffset: 3197 -- name: 'Remote Work & Field Life: Working from Ecuador and Nature Notes' - startOffset: 3197 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3197 - endOffset: 3250 -- name: 'Episode Wrap‑up: Open‑Source Encouragement and Closing Remarks' - startOffset: 3250 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3250 - endOffset: 3313 --- Links: diff --git a/_podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md b/_podcast/to-update/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md similarity index 95% rename from _podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md rename to _podcast/to-update/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md index e8b46675..badf0a67 100644 --- a/_podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md +++ b/_podcast/to-update/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md @@ -1,37 +1,131 @@ --- +title: "Context: This episode surveys practical, hands‑on patterns—RAG, chunking, prompting, generator–evaluator workflows, transcript pipelines, evaluation sets, monitoring, agents, memory design, and developer tooling—drawn from moving models from prototypes into real products across consulting, DevRel, and engineering roles. 
+ +Core: The unifying idea is pragmatic, iterative engineering of LLM‑powered systems: prioritize retrieval‑first solutions that deliver immediate business value, instrument rigorous evaluation and monitoring (gold tests, failure analysis, generator–evaluator), automate pipelines and reproducible workflows, and only escalate to agentic tooling or persistent memory once data, metrics, and clear ROI justify the added complexity—treating AI as an integrated augmentation that must be built, tested, and scaled with standard software engineering practices." +short: How to Build and Evaluate AI systems in the Age of LLMs +season: 22 episode: 4 guests: - hugobowneanderson -date: 2025-11-07 -intro: How do you move from prototypes to reliable, scalable LLM systems that actually - deliver business value?

In this episode, Hugo Bowne‑Anderson—tracing a path - from biology research into Python, PyData, DataCamp curriculum and product work, - then into consulting, teaching, and developer relations—walks through practical - engineering and evaluation patterns for building LLM-driven workflows.

- We cover prompt engineering (role prompts, structured output, timestamps), everyday - LLM use cases (summaries, translation, CSV workflows), transcript pipelines (Gemini, - Descript, Loom) and automation with GitHub Actions. Hugo explains the generator–evaluator - pattern for automated quality control, how to design evaluation sets and failure - analysis, and techniques for logging, traces, and debuggable MVPs.

You’ll - hear when to prioritize RAG (retrieval-augmented generation) and chunking strategies, - when to add tool calls or agents, plus a concrete email assistant build using Gmail - API + RAG. The episode closes with a four‑step framework for agents and guidance - on retrieval‑based vs multi‑turn memory.

If you’re building LLM systems, - this conversation gives actionable tactics for prompt engineering, evaluation, scaling - transcript pipelines, and deciding when to adopt agents, embeddings, and automation. +image: images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg ids: anchor: datatalksclub/episodes/How-to-Build-and-Evaluate-AI-systems-in-the-Age-of-LLMs---Hugo-Bowne-Anderson-e39vt24 youtube: eC3RNuI6ow0 -image: images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/How-to-Build-and-Evaluate-AI-systems-in-the-Age-of-LLMs---Hugo-Bowne-Anderson-e39vt24 apple: https://podcasts.apple.com/us/podcast/how-to-build-and-evaluate-ai-systems-in-the-age-of/id1541710331?i=1000733350691 spotify: https://open.spotify.com/episode/2RD2qXaYa2ZjKjuIE7Aj6O youtube: https://www.youtube.com/watch?v=eC3RNuI6ow0 -season: 22 -short: How to Build and Evaluate AI systems in the Age of LLMs -title: 'Build & Scale LLM Agents and RAG Pipelines: Prompting, Transcript Automation, - Evaluation' + +description: Build LLM agents and RAG pipelines using prompting, transcript automation, and evaluation to scale systems - learn chunking, monitoring, and practical build steps +intro: How do you move from prototypes to reliable, scalable LLM systems that actually deliver business value?

In this episode, Hugo Bowne‑Anderson—tracing a path from biology research into Python, PyData, DataCamp curriculum and product work, then into consulting, teaching, and developer relations—walks through practical engineering and evaluation patterns for building LLM-driven workflows.

We cover prompt engineering (role prompts, structured output, timestamps), everyday LLM use cases (summaries, translation, CSV workflows), transcript pipelines (Gemini, Descript, Loom) and automation with GitHub Actions. Hugo explains the generator–evaluator pattern for automated quality control, how to design evaluation sets and failure analysis, and techniques for logging, traces, and debuggable MVPs.

You’ll hear when to prioritize RAG (retrieval-augmented generation) and chunking strategies, when to add tool calls or agents, plus a concrete email assistant build using Gmail API + RAG. The episode closes with a four‑step framework for agents and guidance on retrieval‑based vs multi‑turn memory.

If you’re building LLM systems, this conversation gives actionable tactics for prompt engineering, evaluation, scaling transcript pipelines, and deciding when to adopt agents, embeddings, and automation +dateadded: 2025-10-27 +date: 2025-11-07 + +duration: PT01H01M30S + +quotableClips: +- name: Podcast Kickoff & Hugo Bowne‑Anderson Background + startOffset: 0 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=0 + endOffset: 72 +- name: 'Vanishing Gradients vs High Signal: Podcast Formats & Audiences' + startOffset: 72 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=72 + endOffset: 124 +- name: 'From Academia to Industry: Biology Research, Python, and PyData' + startOffset: 124 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=124 + endOffset: 207 +- name: 'Early Industry Work: DataCamp Curriculum and Product Roles' + startOffset: 207 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=207 + endOffset: 237 +- name: 'Transition to Freelance: Consulting, Teaching, and DevRel' + startOffset: 237 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=237 + endOffset: 431 +- name: 'Consulting vs Advisory: Hands‑On Coding and Organizational Advice' + startOffset: 431 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=431 + endOffset: 504 +- name: 'Driving AI Adoption: Loss Aversion and Dedicated Experimentation Time' + startOffset: 504 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=504 + endOffset: 568 +- name: 'Everyday LLM Use Cases: Summaries, Translation, and CSV Workflows' + startOffset: 568 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=568 + endOffset: 671 +- name: 'Prompting Best Practices: Role Prompts, Structured Output, and Timestamps' + startOffset: 671 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=671 + endOffset: 742 +- name: 'Transcript Workflows: Gemini, Descript, Loom and Automation Tools' + startOffset: 742 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=742 + endOffset: 836 +- name: 'Generator–Evaluator Pattern: 
Automated Quality Control for Outputs' + startOffset: 836 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=836 + endOffset: 1058 +- name: 'Scaling Transcript Pipelines: Automation with GitHub Actions' + startOffset: 1058 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1058 + endOffset: 1380 +- name: 'Evaluation Sets for LLMs: Gold Tests, Size, Cost, and Representativeness' + startOffset: 1380 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1380 + endOffset: 1603 +- name: 'Failure Analysis: Categorizing Errors and Prioritizing Retrieval Fixes' + startOffset: 1603 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1603 + endOffset: 1658 +- name: 'Vibe Coding & Monitoring: Logging, Traces, and Debuggable MVPs' + startOffset: 1658 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1658 + endOffset: 1916 +- name: 'Developer Tools & Assistants: GitHub Copilot, Cursor, and IDE Agents' + startOffset: 1916 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1916 + endOffset: 1994 +- name: 'Embedded Agents in Workflows: Slack Integration and Proactive Assistants' + startOffset: 1994 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1994 + endOffset: 2412 +- name: 'Agentic Value Beyond Chat: Actions, Documents, and Automation' + startOffset: 2412 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2412 + endOffset: 2666 +- name: 'Prioritizing RAG: Quick Business Wins with Chunking and Embeddings' + startOffset: 2666 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2666 + endOffset: 2900 +- name: 'Chunking Strategies: Fixed Length, Sliding Windows, and Context Rot' + startOffset: 2900 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2900 + endOffset: 3019 +- name: 'When to Add Tooling: Moving from RAG to Agents and Tool Calls' + startOffset: 3019 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3019 + endOffset: 3214 +- name: 'Practical Build: Email Assistant Example using Gmail API + RAG' + startOffset: 3214 + url: 
https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3214 + endOffset: 3381 +- name: 'Four‑Step Framework for Agents: Problem, Start Small, Data, Evaluation' + startOffset: 3381 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3381 + endOffset: 3461 +- name: 'Memory Design: Retrieval‑Based Memory vs Multi‑Turn Conversation Memory' + startOffset: 3461 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3461 + endOffset: 3655 +- name: 'Episode Wrap‑Up: Key Takeaways, Courses, and Next Steps' + startOffset: 3655 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3655 + endOffset: 3690 + transcript: - header: Episode Introduction & Guest Bio - line: This week we will talk about LLMs and AI like everyone else, I guess. @@ -1131,112 +1225,6 @@ transcript: sec: 3690 time: '1:01:30' who: Alexey -description: Build LLM agents and RAG pipelines using prompting, transcript automation, - and evaluation to scale systems - learn chunking, monitoring, and practical build - steps. -dateadded: '2025-10-27' -duration: PT01H01M30S -quotableClips: -- name: Podcast Kickoff & Hugo Bowne‑Anderson Background - startOffset: 0 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=0 - endOffset: 72 -- name: 'Vanishing Gradients vs High Signal: Podcast Formats & Audiences' - startOffset: 72 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=72 - endOffset: 124 -- name: 'From Academia to Industry: Biology Research, Python, and PyData' - startOffset: 124 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=124 - endOffset: 207 -- name: 'Early Industry Work: DataCamp Curriculum and Product Roles' - startOffset: 207 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=207 - endOffset: 237 -- name: 'Transition to Freelance: Consulting, Teaching, and DevRel' - startOffset: 237 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=237 - endOffset: 431 -- name: 'Consulting vs Advisory: Hands‑On Coding and Organizational Advice' - startOffset: 431 - url: 
https://www.youtube.com/watch?v=eC3RNuI6ow0&t=431 - endOffset: 504 -- name: 'Driving AI Adoption: Loss Aversion and Dedicated Experimentation Time' - startOffset: 504 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=504 - endOffset: 568 -- name: 'Everyday LLM Use Cases: Summaries, Translation, and CSV Workflows' - startOffset: 568 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=568 - endOffset: 671 -- name: 'Prompting Best Practices: Role Prompts, Structured Output, and Timestamps' - startOffset: 671 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=671 - endOffset: 742 -- name: 'Transcript Workflows: Gemini, Descript, Loom and Automation Tools' - startOffset: 742 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=742 - endOffset: 836 -- name: 'Generator–Evaluator Pattern: Automated Quality Control for Outputs' - startOffset: 836 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=836 - endOffset: 1058 -- name: 'Scaling Transcript Pipelines: Automation with GitHub Actions' - startOffset: 1058 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1058 - endOffset: 1380 -- name: 'Evaluation Sets for LLMs: Gold Tests, Size, Cost, and Representativeness' - startOffset: 1380 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1380 - endOffset: 1603 -- name: 'Failure Analysis: Categorizing Errors and Prioritizing Retrieval Fixes' - startOffset: 1603 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1603 - endOffset: 1658 -- name: 'Vibe Coding & Monitoring: Logging, Traces, and Debuggable MVPs' - startOffset: 1658 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1658 - endOffset: 1916 -- name: 'Developer Tools & Assistants: GitHub Copilot, Cursor, and IDE Agents' - startOffset: 1916 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1916 - endOffset: 1994 -- name: 'Embedded Agents in Workflows: Slack Integration and Proactive Assistants' - startOffset: 1994 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1994 - endOffset: 2412 -- name: 
'Agentic Value Beyond Chat: Actions, Documents, and Automation' - startOffset: 2412 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2412 - endOffset: 2666 -- name: 'Prioritizing RAG: Quick Business Wins with Chunking and Embeddings' - startOffset: 2666 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2666 - endOffset: 2900 -- name: 'Chunking Strategies: Fixed Length, Sliding Windows, and Context Rot' - startOffset: 2900 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2900 - endOffset: 3019 -- name: 'When to Add Tooling: Moving from RAG to Agents and Tool Calls' - startOffset: 3019 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3019 - endOffset: 3214 -- name: 'Practical Build: Email Assistant Example using Gmail API + RAG' - startOffset: 3214 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3214 - endOffset: 3381 -- name: 'Four‑Step Framework for Agents: Problem, Start Small, Data, Evaluation' - startOffset: 3381 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3381 - endOffset: 3461 -- name: 'Memory Design: Retrieval‑Based Memory vs Multi‑Turn Conversation Memory' - startOffset: 3461 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3461 - endOffset: 3655 -- name: 'Episode Wrap‑Up: Key Takeaways, Courses, and Next Steps' - startOffset: 3655 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3655 - endOffset: 3690 --- Links: diff --git a/_podcast/s08e01-visualising-machine-learning.md b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md similarity index 96% rename from _podcast/s08e01-visualising-machine-learning.md rename to _podcast/visualizing-machine-learning-concepts-to-explain-ml.md index 5136fb79..09170cd5 100644 --- a/_podcast/s08e01-visualising-machine-learning.md +++ b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md @@ -1,40 +1,124 @@ --- +title: 'Using Visualizations to Explain Machine Learning: Build Intuition with kDimensions, Figma & Templates' +short: Using Visualizations to Explain Machine 
Learning +season: 8 episode: 1 guests: - meoramer -intro: 'How do you teach machine learning so people build intuition before diving - into math? In this episode, Meor Amer—educator, author, and Developer Relations at - Cohere—walks through a visual-first approach to machine learning that makes concepts - accessible and actionable. Drawing on his journey from bioengineering and telecom - analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, - Meor explains why visual machine learning and dimensionality reduction matter and - how templates can scale understanding.

We cover practical workflows: generating - ideas (visualize the verb, use metaphors like the catapult and airplane), design - constraints that spark creativity, and a sketchbook → Figma pipeline for engineers - that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn - visuals, how to map ML problems (classification, regression, clustering, anomaly, - RL) to templates, and hands‑on learning techniques—consume with intent, break and - modify code. He also discusses monetizing visual design services and turning articles - into key visuals using 4–5 keywords.

Listen to learn concrete techniques - for ML visualization, Figma for engineers, and creating reusable templates that - build intuition and make machine learning teachable.' -topics: -- machine learning -- education +image: images/podcast/s08e01-visualising-machine-learning.jpg ids: anchor: Visualising-Machine-Learning---Meor-Amer-e1g7iri youtube: OuCuk-7RHjM -image: images/podcast/s08e01-visualising-machine-learning.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Visualising-Machine-Learning---Meor-Amer-e1g7iri apple: https://podcasts.apple.com/us/podcast/visualising-machine-learning-meor-amer/id1541710331?i=1000555246590 spotify: https://open.spotify.com/episode/032NhEphm5QDdDFDUIypOL youtube: https://www.youtube.com/watch?v=OuCuk-7RHjM -season: 8 -short: Visualising Machine Learning -title: 'Visualize Machine Learning: Build Intuition with kDimensions, Figma & Templates' -description: Discover kDimensions and Figma templates to visualize machine learning, - build intuition before the math, map ML problems, and create shareable visuals. + +description: Discover kDimensions and Figma templates to visualize machine learning, build intuition before the math, map ML problems, and create shareable visuals +intro: 'How do you teach machine learning so people build intuition before diving into math? In this episode, Meor Amer—educator, author, and Developer Relations at Cohere—walks through a visual-first approach to machine learning that makes concepts accessible and actionable. Drawing on his journey from bioengineering and telecom analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, Meor explains why visual machine learning and dimensionality reduction matter and how templates can scale understanding.

We cover practical workflows: generating ideas (visualize the verb, use metaphors like the catapult and airplane), design constraints that spark creativity, and a sketchbook → Figma pipeline for engineers that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn visuals, how to map ML problems (classification, regression, clustering, anomaly, RL) to templates, and hands‑on learning techniques—consume with intent, break and modify code. He also discusses monetizing visual design services and turning articles into key visuals using 4–5 keywords.

Listen to learn concrete techniques for ML visualization, Figma for engineers, and creating reusable templates that build intuition and make machine learning teachable.' +topics: +- machine learning +- education +dateadded: 2022-03-26 + +duration: PT00H59M45S + +quotableClips: +- name: Episode Introduction & Visual ML Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=0 + endOffset: 116 +- name: Posting Cadence & Visuals on LinkedIn + startOffset: 116 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=116 + endOffset: 177 +- name: 'Career Journey: Bioengineering → Telecom Analytics → Self‑employment' + startOffset: 177 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=177 + endOffset: 375 +- name: 'kDimensions: Name & Visual Dimensionality Reduction' + startOffset: 375 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=375 + endOffset: 532 +- name: Jack Butcher Influence & Visual Engineering Principles + startOffset: 532 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=532 + endOffset: 700 +- name: 'Purpose of Visuals: Build Intuition Before Math' + startOffset: 700 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=700 + endOffset: 852 +- name: 'Design Constraints: Creativity Through Color & Shape Limits' + startOffset: 852 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=852 + endOffset: 1053 +- name: 'Idea Generation: Visualize the Verb & Use Metaphors' + startOffset: 1053 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1053 + endOffset: 1286 +- name: Drift Visualized (Catapult Metaphor) & Data‑centric AI Airplane Analogy + startOffset: 1286 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1286 + endOffset: 1447 +- name: 'Creative Process: Longlist → Shortlist → Brainstorming' + startOffset: 1447 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1447 + endOffset: 1826 +- name: 'Capturing Ideas: Sketchbook, Notes & Quick Logging' + startOffset: 1826 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1826 
+ endOffset: 1874 +- name: 'Tools: Figma for Engineers & Geometric Shape Workflow' + startOffset: 1874 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1874 + endOffset: 2011 +- name: 'From Sketch to Figma: Drafting, Asset Reuse & Iteration' + startOffset: 2011 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2011 + endOffset: 2132 +- name: 'Design Advice: Prioritize Message Over Aesthetics; Start Posting' + startOffset: 2132 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2132 + endOffset: 2450 +- name: 'Learning Technique: Consume with Intent to Teach — "What If?" Questions' + startOffset: 2450 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2450 + endOffset: 2617 +- name: 'Hands‑on Learning: Break and Modify Code to Understand ML' + startOffset: 2617 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2617 + endOffset: 2687 +- name: 'Monetization: Visual Design Services for Startups & Content Creators' + startOffset: 2687 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2687 + endOffset: 2940 +- name: 'Content Design: Turn Articles into Key Visuals (Extract 4–5 Keywords)' + startOffset: 2940 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2940 + endOffset: 3056 +- name: 'Visualization Techniques: Contrast, Balance & Slider Metaphors' + startOffset: 3056 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3056 + endOffset: 3246 +- name: 'Mapping ML Problems to Visual Templates: Classification, Regression, Anomaly, + Clustering, RL' + startOffset: 3246 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3246 + endOffset: 3361 +- name: 'Book Overview: Visual Introduction to Deep Learning (Neuron‑by‑Neuron)' + startOffset: 3361 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3361 + endOffset: 3536 +- name: 'Book Workflow: Visual‑first Layout with Concise Text' + startOffset: 3536 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3536 + endOffset: 3612 +- name: 'Closing: kDimensions, Book Links & Contact Information' + 
startOffset: 3612 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3612 + endOffset: 3585 + transcript: - header: Episode Introduction & Visual ML Overview - header: Posting Cadence & Visuals on LinkedIn @@ -870,102 +954,6 @@ transcript: sec: 3701 time: '1:01:41' who: Meor -dateadded: '2022-03-26' -duration: PT00H59M45S -quotableClips: -- name: Episode Introduction & Visual ML Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=0 - endOffset: 116 -- name: Posting Cadence & Visuals on LinkedIn - startOffset: 116 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=116 - endOffset: 177 -- name: 'Career Journey: Bioengineering → Telecom Analytics → Self‑employment' - startOffset: 177 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=177 - endOffset: 375 -- name: 'kDimensions: Name & Visual Dimensionality Reduction' - startOffset: 375 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=375 - endOffset: 532 -- name: Jack Butcher Influence & Visual Engineering Principles - startOffset: 532 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=532 - endOffset: 700 -- name: 'Purpose of Visuals: Build Intuition Before Math' - startOffset: 700 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=700 - endOffset: 852 -- name: 'Design Constraints: Creativity Through Color & Shape Limits' - startOffset: 852 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=852 - endOffset: 1053 -- name: 'Idea Generation: Visualize the Verb & Use Metaphors' - startOffset: 1053 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1053 - endOffset: 1286 -- name: Drift Visualized (Catapult Metaphor) & Data‑centric AI Airplane Analogy - startOffset: 1286 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1286 - endOffset: 1447 -- name: 'Creative Process: Longlist → Shortlist → Brainstorming' - startOffset: 1447 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1447 - endOffset: 1826 -- name: 'Capturing Ideas: Sketchbook, Notes & Quick Logging' - startOffset: 
1826 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1826 - endOffset: 1874 -- name: 'Tools: Figma for Engineers & Geometric Shape Workflow' - startOffset: 1874 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1874 - endOffset: 2011 -- name: 'From Sketch to Figma: Drafting, Asset Reuse & Iteration' - startOffset: 2011 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2011 - endOffset: 2132 -- name: 'Design Advice: Prioritize Message Over Aesthetics; Start Posting' - startOffset: 2132 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2132 - endOffset: 2450 -- name: 'Learning Technique: Consume with Intent to Teach — "What If?" Questions' - startOffset: 2450 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2450 - endOffset: 2617 -- name: 'Hands‑on Learning: Break and Modify Code to Understand ML' - startOffset: 2617 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2617 - endOffset: 2687 -- name: 'Monetization: Visual Design Services for Startups & Content Creators' - startOffset: 2687 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2687 - endOffset: 2940 -- name: 'Content Design: Turn Articles into Key Visuals (Extract 4–5 Keywords)' - startOffset: 2940 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2940 - endOffset: 3056 -- name: 'Visualization Techniques: Contrast, Balance & Slider Metaphors' - startOffset: 3056 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3056 - endOffset: 3246 -- name: 'Mapping ML Problems to Visual Templates: Classification, Regression, Anomaly, - Clustering, RL' - startOffset: 3246 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3246 - endOffset: 3361 -- name: 'Book Overview: Visual Introduction to Deep Learning (Neuron‑by‑Neuron)' - startOffset: 3361 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3361 - endOffset: 3536 -- name: 'Book Workflow: Visual‑first Layout with Concise Text' - startOffset: 3536 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3536 - endOffset: 3612 -- name: 
'Closing: kDimensions, Book Links & Contact Information' - startOffset: 3612 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3612 - endOffset: 3585 --- Links: diff --git a/_posts/2025-08-16-ultimate-list-of-20-free-online-courses-on-machine-learning.md b/_posts/2025-08-16-free-machine-learning-courses.md similarity index 100% rename from _posts/2025-08-16-ultimate-list-of-20-free-online-courses-on-machine-learning.md rename to _posts/2025-08-16-free-machine-learning-courses.md diff --git a/scripts/generate_central_narrative_podcasts.py b/scripts/generate_central_narrative_podcasts.py new file mode 100755 index 00000000..e164063a --- /dev/null +++ b/scripts/generate_central_narrative_podcasts.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 +""" +Generate titles for podcast episodes using OpenAI API. + +This script takes a podcast page, finds the timestamp file from podcast-timestamps folder, +and generates a SEO-optimized title for the podcast episode. + +Usage: + python generate_title_podcasts.py [--update] [--api-key YOUR_KEY] + python generate_title_podcasts.py --all-in-dir _podcast/ --update + python generate_title_podcasts.py --file-list podcasts.txt --update + +Example: + python generate_title_podcasts.py _podcast/s01e02-processes.md --update +""" + +import os +import sys +import argparse +from pathlib import Path +from typing import List +from openai import OpenAI + + +DEFAULT_PROMPT = """You are an expert creating a central narrative for a podcast episode. + +Task: Determine the single most important idea that unifies all major themes, segments, and discussions in a podcast episode. + +REQUIREMENTS: +- Scan all timestamps +- Separate context from core +- Identify the dominant through-line and key themes +- Phrase the center as a clear, high-level theme + +TIMESTAMPS: +{timestamps_content} + +OUTPUT: Generate ONLY the central narrative text. 
+""" + + +def get_timestamps_file(podcast_file_path): + """Get the timestamp file path for a podcast.""" + script_dir = Path(__file__).parent + project_root = script_dir.parent + timestamps_dir = project_root / 'podcast-timestamps' + + # Use podcast filename (without .md) for the timestamp file + podcast_name = podcast_file_path.stem # filename without extension + timestamp_file = timestamps_dir / f"{podcast_name}.txt" + + return timestamp_file if timestamp_file.exists() else None + + +def generate_title(timestamps_content, api_key=None): + """Generate title using OpenAI API.""" + # Initialize OpenAI client + if api_key: + client = OpenAI(api_key=api_key) + else: + # Will use OPENAI_API_KEY environment variable + client = OpenAI() + + # Format the prompt with all the information + prompt = DEFAULT_PROMPT.format( + timestamps_content=timestamps_content, + ) + + print(f"Prompt size: {len(prompt)} characters") + print(f" - Timestamps: {len(timestamps_content)} characters") + print() + + # Call OpenAI API + response = client.responses.create( + model="gpt-5-mini", # Using gpt-5-mini + input=prompt, + ) + + title = response.output_text.strip() + + # Remove quotes if present + if title.startswith('"') and title.endswith('"'): + title = title[1:-1] + if title.startswith("'") and title.endswith("'"): + title = title[1:-1] + + return title + + +def update_podcast_file(file_path, title): + """Update the podcast file with the generated title.""" + # Ensure title has no quotes (strip if present) + title = title.strip() + if title.startswith('"') and title.endswith('"'): + title = title[1:-1] + if title.startswith("'") and title.endswith("'"): + title = title[1:-1] + + with open(file_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + + # Find and replace the title line + found = False + for i, line in enumerate(lines): + if line.strip().startswith('title:'): + # Always format as: title: "title-here" + indent = len(line) - len(line.lstrip()) + lines[i] = ' ' * indent + 
f'title: "{title}"\n' + found = True + break + + if not found: + return False + + # Write back to file + with open(file_path, 'w', encoding='utf-8') as f: + f.writelines(lines) + + return True + + +def get_project_root(): + """Get the project root directory (parent of scripts directory).""" + script_dir = Path(__file__).parent + return script_dir.parent + + +def resolve_podcast_path(podcast_file: str) -> Path: + """Resolve podcast file path relative to project root.""" + file_path = Path(podcast_file) + if file_path.exists(): + return file_path + + # Try relative to project root + project_root = get_project_root() + file_path = project_root / podcast_file + if file_path.exists(): + return file_path + + return None + + +def get_podcast_files_from_args(args) -> List[Path]: + """Get list of podcast files from command line arguments.""" + files = [] + + if args.file_list: + # Read from file + list_file = Path(args.file_list) + if not list_file.exists(): + project_root = get_project_root() + list_file = project_root / args.file_list + + if not list_file.exists(): + print(f"Error: File list not found: {args.file_list}", file=sys.stderr) + sys.exit(1) + + with open(list_file, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#'): + file_path = resolve_podcast_path(line) + if file_path: + files.append(file_path) + else: + print(f"Warning: File not found: {line}", file=sys.stderr) + + elif args.all_in_dir: + # Get all .md files in directory + project_root = get_project_root() + dir_path = Path(args.all_in_dir) + if not dir_path.is_absolute(): + dir_path = project_root / args.all_in_dir + + if not dir_path.exists(): + print(f"Error: Directory not found: {args.all_in_dir}", file=sys.stderr) + sys.exit(1) + + files = sorted(dir_path.glob('*.md')) + + else: + # From command line arguments + for podcast_file in args.podcast_files: + file_path = resolve_podcast_path(podcast_file) + if file_path: + files.append(file_path) + 
else: + print(f"Error: File not found: {podcast_file}", file=sys.stderr) + + return files + + +def process_podcast_file(podcast_file: Path, api_key: str = None, update: bool = False, dry_run: bool = False) -> bool: + """Process a single podcast file to generate and optionally update the title.""" + print(f"Processing: {podcast_file.name}") + print("-" * 60) + + try: + # Get timestamp file + timestamp_file = get_timestamps_file(podcast_file) + if not timestamp_file: + print(f"Warning: Timestamp file not found: podcast-timestamps/{podcast_file.stem}.txt", file=sys.stderr) + timestamps_content = "No timestamps available." + else: + print(f"Found timestamps: {timestamp_file.relative_to(timestamp_file.parent.parent)}") + with open(timestamp_file, 'r', encoding='utf-8') as f: + timestamps_content = f.read().strip() + + print() + print("Generating title...") + print() + + # Generate title + title = generate_title(timestamps_content, api_key=api_key) + + print(f"Generated title ({len(title)} chars):") + print(f" {title}") + print() + + # Update file if requested + if update: + if dry_run: + print("\n[DRY RUN] Would update the file with the new title") + return True + else: + success = update_podcast_file(podcast_file, title) + if success: + print(f"\n✓ File updated successfully!") + return True + else: + print(f"\n✗ Failed to update file", file=sys.stderr) + return False + else: + print("\nTo update the file, run with --update flag") + return True + + except Exception as e: + print(f"\nError: {e}", file=sys.stderr) + import traceback + traceback.print_exc() + return False + + +def main(): + parser = argparse.ArgumentParser( + description='Generate titles for podcast episodes using OpenAI API', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Generate title and display it + python generate_title_podcasts.py _podcast/s01e02-processes.md + + # Generate and update the file + python generate_title_podcasts.py _podcast/s01e02-processes.md 
--update + + # Process multiple files + python generate_title_podcasts.py _podcast/episode1.md _podcast/episode2.md --update + + # Process all files in a directory + python generate_title_podcasts.py --all-in-dir _podcast/ --update + + # Read file list from a text file + python generate_title_podcasts.py --file-list podcasts.txt --update + + # Use custom API key + python generate_title_podcasts.py _podcast/s01e02-processes.md --api-key sk-... + + # Dry run to see what would be done + python generate_title_podcasts.py --all-in-dir _podcast/ --dry-run + """ + ) + + parser.add_argument('podcast_files', nargs='*', help='Podcast markdown files to process') + parser.add_argument('--file-list', help='Text file containing list of podcast files (one per line)') + parser.add_argument('--all-in-dir', help='Process all .md files in the specified directory') + parser.add_argument('--update', action='store_true', help='Update the file with generated title') + parser.add_argument('--api-key', help='OpenAI API key (or set OPENAI_API_KEY env var)') + parser.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes') + + args = parser.parse_args() + + # Validate arguments + if not args.podcast_files and not args.file_list and not args.all_in_dir: + parser.error("Must provide podcast files, --file-list, or --all-in-dir") + + # Get list of files to process + files = get_podcast_files_from_args(args) + + if not files: + print("Error: No valid podcast files found", file=sys.stderr) + sys.exit(1) + + print(f"Found {len(files)} podcast file(s) to process") + if args.dry_run: + print("[DRY RUN MODE - No changes will be made]") + print() + + # Use API key from environment if not provided + api_key = args.api_key or os.getenv('OPENAI_API_KEY') + + # Process each file + successful = 0 + failed = 0 + + for i, podcast_file in enumerate(files, 1): + print(f"\n[{i}/{len(files)}] ", end='') + success = process_podcast_file( + podcast_file, + 
api_key=api_key, + update=args.update, + dry_run=args.dry_run + ) + + if success: + successful += 1 + else: + failed += 1 + + # Summary + print("\n" + "=" * 60) + print(f"Summary: {successful} successful, {failed} failed") + + if failed > 0: + sys.exit(1) + + +if __name__ == '__main__': + main() + diff --git a/scripts/podcasts2.txt b/scripts/podcasts2.txt new file mode 100644 index 00000000..b88d209b --- /dev/null +++ b/scripts/podcasts2.txt @@ -0,0 +1,189 @@ +https://datatalks.club/podcast/s01e01-roles.html +https://datatalks.club/podcast/s01e02-processes.html +https://datatalks.club/podcast/s01e03-building-ds-team.html +https://datatalks.club/podcast/s01e04-standing-out-as-a-data-scientist.html +https://datatalks.club/podcast/s01e05-mentoring.html +https://datatalks.club/podcast/s02e01-writing.html +https://datatalks.club/podcast/s02e02-developer-advocacy.html +https://datatalks.club/podcast/s02e03-open-source.html +https://datatalks.club/podcast/s02e04-mlops.html +https://datatalks.club/podcast/s02e05-feature-stores.html +https://datatalks.club/podcast/s02e06-decision-optimization.html +https://datatalks.club/podcast/s02e07-abc-data-science.html +https://datatalks.club/podcast/s02e08-personal-branding.html +https://datatalks.club/podcast/s02e09-roles-skills-monetizing-ml.html +https://datatalks.club/podcast/s02e10-public-speaking.html +https://datatalks.club/podcast/s02e11-dataops.html +https://datatalks.club/podcast/s02e12-communities.html +https://datatalks.club/podcast/s03e01-from-pm-to-ds.html +https://datatalks.club/podcast/s03e02-from-analytics-to-data-science.html +https://datatalks.club/podcast/s03e03-data-observability.html +https://datatalks.club/podcast/s03e04-effective-communication-with-business.html +https://datatalks.club/podcast/s03e04-interviewing-300-data-scientists.html +https://datatalks.club/podcast/s03e06-from-physics-to-machine-learning.html +https://datatalks.club/podcast/s03e07-market-yourself.html 
+https://datatalks.club/podcast/s03e08-data-led-professional.html +https://datatalks.club/podcast/s03e09-what-data-scientists-dont-mention.html +https://datatalks.club/podcast/s03e10-data-governance.html +https://datatalks.club/podcast/s03e11-analytics-engineer.html +https://datatalks.club/podcast/s04e01-from-swe-to-ml.html +https://datatalks.club/podcast/s04e02-build-your-own-data-pipeline.html +https://datatalks.club/podcast/s04e03-big-data-engineer-vs-data-scientist.html +https://datatalks.club/podcast/s04e04-ml-startup.html +https://datatalks.club/podcast/s04e05-running-from-complexity.html +https://datatalks.club/podcast/s04e06-humans-in-the-loop.html +https://datatalks.club/podcast/s04e07-launching-a-startup.html +https://datatalks.club/podcast/s04e08-freelancing.html +https://datatalks.club/podcast/s04e09-chief-data-officer.html +https://datatalks.club/podcast/s05e01-mastering-algorithms-and-data-structures.html +https://datatalks.club/podcast/s05e02-data-engineering-acronyms.html +https://datatalks.club/podcast/s05e03-metrics-and-kpis.html +https://datatalks.club/podcast/s05e04-introducing-data-science-in-startups.html +https://datatalks.club/podcast/s05e05-researchers-vs-engineers.html +https://datatalks.club/podcast/s05e06-building-and-leading-data-teams.html +https://datatalks.club/podcast/s05e07-ml-vs-analytics.html +https://datatalks.club/podcast/s05e08-the-last-mile-in-data.html +https://datatalks.club/podcast/s05e09-business-acumen.html +https://datatalks.club/podcast/s06e01-solopreneur.html +https://datatalks.club/podcast/s06e02-non-technical-interviews.html +https://datatalks.club/podcast/s06e03-manager-vs-expert.html +https://datatalks.club/podcast/s06e04-becoming-a-data-product-manager.html +https://datatalks.club/podcast/s06e05-post-doctoral-research.html +https://datatalks.club/podcast/s06e06-from-academia-to-industry.html +https://datatalks.club/podcast/s06e07-product-management-for-machine-learning.html 
+https://datatalks.club/podcast/s06e08-nlp-teams.html +https://datatalks.club/podcast/s06e09-data-science-manager.html +https://datatalks.club/podcast/s07e01-datatalksclub-behind-the-scenes.html +https://datatalks.club/podcast/s07e02-recruiting-data-professionals.html +https://datatalks.club/podcast/s07e03-product-management-essentials.html +https://datatalks.club/podcast/s07e04-career-coaching.html +https://datatalks.club/podcast/s07e05-machine-learning-system-design-interview.html +https://datatalks.club/podcast/s07e06-ab-testing.html +https://datatalks.club/podcast/s07e07-becoming-a-data-engineering-manager.html +https://datatalks.club/podcast/s07e08-from-data-science-to-data-engineering.html +https://datatalks.club/podcast/s07e09-from-math-teacher-to-analytics-engineer.html +https://datatalks.club/podcast/s08e01-visualising-machine-learning.html +https://datatalks.club/podcast/s08e02-hacking-your-data-career.html +https://datatalks.club/podcast/s08e03-innovation-and-design-for-machine-learning.html +https://datatalks.club/podcast/s08e04-machine-learning-and-personalization-in-healthcare.html +https://datatalks.club/podcast/s08e05-storytime-for-dataops.html +https://datatalks.club/podcast/s08e06-recruiting-data-engineers.html +https://datatalks.club/podcast/s08e07-from-roasting-coffee-to-backend-development.html +https://datatalks.club/podcast/s08e08-teaching-data-engineers.html +https://datatalks.club/podcast/s08e09-from-academia-to-data-analytics-and-engineering.html +https://datatalks.club/podcast/s09e01-machine-learning-in-marketing.html +https://datatalks.club/podcast/s09e02-using-data-for-asteroid-mining.html +https://datatalks.club/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).html +https://datatalks.club/podcast/s09e04-freelancing-and-consulting-with-data-engineering.html +https://datatalks.club/podcast/s09e05-data-scientists-at-work.html +https://datatalks.club/podcast/s09e06-developer-advocacy-engineer-for-open-source.html 
+https://datatalks.club/podcast/s09e07-designing-data-science-organization.html +https://datatalks.club/podcast/s09e08-from-open-source-maintainer-to-founder.html +https://datatalks.club/podcast/s09e09-hiring-data-science-talent.html +https://datatalks.club/podcast/s10e01-data-science-for-social-impact.html +https://datatalks.club/podcast/s10e02-decoding-data-science-job-descriptions.html +https://datatalks.club/podcast/s10e03-mlops-architect.html +https://datatalks.club/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.html +https://datatalks.club/podcast/s10e05-growing-data-engineering-team-in-scale-up.html +https://datatalks.club/podcast/s10e06-data-mesh-101.html +https://datatalks.club/podcast/s10e07-dataset-creation-and-curation.html +https://datatalks.club/podcast/s10e08-leading-data-research.html +https://datatalks.club/podcast/s10e09-responsible-and-explainable-ai.html +https://datatalks.club/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.html +https://datatalks.club/podcast/s11e02-data-science-career-development.html +https://datatalks.club/podcast/s11e03-from-data-science-to-dataops.html +https://datatalks.club/podcast/s11e04-large-scale-entity-resolution.html +https://datatalks.club/podcast/s11e05-building-data-science-practice.html +https://datatalks.club/podcast/s11e06-product-owners-in-data-science.html +https://datatalks.club/podcast/s11e07-from-digital-marketing-to-analytics-engineering.html +https://datatalks.club/podcast/s11e08-technical-writing-and-data-journalism.html +https://datatalks.club/podcast/s11e09-teaching-and-mentoring-in-data-analytics.html +https://datatalks.club/podcast/s12e01-from-software-engineer-to-data-science-manager.html +https://datatalks.club/podcast/s12e02-business-skills-for-data-professionals.html +https://datatalks.club/podcast/s12e03-data-centric-ai.html +https://datatalks.club/podcast/s12e04-doing-software-engineering-in-academia.html +https://datatalks.club/podcast/s12e05-indie-hacking.html 
+https://datatalks.club/podcast/s12e06-preparing-for-data-science-interview.html +https://datatalks.club/podcast/s12e07-navigating-career-changes-in-machine-learning.html +https://datatalks.club/podcast/s12e09-staff-ai-engineer.html +https://datatalks.club/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.html +https://datatalks.club/podcast/s13e02-analytics-for-better-world.html +https://datatalks.club/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.html +https://datatalks.club/podcast/s13e04-starting-consultancy-in-data-space.html +https://datatalks.club/podcast/s13e05-se4ml-software-engineering-for-machine-learning.html +https://datatalks.club/podcast/s13e06-secret-sauce-of-data-science-management.html +https://datatalks.club/podcast/s13e07-mastering-self-learning-in-machine-learning.html +https://datatalks.club/podcast/s13e08-navigating-industrial-data-challenges.html +https://datatalks.club/podcast/s13e09-building-open-source-nlp-tool.html +https://datatalks.club/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.html +https://datatalks.club/podcast/s14e02-practical-data-privacy.html +https://datatalks.club/podcast/s14e03-data-strategy-key-principles-and-best-practices.html +https://datatalks.club/podcast/s14e04-data-access-management.html +https://datatalks.club/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.html +https://datatalks.club/podcast/s14e06-data-developer-relations.html +https://datatalks.club/podcast/s14e07-from-mlops-to-dataops.html +https://datatalks.club/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.html +https://datatalks.club/podcast/s14e09-interpretable-ai-and-ml.html +https://datatalks.club/podcast/s15e01-why-machine-learning-design-broken.html +https://datatalks.club/podcast/s15e02-investing-in-open-source-data-tools.html +https://datatalks.club/podcast/s15e03-llms-for-everyone.html 
+https://datatalks.club/podcast/s15e04-good-bad-and-ugly-of-gpt.html +https://datatalks.club/podcast/s15e05-mastering-data-engineering-as-remote-worker.html +https://datatalks.club/podcast/s15e06-democratizing-causality.html +https://datatalks.club/podcast/s15e07-pragmatic-and-standardized-mlops.html +https://datatalks.club/podcast/s15e08-from-data-manager-to-data-architect.html +https://datatalks.club/podcast/s15e09-data-engineering-for-fraud-prevention.html +https://datatalks.club/podcast/s16e01-datatalks-club-anniversary-interview.html +https://datatalks.club/podcast/s16e02-bridging-data-science-and-healthcare.html +https://datatalks.club/podcast/s16e03-collaborative-data-science-in-business.html +https://datatalks.club/podcast/s16e04-from-marketing-to-product-owner-in-search.html +https://datatalks.club/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.html +https://datatalks.club/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.html +https://datatalks.club/podcast/s16e07-cracking-code-machine-learning-made-understandable.html +https://datatalks.club/podcast/s16e08-ai-for-digital-health.html +https://datatalks.club/podcast/s16e09-become-data-freelancer.html +https://datatalks.club/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.html +https://datatalks.club/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.html +https://datatalks.club/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.html +https://datatalks.club/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.html +https://datatalks.club/podcast/s17e05-machine-learning-engineering-in-finance.html +https://datatalks.club/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.html +https://datatalks.club/podcast/s17e07-make-impact-through-volunteering-open-source-work.html 
+https://datatalks.club/podcast/s17e08-building-machine-learning-products.html +https://datatalks.club/podcast/s17e09-building-production-search-systems.html +https://datatalks.club/podcast/s18e01-inclusive-data-leadership-coaching.html +https://datatalks.club/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.html +https://datatalks.club/podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.html +https://datatalks.club/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.html +https://datatalks.club/podcast/s18e05-community-building-and-teaching-in-ai-tech.html +https://datatalks.club/podcast/s18e07-building-domestic-risk-assessment-tool.html +https://datatalks.club/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.html +https://datatalks.club/podcast/s19e01-using-data-to-create-liveable-cities.html +https://datatalks.club/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.html +https://datatalks.club/podcast/s19e03-datatalks-club-anniversary-podcast.html +https://datatalks.club/podcast/s19e04-mlops-as-team.html +https://datatalks.club/podcast/s19e05-large-hadron-collider-and-mentorship.html +https://datatalks.club/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.html +https://datatalks.club/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.html +https://datatalks.club/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.html +https://datatalks.club/podcast/s19e09-linguistics-and-fairness.html +https://datatalks.club/podcast/s20e01-trends-in-ai-infrastructure.html +https://datatalks.club/podcast/s20e02-competitive-machine-learning-and-teaching.html +https://datatalks.club/podcast/s20e03-trends-in-data-engineering.html +https://datatalks.club/podcast/s20e04-mlops-in-corporations-and-startups.html +https://datatalks.club/podcast/s20e05-data-intensive-ai.html 
# ---------------------------------------------------------------------------
# Patch context that shared the first source line with this script (tail of
# scripts/podcasts2.txt and the diff header). Kept verbatim as comments so
# no content is lost; it is not part of the script.
#
# +https://datatalks.club/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.html
# +https://datatalks.club/podcast/s20e07-build-strong-career-in-data.html
# +https://datatalks.club/podcast/s20e08-from-hackathons-to-developer-advocacy.html
# +https://datatalks.club/podcast/s20e09-taking-your-freelance-career-to-next-level.html
# +https://datatalks.club/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.html
# +https://datatalks.club/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.html
# +https://datatalks.club/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.html
# +https://datatalks.club/podcast/s21e05-from-astronomy-to-applied-ml.html
# +https://datatalks.club/podcast/s21e07-lessons-from-two-decades-of-ai.html
# +https://datatalks.club/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.html
# +https://datatalks.club/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.html
# +https://datatalks.club/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.html
# +https://datatalks.club/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.html
# +https://datatalks.club/podcast/s22e03-from-biotechnology-to-bioinformatics-software.html
# +https://datatalks.club/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.html
# diff --git a/scripts/process_podcast_intros.py b/scripts/process_podcast_intros.py
# new file mode 100755
# index 00000000..e9ff96e0
# --- /dev/null
# +++ b/scripts/process_podcast_intros.py
# @@ -0,0 +1,396 @@
# ---------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Generate SEO-optimized URL slugs from podcast intro text using OpenAI API.

This script extracts intro text from podcast markdown files and sends it to OpenAI
to generate SEO-optimized URL slugs based on the content.

Usage:
    python process_podcast_intros.py PODCAST_FILE [PODCAST_FILE ...] [--prompt PROMPT] [--api-key YOUR_KEY]
    python process_podcast_intros.py --all-in-dir _podcast/
    python process_podcast_intros.py --file-list podcasts.txt

Example:
    python process_podcast_intros.py _podcast/s01e02-processes.md
"""

import os
import sys
import argparse
import re
import traceback
from pathlib import Path
from typing import List, Optional, Sequence, Tuple

# NOTE: the third-party packages (PyYAML, openai) are imported inside the
# functions that need them, so that `--help`, argument validation and the pure
# helpers keep working when those packages are not installed.


DEFAULT_PROMPT = """You are an SEO expert creating SEO-optimized URL slugs.

You are given intro text for a podcast episode.

TASK: Based on the intro text, generate a SEO-optimized URL slug for the podcast episode.

REQUIREMENTS:
- Optimize for SEO with relevant keywords
- Use lowercase letters and hyphens only
- Keep slugs short, clean, and descriptive
- Reflect the main search intent of the episode
- Avoid keyword stuffing or repeating terms
- No dates, stop words (a, the, of, to), or guest names unless essential
- Must map clearly to the episode topic

INTRO TEXT:
{intro_text}

OUTPUT: Generate ONLY the URL slug text.
"""

# HTML line breaks (<br>, <br/>, <br />) that may appear inside the `intro` field.
_BR_TAG = re.compile(r'<br\s*/?>', re.IGNORECASE)

# Any run of characters that is not allowed in a slug.
_NON_SLUG_CHARS = re.compile(r'[^a-z0-9]+')


def parse_podcast_file(file_path) -> Tuple[Optional[object], str, str]:
    """Parse a podcast markdown file and extract front matter and content.

    Args:
        file_path: Path of the markdown file to read (UTF-8).

    Returns:
        A tuple ``(frontmatter, rest_content, content)``:
        ``frontmatter`` is whatever ``yaml.safe_load`` produced for the block
        between the leading ``---`` line and the next ``---`` line, or ``None``
        when there is no front matter or it is not valid YAML;
        ``rest_content`` is the text after the front matter (the whole file
        when no front matter was parsed); ``content`` is the whole file.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract front matter
    if content.startswith('---\n'):
        match = re.search(r'\n---\n', content[4:])
        if match:
            # match.start() is relative to content[4:], hence the +4
            end_pos = match.start() + 4
            frontmatter_text = content[4:end_pos]
            # skip the 5 characters of the closing "\n---\n"
            rest_content = content[end_pos + 5:]

            import yaml  # third-party (PyYAML); imported lazily, see NOTE above

            try:
                frontmatter = yaml.safe_load(frontmatter_text)
                return frontmatter, rest_content, content
            except yaml.YAMLError:
                return None, content, content

    # No frontmatter found
    return None, content, content


def get_intro_text(podcast_file_path) -> Optional[str]:
    """Extract intro text from a podcast file.

    Returns:
        The ``intro`` front-matter value with HTML ``<br>`` tags turned into
        newlines and surrounding whitespace stripped, or ``None`` when the file
        has no usable front matter or no textual ``intro`` field.
    """
    frontmatter, _, _ = parse_podcast_file(podcast_file_path)

    # safe_load may legitimately return a list/str/None for odd files
    if not isinstance(frontmatter, dict):
        return None

    intro = frontmatter.get('intro')
    if not intro or not isinstance(intro, str):
        return None

    # Remove HTML breaks for processing: each <br> becomes one newline, so a
    # double break becomes a blank line.
    return _BR_TAG.sub('\n', intro).strip()


def send_to_openai(intro_text: str, prompt_template: str, api_key: Optional[str] = None, model: str = "gpt-4o-mini"):
    """Send intro text to OpenAI API with the given prompt.

    Args:
        intro_text: Episode intro that is substituted into the template.
        prompt_template: ``str.format`` template containing ``{intro_text}``.
        api_key: Explicit API key; when falsy the client falls back to the
            ``OPENAI_API_KEY`` environment variable.
        model: Chat model name.

    Returns:
        The stripped text of the first completion choice.

    Raises:
        ValueError: If the template cannot be formatted or the model returned
            no text.
        Exception: Whatever the OpenAI client raises is logged and re-raised.
    """
    from openai import OpenAI  # third-party; imported lazily, see NOTE above

    # Initialize OpenAI client
    if api_key:
        client = OpenAI(api_key=api_key)
    else:
        # Will use OPENAI_API_KEY environment variable
        client = OpenAI()

    # Format the prompt with the intro text
    try:
        prompt = prompt_template.format(intro_text=intro_text)
    except (KeyError, IndexError, ValueError) as e:
        raise ValueError(
            "Invalid prompt template: use {intro_text} as the only placeholder "
            "and double any literal braces"
        ) from e

    print(f"Sending request to OpenAI (model: {model})...")
    print(f"Intro text length: {len(intro_text)} characters")
    print()

    try:
        # Call OpenAI API
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an SEO expert that generates optimized URL slugs."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7
        )
    except Exception as e:
        print(f"Error calling OpenAI API: {e}", file=sys.stderr)
        raise

    # message.content can be None (e.g. refusals); do not crash on .strip()
    result = (response.choices[0].message.content or "").strip()
    if not result:
        raise ValueError("OpenAI returned an empty response")
    return result


def _normalize_slug(raw_slug: str) -> str:
    """Turn raw model output into a safe slug made of ``[a-z0-9-]`` only."""
    slug = raw_slug.strip().strip('"\'`').strip().lower()
    if slug.endswith('.md'):
        slug = slug[:-3]
    # Spaces, underscores, path separators, dots... all collapse to one hyphen
    return _NON_SLUG_CHARS.sub('-', slug).strip('-')


def rename_podcast_file(old_path: Path, new_slug: str, dry_run: bool = False) -> Optional[Path]:
    """Rename a podcast file to match the generated slug.

    The slug comes from a language model, so it is normalised first
    (lower-case, anything outside ``a-z0-9`` becomes a hyphen). This keeps
    spaces and path separators out of the file name.

    Args:
        old_path: Existing markdown file.
        new_slug: Raw slug text as returned by the model.
        dry_run: When true, only report what would happen.

    Returns:
        The new path (also in dry-run mode), or ``None`` when the slug is
        empty, the target already exists, or the rename failed.
    """
    slug = _normalize_slug(new_slug)
    if not slug:
        print(f"Warning: Generated slug is empty after cleanup: {new_slug!r}", file=sys.stderr)
        return None

    # Create new filename
    new_filename = f"{slug}.md"
    new_path = old_path.parent / new_filename

    # Check if the new file already exists
    if new_path.exists() and new_path != old_path:
        print(f"Warning: Target file already exists: {new_filename}", file=sys.stderr)
        return None

    if dry_run:
        print(f"[DRY RUN] Would rename: {old_path.name} -> {new_filename}")
        return new_path

    try:
        old_path.rename(new_path)
    except OSError as e:
        print(f"Error renaming file: {e}", file=sys.stderr)
        return None

    print(f"✓ Renamed: {old_path.name} -> {new_filename}")
    return new_path


def get_project_root():
    """Get the project root directory (parent of scripts directory)."""
    script_dir = Path(__file__).parent
    return script_dir.parent


def resolve_podcast_path(podcast_file: str) -> Optional[Path]:
    """Resolve podcast file path relative to project root.

    The path is tried as given first (relative to the current directory), then
    relative to the project root. Returns ``None`` when neither exists.
    """
    file_path = Path(podcast_file)
    if file_path.exists():
        return file_path

    # Try relative to project root
    project_root = get_project_root()
    file_path = project_root / podcast_file
    if file_path.exists():
        return file_path

    return None


def get_podcast_files_from_args(args) -> List[Path]:
    """Get list of podcast files from command line arguments.

    Precedence is ``--file-list``, then ``--all-in-dir``, then the positional
    file names. Entries that cannot be resolved are reported on stderr and
    skipped; a missing list file or directory terminates the program.
    """
    files = []

    if args.file_list:
        # Read from file
        list_file = Path(args.file_list)
        if not list_file.exists():
            project_root = get_project_root()
            list_file = project_root / args.file_list

        if not list_file.exists():
            print(f"Error: File list not found: {args.file_list}", file=sys.stderr)
            sys.exit(1)

        with open(list_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                # blank lines and '#' comments are ignored
                if line and not line.startswith('#'):
                    file_path = resolve_podcast_path(line)
                    if file_path:
                        files.append(file_path)
                    else:
                        print(f"Warning: File not found: {line}", file=sys.stderr)

    elif args.all_in_dir:
        # Get all .md files in directory
        project_root = get_project_root()
        dir_path = Path(args.all_in_dir)
        if not dir_path.is_absolute():
            dir_path = project_root / args.all_in_dir

        if not dir_path.exists():
            print(f"Error: Directory not found: {args.all_in_dir}", file=sys.stderr)
            sys.exit(1)

        files = sorted(dir_path.glob('*.md'))

    else:
        # From command line arguments
        for podcast_file in args.podcast_files:
            file_path = resolve_podcast_path(podcast_file)
            if file_path:
                files.append(file_path)
            else:
                print(f"Error: File not found: {podcast_file}", file=sys.stderr)

    return files


def process_podcast_file(
    podcast_file: Path,
    prompt_template: str,
    api_key: Optional[str] = None,
    model: str = "gpt-4o-mini",
    output_file: Optional[Path] = None,
    update: bool = False,
    dry_run: bool = False
) -> bool:
    """Process a single podcast file to extract intro and generate URL slug via OpenAI.

    Args:
        podcast_file: Markdown file to process.
        prompt_template: Template passed to :func:`send_to_openai`.
        api_key: Optional explicit OpenAI API key.
        model: Chat model name.
        output_file: When given, the generated slug is written there.
        update: Rename the file to ``<slug>.md``.
        dry_run: With ``update``, only report the rename.

    Returns:
        ``True`` on success; ``False`` when there is no intro, the rename is
        not possible (also in dry-run mode), or any exception occurred.
    """
    print(f"Processing: {podcast_file.name}")
    print("-" * 60)

    try:
        # Extract intro text
        intro_text = get_intro_text(podcast_file)

        if not intro_text:
            print("Warning: No intro text found in this podcast file", file=sys.stderr)
            return False

        print(f"Found intro text ({len(intro_text)} characters):")
        print(f"  {intro_text[:200]}..." if len(intro_text) > 200 else f"  {intro_text}")
        print()

        # Send to OpenAI
        result = send_to_openai(intro_text, prompt_template, api_key=api_key, model=model)

        print("Generated URL Slug:")
        print("=" * 60)
        print(result)
        print("=" * 60)
        print()

        # Save to file if requested
        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(result)
            print(f"✓ Result saved to: {output_file}")

        # Rename file if update is requested
        if update:
            new_path = rename_podcast_file(podcast_file, result, dry_run=dry_run)
            # A None result is a failure in dry-run mode as well (e.g. the
            # target name is already taken).
            if new_path is None:
                print("✗ Failed to rename file", file=sys.stderr)
                return False
            if dry_run:
                print("[DRY RUN] File would be renamed")
            else:
                print("✓ File renamed successfully")
        else:
            print("\nTo rename the file, run with --update flag")

        return True

    except Exception as e:
        # Top-level boundary for one file: report and let the batch continue
        print(f"\nError: {e}", file=sys.stderr)
        traceback.print_exc()
        return False


def main(argv: Optional[Sequence[str]] = None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Exits with status 1 when no file could be found or any file failed, and
    with argparse's status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='Generate SEO-optimized URL slugs from podcast intro text using OpenAI API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate URL slug for a single podcast file
  python process_podcast_intros.py _podcast/s01e02-processes.md

  # Generate and automatically rename file
  python process_podcast_intros.py _podcast/s01e02-processes.md --update

  # Preview what would be renamed (dry run)
  python process_podcast_intros.py --all-in-dir _podcast/ --update --dry-run

  # Process with custom prompt
  python process_podcast_intros.py _podcast/s01e02-processes.md --prompt "Generate a short URL slug: {intro_text}"

  # Process multiple files and rename them
  python process_podcast_intros.py _podcast/episode1.md _podcast/episode2.md --update

  # Process all files in a directory and rename
  python process_podcast_intros.py --all-in-dir _podcast/ --update

  # Read file list from a text file
  python process_podcast_intros.py --file-list podcasts.txt

  # Use custom API key
  python process_podcast_intros.py _podcast/s01e02-processes.md --api-key sk-...

  # Save output to file
  python process_podcast_intros.py _podcast/s01e02-processes.md --output result.txt

  # Use different model
  python process_podcast_intros.py _podcast/s01e02-processes.md --model gpt-4o
        """
    )

    parser.add_argument('podcast_files', nargs='*', help='Podcast markdown files to process')
    parser.add_argument('--file-list', help='Text file containing list of podcast files (one per line)')
    parser.add_argument('--all-in-dir', help='Process all .md files in the specified directory')
    parser.add_argument('--prompt', default=DEFAULT_PROMPT, help='Custom prompt template (use {intro_text} as placeholder)')
    parser.add_argument('--api-key', help='OpenAI API key (or set OPENAI_API_KEY env var)')
    parser.add_argument('--model', default='gpt-4o-mini', help='OpenAI model to use (default: gpt-4o-mini)')
    parser.add_argument('--output', help='Output file to save results (only works for single file)')
    parser.add_argument('--update', action='store_true', help='Automatically rename files to match generated slugs')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes')

    args = parser.parse_args(argv)

    # Validate arguments
    if not args.podcast_files and not args.file_list and not args.all_in_dir:
        parser.error("Must provide podcast files, --file-list, or --all-in-dir")

    # Get list of files to process (resolved once, so "not found" warnings
    # are printed a single time)
    files = get_podcast_files_from_args(args)

    if not files:
        print("Error: No valid podcast files found", file=sys.stderr)
        sys.exit(1)

    # Validate output file (only for single file)
    output_file = None
    if args.output:
        if len(files) > 1:
            parser.error("--output can only be used when processing a single file")
        output_file = Path(args.output)

    print(f"Found {len(files)} podcast file(s) to process")
    if args.dry_run:
        print("[DRY RUN MODE - No changes will be made]")
    if args.update:
        print("[UPDATE MODE - Files will be renamed to match generated slugs]")
    print()

    # Use API key from environment if not provided
    api_key = args.api_key or os.getenv('OPENAI_API_KEY')

    if not api_key:
        print("Warning: No API key provided. Set OPENAI_API_KEY env var or use --api-key", file=sys.stderr)

    # Process each file
    successful = 0
    failed = 0

    for i, podcast_file in enumerate(files, 1):
        print(f"\n[{i}/{len(files)}] ", end='')
        success = process_podcast_file(
            podcast_file,
            prompt_template=args.prompt,
            api_key=api_key,
            model=args.model,
            output_file=output_file if i == 1 else None,  # Only save output for first file
            update=args.update,
            dry_run=args.dry_run
        )

        if success:
            successful += 1
        else:
            failed += 1

    # Summary
    print("\n" + "=" * 60)
    print(f"Summary: {successful} successful, {failed} failed")

    if failed > 0:
        sys.exit(1)


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# Patch context that followed the script on the same source line (header of
# the next patch in the series and the start of its diffstat). Kept verbatim
# as comments; it is not part of the script.
#
# From 374e2292b5a7fedecd8690f9e6dacba19620aeec Mon Sep 17 00:00:00 2001
# From: kavaivaleri
# Date: Tue, 18 Nov 2025 11:36:35 +0100
# Subject: [PATCH 2/9] All URLs updated
# ---
# ...-ecology-biodiversity-and-conservation.md} | 42 +-
# ...rid-cloud-on-prem-distributed-training.md} | 63 +-
# ...ading-with-python-and-machine-learning.md} | 43 +-
# ...research-and-career-growth-in-practice.md} | 32 +-
# ...
# ---------------------------------------------------------------------------
bayesian-modeling-workflows-and-tools.md} | 43 +- ...elancer.md => becoming-data-freelancer.md} | 32 +- ...matics-worflows-tools-and-data-science.md} | 40 +- ...gineering-tooling-retrieval-evaluation.md} | 71 +- ...=> building-ai-digital-health-startups.md} | 46 +- ...lding-data-products-lead-data-scientist.md | 10 + ...building-domestic-risk-assessment-tool.md} | 39 +- ...ng-healthcare-machine-learning-systems.md} | 32 +- ... => building-production-search-systems.md} | 37 +- ...unity-building-and-teaching-in-ai-tech.md} | 40 +- ...y-market-demand-and-client-acquisition.md} | 40 +- ...oaching.md => data-leadership-coaching.md} | 38 +- ...ues.md => dataops-for-data-engineering.md} | 35 +- ...ble-data-community-3-years-anniversary.md} | 33 +- ...datatalksclub-scaling-and-free-courses.md} | 38 +- ...ss.md => fairness-in-ai-ml-engineering.md} | 48 +- ...finops.md => finops-for-data-engineers.md} | 35 +- ...search-to-data-engineering-freelancing.md} | 41 +- ...en-source-computer-vision-transformers.md} | 39 +- ...sion-research-to-autonomous-driving-ai.md} | 45 +- ...lancer-to-startup-open-source-products.md} | 30 +- ...ng-automation-open-source-volunteering.md} | 45 +- ...md => from-game-ai-to-modern-ai-agents.md} | 42 +- ...-science-research-software-engineering.md} | 39 +- ...om-marketing-to-product-owner-in-search.md | 7 + ...-machine-learning-and-data-engineering.md} | 35 +- ...uctor-data-to-applied-machine-learning.md} | 39 +- ...machine-learning-applied-ml-leadership.md} | 39 +- ...ive-ai-chatbots-in-production-security.md} | 39 +- ...ntered-ai-automatic-speech-recognition.md} | 43 +- ...e.md => interpretable-machine-learning.md} | 44 +- ...-in-tech-projects-skills-cv-networking.md} | 39 +- ...dmaster-to-production-ml-and-education.md} | 60 +- ...dge-graphs-and-llms-for-automotive-rnd.md} | 42 +- ...startups.md => lean-mlops-for-startups.md} | 37 +- ...dful-data-strategy-for-business-impact.md} | 41 +- ...=> mlops-and-ml-engineering-in-finance.md} | 43 +- 
...lops-at-scale-reproducibility-adoption.md} | 39 +- ...ctor-databases-llms-semantic-retrieval.md} | 37 +- ...arning-freelancing-and-public-learning.md} | 30 +- ...eering-in-ai-for-data-ml-career-growth.md} | 34 +- ...-ml-tools-strategy-and-business-models.md} | 41 +- ...el-demofirst-education-and-open-source.md} | 39 +- ...ai-consulting-from-expertise-to-impact.md} | 38 +- ...d => practical-llm-engineering-and-rag.md} | 40 +- ...vector-search-embeddings-hybrid search.md} | 35 +- ....md => production-ready-ai-engineering.md} | 37 +- ...g-to-tesla-full-stack-data-engineering.md} | 33 +- ...-collaborative-data-science-in-business.md | 1404 ----------------- ...om-marketing-to-product-owner-in-search.md | 1065 ------------- ...d => trends-in-modern-data-engineering.md} | 39 +- ...veable-cities.md => urban-data-science.md} | 40 +- scripts/add_context_from_title.py | 205 +++ 57 files changed, 1793 insertions(+), 2989 deletions(-) rename _podcast/{to-update/s18e03-ai-for-ecology-biodiversity-and-conservation.md => ai-for-ecology-biodiversity-and-conservation.md} (65%) rename _podcast/{to-update/s20e01-trends-in-ai-infrastructure.md => ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md} (93%) rename _podcast/{to-update/s17e03-stock-market-analysis-with-python-and-machine-learning.md => algorithmic-trading-with-python-and-machine-learning.md} (95%) rename _podcast/{to-update/s20e07-build-strong-career-in-data.md => applied-llm-research-and-career-growth-in-practice.md} (96%) rename _podcast/{to-update/s17e04-bayesian-modeling-and-probabilistic-programming.md => bayesian-modeling-workflows-and-tools.md} (96%) rename _podcast/{to-update/s16e09-become-data-freelancer.md => becoming-data-freelancer.md} (97%) rename _podcast/{to-update/s22e03-from-biotechnology-to-bioinformatics-software.md => bioinformatics-worflows-tools-and-data-science.md} (94%) rename _podcast/{to-update/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md => 
building-agentic-ai-engineering-tooling-retrieval-evaluation.md} (92%) rename _podcast/{to-update/s16e08-ai-for-digital-health.md => building-ai-digital-health-startups.md} (95%) create mode 100644 _podcast/building-data-products-lead-data-scientist.md rename _podcast/{to-update/s18e07-building-domestic-risk-assessment-tool.md => building-domestic-risk-assessment-tool.md} (65%) rename _podcast/{to-update/s16e02-bridging-data-science-and-healthcare.md => building-healthcare-machine-learning-systems.md} (96%) rename _podcast/{to-update/s17e09-building-production-search-systems.md => building-production-search-systems.md} (96%) rename _podcast/{to-update/s18e05-community-building-and-teaching-in-ai-tech.md => community-building-and-teaching-in-ai-tech.md} (91%) rename _podcast/{to-update/s20e09-taking-your-freelance-career-to-next-level.md => data-freelancing-career-strategy-market-demand-and-client-acquisition.md} (92%) rename _podcast/{to-update/s18e01-inclusive-data-leadership-coaching.md => data-leadership-coaching.md} (96%) rename _podcast/{to-update/s18e09-dataops-observability-and-cure-for-data-team-blues.md => dataops-for-data-engineering.md} (90%) rename _podcast/{to-update/s16e01-datatalks-club-anniversary-interview.md => datatalksclub-building-sustainable-data-community-3-years-anniversary.md} (97%) rename _podcast/{to-update/s19e03-datatalks-club-anniversary-podcast.md => datatalksclub-scaling-and-free-courses.md} (95%) rename _podcast/{to-update/s19e09-linguistics-and-fairness.md => fairness-in-ai-ml-engineering.md} (94%) rename _podcast/{to-update/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md => finops-for-data-engineers.md} (95%) rename _podcast/{to-update/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md => from-academic-research-to-data-engineering-freelancing.md} (94%) rename _podcast/{to-update/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md => 
from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md} (95%) rename _podcast/{to-update/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md => from-computer-vision-research-to-autonomous-driving-ai.md} (94%) rename _podcast/{to-update/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md => from-data-freelancer-to-startup-open-source-products.md} (97%) rename _podcast/{to-update/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md => from-devops-to-data-engineering-automation-open-source-volunteering.md} (95%) rename _podcast/{to-update/s21e07-lessons-from-two-decades-of-ai.md => from-game-ai-to-modern-ai-agents.md} (94%) rename _podcast/{to-update/s19e05-large-hadron-collider-and-mentorship.md => from-large-hadron-collider-to-data-science-research-software-engineering.md} (95%) create mode 100644 _podcast/from-marketing-to-product-owner-in-search.md rename _podcast/{to-update/s21e05-from-astronomy-to-applied-ml.md => from-radio-astronomy-to-machine-learning-and-data-engineering.md} (96%) rename _podcast/{to-update/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md => from-semiconductor-data-to-applied-machine-learning.md} (95%) rename _podcast/{to-update/s16e06-unwritten-rules-for-success-in-machine-learning.md => from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md} (96%) rename _podcast/{to-update/s19e06-ai-in-industry-trust-return-on-investment-and-future.md => generative-ai-chatbots-in-production-security.md} (94%) rename _podcast/{to-update/s19e02-human-centered-ai-for-disordered-speech-recognition.md => human-centered-ai-automatic-speech-recognition.md} (93%) rename _podcast/{to-update/s16e07-cracking-code-machine-learning-made-understandable.md => interpretable-machine-learning.md} (96%) rename _podcast/{to-update/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md => 
job-search-strategy-in-tech-projects-skills-cv-networking.md} (96%) rename _podcast/{to-update/s20e02-competitive-machine-learning-and-teaching.md => kaggle-grandmaster-to-production-ml-and-education.md} (88%) rename _podcast/{to-update/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md => knowledge-graphs-and-llms-for-automotive-rnd.md} (96%) rename _podcast/{to-update/s20e04-mlops-in-corporations-and-startups.md => lean-mlops-for-startups.md} (95%) rename _podcast/{to-update/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md => mindful-data-strategy-for-business-impact.md} (95%) rename _podcast/{to-update/s17e05-machine-learning-engineering-in-finance.md => mlops-and-ml-engineering-in-finance.md} (96%) rename _podcast/{to-update/s19e04-mlops-as-team.md => mlops-at-scale-reproducibility-adoption.md} (94%) rename _podcast/{to-update/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md => modern-search-systems-vector-databases-llms-semantic-retrieval.md} (96%) rename _podcast/{to-update/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md => nonlinear-path-to-machine-learning-freelancing-and-public-learning.md} (95%) rename _podcast/{to-update/s17e07-make-impact-through-volunteering-open-source-work.md => open-source-and-volunteering-in-ai-for-data-ml-career-growth.md} (96%) rename _podcast/{to-update/s18e04-working-in-open-source-probabl-ai-and-sklearn.md => open-source-ml-tools-strategy-and-business-models.md} (93%) rename _podcast/{to-update/s20e08-from-hackathons-to-developer-advocacy.md => practical-devrel-demofirst-education-and-open-source.md} (96%) rename _podcast/{to-update/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md => practical-generative-ai-consulting-from-expertise-to-impact.md} (96%) rename _podcast/{to-update/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md => practical-llm-engineering-and-rag.md} (95%) 
rename _podcast/{to-update/s17e08-building-machine-learning-products.md => production-ml-search-vector-search-embeddings-hybrid search.md} (96%) rename _podcast/{to-update/s20e05-data-intensive-ai.md => production-ready-ai-engineering.md} (94%) rename _podcast/{to-update/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md => theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md} (96%) delete mode 100644 _podcast/to-update/s16e03-collaborative-data-science-in-business.md delete mode 100644 _podcast/to-update/s16e04-from-marketing-to-product-owner-in-search.md rename _podcast/{to-update/s20e03-trends-in-data-engineering.md => trends-in-modern-data-engineering.md} (90%) rename _podcast/{to-update/s19e01-using-data-to-create-liveable-cities.md => urban-data-science.md} (94%) create mode 100644 scripts/add_context_from_title.py diff --git a/_podcast/to-update/s18e03-ai-for-ecology-biodiversity-and-conservation.md b/_podcast/ai-for-ecology-biodiversity-and-conservation.md similarity index 65% rename from _podcast/to-update/s18e03-ai-for-ecology-biodiversity-and-conservation.md rename to _podcast/ai-for-ecology-biodiversity-and-conservation.md index f5aacd50..98a65fed 100644 --- a/_podcast/to-update/s18e03-ai-for-ecology-biodiversity-and-conservation.md +++ b/_podcast/ai-for-ecology-biodiversity-and-conservation.md @@ -1,7 +1,6 @@ --- -title: "Context: The episode frames a biodiversity crisis made harder by fragmented, sparse data and limited monitoring capacity, then surveys AI tools (computer vision, remote sensing, platforms, citizen science), technical challenges, ethical concerns, and policy needs for conservation. 
- -Core narrative: AI's most important role in conservation is as an integrative, trustworthy infrastructure that turns heterogeneous, messy ecological data into continuous, scalable, and actionable knowledge—bridging camera traps, drones, satellites, citizen science, and field expertise through interoperable standards, robust models, edge deployment, and open platforms. Real impact requires coupling technical advances with ethics, community engagement, capacity building, sustainable funding, and multistakeholder governance so that AI-enabled monitoring directly informs equitable conservation decisions, enforcement, and long-term policy." +title: 'AI for Ecology, Biodiversity, and Conservation: Computer Vision, Remote Sensing + and Citizen Science' short: AI for Ecology, Biodiversity, and Conservation season: 18 episode: 3 @@ -16,12 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/ai-for-ecology-biodiversity-and-conservation-tanya/id1541710331?i=1000653709956 spotify: https://open.spotify.com/episode/3Hhz5N8ZDvsOPlPP3wxQxq?si=Oz7y_pBrTfeypfYZXubu-g youtube: https://www.youtube.com/watch?v=30tTrozbAkg - -description: 'Discover AI-driven wildlife conservation: computer vision, remote sensing & citizen science for scalable species ID, habitat maps, alerts and policy impact.' -intro: How can AI actually scale wildlife conservation in the face of accelerating biodiversity loss and persistent data gaps? In this episode, computational ecologist Tanya Berger-Wolf—director of TDAI@OSU, co‑founder of the Wildbook project, and director of technology at Wild Me—walks us through practical ways computer vision, remote sensing, and citizen science are transforming biodiversity monitoring.

We explore core AI techniques (machine learning, transfer learning, domain adaptation), image‑based monitoring with camera traps, drones and photo‑ID for individual tracking, and remote sensing for habitat mapping and change detection. Tanya addresses key data challenges—labeling, class imbalance, sparse observations—and the need for interoperable datasets, open standards and FAIR principles. We also cover model robustness, edge deployment in the field, ethics and Indigenous knowledge, scalable platforms like Wildbook, and how citizen science and crowdsourcing support quality control and long‑term monitoring.

Listeners will come away with a clearer understanding of tools and workflows for wildlife monitoring, practical barriers to scaling AI for conservation, policy and funding considerations, and resources to begin applying computer vision, remote sensing, and citizen science in their own conservation projects +description: Discover AI-driven computer vision and remote sensing strategies to scale + biodiversity monitoring, improve species ID, and inform conservation policy. +intro: How can AI help close critical data gaps in biodiversity monitoring and turn + images and sensor data into actionable conservation decisions? In this episode Tanya + Berger‑Wolf, a computational ecologist, director of TDAI@OSU, and co‑founder of + the Wildbook project (Wild Me), walks through practical applications of AI for ecology, + biodiversity monitoring, and conservation.

We cover core techniques—computer + vision, machine learning, and remote sensing—and their use in image‑based monitoring + with camera traps, drones, and species identification. Tanya explains individual + identification and longitudinal tracking, habitat mapping and change detection, + and the data challenges of labeling, class imbalance, and sparse observations. The + conversation addresses integration of heterogeneous datasets, model robustness (domain + shift and transfer learning), and ethical considerations including Indigenous knowledge + and equity. You’ll also hear about scalable platforms like Wildbook, citizen science + workflows for crowdsourcing and quality control, policy relevance, open data and + FAIR principles, edge deployment in the field, and building sustainable monitoring + programs.

Listen to gain concrete insights on tools, pitfalls, and next + steps for applying AI to conservation—what works now, what remains hard, and resources + to explore further. dateadded: 2024-04-28 - - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -119,9 +132,20 @@ quotableClips: startOffset: 3720 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=3720 endOffset: 3720 +context: 'Context: The episode frames a biodiversity crisis made harder by fragmented, + sparse data and limited monitoring capacity, then surveys AI tools (computer vision, + remote sensing, platforms, citizen science), technical challenges, ethical concerns, + and policy needs for conservation. + Core narrative: AI''s most important role in conservation is as an integrative, + trustworthy infrastructure that turns heterogeneous, messy ecological data into + continuous, scalable, and actionable knowledge—bridging camera traps, drones, satellites, + citizen science, and field expertise through interoperable standards, robust models, + edge deployment, and open platforms. Real impact requires coupling technical advances + with ethics, community engagement, capacity building, sustainable funding, and multistakeholder + governance so that AI-enabled monitoring directly informs equitable conservation + decisions, enforcement, and long-term policy.' 
--- - Links: * [Biodiversity and Artificial Intelligence pdf](https://www.gpai.ai/projects/responsible-ai/environment/biodiversity-and-AI-opportunities-recommendations-for-action.pdf){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s20e01-trends-in-ai-infrastructure.md b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md similarity index 93% rename from _podcast/to-update/s20e01-trends-in-ai-infrastructure.md rename to _podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md index f3c47355..3f36f467 100644 --- a/_podcast/to-update/s20e01-trends-in-ai-infrastructure.md +++ b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md @@ -1,17 +1,6 @@ --- -title: "Context: A conversation with an AI-infrastructure practitioner about moving from developer tools to building DStack, exploring real-world trade-offs across hardware, software, deployment, and business models for practical AI adoption. - -Core theme (single unifying idea): Practical AI is an infrastructure-first problem — success depends less on chasing the biggest model and more on designing cost-effective, controllable, and efficient stacks (hardware, orchestration, and software) that fit hybrid cloud/on‑prem realities, leverage open-source ecosystems, and optimize distributed training and serving for real-world constraints. - -Dominant through-line: Every segment — from cost of ownership and cloud vs on‑prem trade‑offs to open vs proprietary models, decentralization, distributed training bottlenecks, orchestration gaps, and edge/federated use cases — returns to the same tension: how to deliver AI that is scalable, performant, and economically sustainable by choosing the right mix of tooling, deployment model, and optimizations. - -Key themes implied by the narrative: -- Cost and control drive architecture choices more than raw model capability. -- Hybrid cloud + on‑prem is the pragmatic reality; orchestration must adapt. 
-- Open-source ecosystems accelerate feedback, tooling, and business flexibility. -- Efficient distributed training and communication optimizations trump brute-force scaling. -- Decentralization (privacy, local control, edge) is often a matter of fit and trade-offs, not ideology. -- Practical provisioning, automation, and orchestration are the unsolved scaling problems for non–AI‑first organizations." +title: 'Post-ChatGPT AI Infrastructure: Open Source Orchestration, On-Prem Economics + & Distributed Training at Scale' short: Trends in AI Infrastructure season: 20 episode: 1 @@ -26,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/redefining-ai-infrastructure-open-source-chips-and/id1541710331?i=1000687565459 spotify: https://open.spotify.com/episode/5MIc1pAXPxVYSr0E4pndU4 youtube: https://www.youtube.com/watch?v=1aMuynlLM3o - -description: Discover DStack to cut AI infrastructure costs with on‑prem GPU training and MLOps alternatives—optimize distributed training, reduce orchestration overhead -intro: 'How can engineering teams cut AI infrastructure costs without sacrificing performance or control? In this episode, Andrey Cheptsov — founder and CEO of dstack and former JetBrains engineer — walks through the motivation behind DStack, an open‑source orchestration alternative designed to lower AI infrastructure total cost of ownership. We trace the cloud vs on‑prem economics (including MLOps limitations like SageMaker), the decision to build open‑source developer tooling, and the trade‑offs between open and proprietary models.

You’ll hear practical discussion of on‑prem GPU training and distributed training challenges: GPU requirements, PyTorch + NCCL communication bottlenecks, optimization strategies such as DeepSpeed, and tips for fine‑tuning and serving models for non–AI‑first companies. The episode also covers orchestration gaps — Kubernetes and SLURM limitations — plus bare‑metal provisioning, hybrid cloud realities, edge computing scope, and federated learning versus distributed compute.

If you’re evaluating MLOps alternatives, on‑prem GPU coordination, or ways to reduce AI infrastructure cost, this episode offers concrete perspectives on when to choose on‑prem vs cloud, how DStack fits into the stack, and practical trade‑offs for production ML workloads.' +description: 'Discover AI infrastructure strategies: open source orchestration, on-prem + economics and distributed training at scale to cut costs, boost performance and + control.' +intro: How has the rise of ChatGPT reshaped the infrastructure needed to build and + run large language models, and when does open source orchestration make sense compared + to cloud or proprietary systems? In this episode we speak with Andrey Cheptsov, + founder and CEO of dstack — an open-source alternative to Kubernetes and Slurm designed + to simplify AI infrastructure orchestration. Drawing on his decade-plus at JetBrains + building developer tools, Andrey frames practical trade-offs between on-prem economics + and cloud spend, the maturity of open source orchestration tools, and patterns for + distributed training at scale. We cover core topics including open source orchestration + for AI workloads, cost and operational considerations for on-prem deployments, and + strategies to scale distributed training efficiently and reliably. Listen to understand + when an open source approach like dstack is appropriate, what to evaluate in orchestration + tools, and how to balance performance, cost, and control as you scale AI projects + post-ChatGPT. This episode is for engineering leaders and ML infrastructure teams + seeking actionable insights on AI infrastructure, orchestration tools, on‑prem economics, + and distributed training best practices. 
dateadded: 2025-02-26 - duration: PT01H06M04S - quotableClips: - name: Episode Kickoff & Guest Introduction startOffset: 0 @@ -118,7 +120,6 @@ quotableClips: startOffset: 3938 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3938 endOffset: 3964 - transcript: - header: Episode Kickoff & Guest Introduction - line: This week, we'll talk about AI infrastructure and everything related to it. @@ -955,8 +956,30 @@ transcript: sec: 3964 time: '1:06:04' who: Andrey ---- +context: 'Context: A conversation with an AI-infrastructure practitioner about moving + from developer tools to building DStack, exploring real-world trade-offs across + hardware, software, deployment, and business models for practical AI adoption. + + Core theme (single unifying idea): Practical AI is an infrastructure-first problem + — success depends less on chasing the biggest model and more on designing cost-effective, + controllable, and efficient stacks (hardware, orchestration, and software) that + fit hybrid cloud/on‑prem realities, leverage open-source ecosystems, and optimize + distributed training and serving for real-world constraints. + Dominant through-line: Every segment — from cost of ownership and cloud vs on‑prem + trade‑offs to open vs proprietary models, decentralization, distributed training + bottlenecks, orchestration gaps, and edge/federated use cases — returns to the same + tension: how to deliver AI that is scalable, performant, and economically sustainable + by choosing the right mix of tooling, deployment model, and optimizations. + + Key themes implied by the narrative: - Cost and control drive architecture choices + more than raw model capability. - Hybrid cloud + on‑prem is the pragmatic reality; + orchestration must adapt. - Open-source ecosystems accelerate feedback, tooling, + and business flexibility. - Efficient distributed training and communication optimizations + trump brute-force scaling. 
- Decentralization (privacy, local control, edge) is + often a matter of fit and trade-offs, not ideology. - Practical provisioning, automation, + and orchestration are the unsolved scaling problems for non–AI‑first organizations.' +--- Links: * [Twitter](https://twitter.com/andrey_cheptsov/){:target="_blank"} diff --git a/_podcast/to-update/s17e03-stock-market-analysis-with-python-and-machine-learning.md b/_podcast/algorithmic-trading-with-python-and-machine-learning.md similarity index 95% rename from _podcast/to-update/s17e03-stock-market-analysis-with-python-and-machine-learning.md rename to _podcast/algorithmic-trading-with-python-and-machine-learning.md index 5ad7ffda..717bd1ae 100644 --- a/_podcast/to-update/s17e03-stock-market-analysis-with-python-and-machine-learning.md +++ b/_podcast/algorithmic-trading-with-python-and-machine-learning.md @@ -1,7 +1,5 @@ --- -title: "Context: This episode follows Ivan Brigida’s path from finance to analytics and walks listeners step‑by‑step through the practical craft of retail algorithmic investing — covering data sources and quality, time‑series market formats, strategy ideas (like mean reversion), rigorous backtesting and walk‑forward validation, risk management and execution, feature engineering and model choice, explainability, deployment, and learning resources. - -Core: The unifying idea is that successful retail algorithmic trading is built like an engineering pipeline — start with clean, well‑understood data; define precise prediction targets; design simple, interpretable models and handcrafted features; validate performance with rigorous, leakage‑free backtests and walk‑forward simulations; embed strict risk controls and disciplined execution; and iterate toward partial automation and reproducible deployment while treating the whole process as a continuous learning project rather than a shortcut to quick profits." 
+title: 'Algorithmic Trading with Python: Backtesting, Risk Management and Deployment' short: Stock Market Analysis with Python and Machine Learning season: 17 episode: 3 @@ -16,13 +14,26 @@ links: apple: https://podcasts.apple.com/us/podcast/stock-market-analysis-with-python-and-machine/id1541710331?i=1000641465239 spotify: https://open.spotify.com/episode/1ZXAeGr4Kx7F6oLQUip8Cc?si=KJwpYL-3SvuX8nPdc2cyOg youtube: https://www.youtube.com/watch?v=NThHAEIazFk - -description: 'Discover algorithmic trading & mean reversion: practical backtesting, data APIs, risk management, model choices and trade execution to boost strategy ROI.' -intro: 'How do you build, backtest, and deploy a robust mean-reversion algorithm without falling prey to bad data or time‑series leakage? In this episode, Ivan Brigida — Analytics Lead and creator of PythonInvest — draws on 10+ years in business intelligence, econometrics, forecasting, machine learning and finance to answer that question.

We walk through practical steps for algorithmic trading: choosing retail-friendly data APIs (Yahoo, Quandl, Polygon), understanding market data formats like OHLCV and adjusted close, and cleaning for data quality. Ivan explains mean reversion strategy design, risk management fundamentals including stop‑loss and position sizing, and rigorous backtesting methods—covering time‑series leakage and walk‑forward simulation. He also breaks down prediction targets, feature engineering with time‑window statistics, and model choices from logistic regression to XGBoost and neural networks, plus approaches to explainability and evaluation metrics (ROI, precision, trading fees). Finally, deployment options (cron, Airflow, APIs) and learning resources from PythonInvest are discussed.

Listen to gain actionable guidance on backtesting, data sources, risk controls, and machine learning techniques to move a mean‑reversion idea toward a reproducible algorithmic trading workflow.' +description: 'Master algorithmic trading: backtesting and risk management—learn practical + data sources, features, models & execution to build robust strategies.' +intro: How do you turn a trading idea into a robust, risk‑managed algorithm in Python? + In this episode Ivan Brigida — analytics lead behind PythonInvest with 10+ years + in statistical modeling, forecasting, econometrics and finance — walks through practical + steps for algorithmic trading with Python, from data sourcing to deployment (and + a clear reminder this is educational, not investment advice).

We cover + where retail traders get market data (Yahoo, Quandl, Polygon), OHLCV and adjusted‑close + nuances, and a concrete mean‑reversion example. Ivan explains backtesting methodology, + common pitfalls like time‑series data leakage, and walk‑forward simulation for realistic + validation. He breaks down risk management (stop‑loss thresholds, position sizing), + execution and trading fees, plus evaluation metrics (ROI, precision) and defining + prediction targets (binary growth thresholds such as 5%).

On the modeling + side you’ll hear practical feature engineering (time‑window stats, handcrafted indicators), + model choices (logistic regression, XGBoost, neural nets), explainability via feature + importance, and deployment options (cron, Airflow, APIs, partial automation). Listen + to gain actionable guidance for building, validating, and deploying algorithmic + trading systems in Python. dateadded: 2024-01-24 - duration: PT01H40S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -132,7 +143,6 @@ quotableClips: startOffset: 3696 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3696 endOffset: 3640 - transcript: - header: Podcast Introduction - header: 'Guest Introduction: Ivan Brigida — Analytics Lead & PythonInvest' @@ -1134,8 +1144,21 @@ transcript: sec: 3735 time: '1:02:15' who: Ivan ---- +context: 'Context: This episode follows Ivan Brigida’s path from finance to analytics + and walks listeners step‑by‑step through the practical craft of retail algorithmic + investing — covering data sources and quality, time‑series market formats, strategy + ideas (like mean reversion), rigorous backtesting and walk‑forward validation, risk + management and execution, feature engineering and model choice, explainability, + deployment, and learning resources. + Core: The unifying idea is that successful retail algorithmic trading is built like + an engineering pipeline — start with clean, well‑understood data; define precise + prediction targets; design simple, interpretable models and handcrafted features; + validate performance with rigorous, leakage‑free backtests and walk‑forward simulations; + embed strict risk controls and disciplined execution; and iterate toward partial + automation and reproducible deployment while treating the whole process as a continuous + learning project rather than a shortcut to quick profits.' 
+--- Links: * [Exploring Finance APIs](https://pythoninvest.com/long-read/exploring-finance-apis){:target="_blank"} diff --git a/_podcast/to-update/s20e07-build-strong-career-in-data.md b/_podcast/applied-llm-research-and-career-growth-in-practice.md similarity index 96% rename from _podcast/to-update/s20e07-build-strong-career-in-data.md rename to _podcast/applied-llm-research-and-career-growth-in-practice.md index 91a7fcff..b91622a0 100644 --- a/_podcast/to-update/s20e07-build-strong-career-in-data.md +++ b/_podcast/applied-llm-research-and-career-growth-in-practice.md @@ -1,5 +1,6 @@ --- -title: "A practical, curiosity-driven bridge between research and engineering: relentlessly iterate with hands‑on prototyping, rigorous evaluation, and open dissemination to solve real-world ML problems (ex: long‑context LLMs), while leveraging community, mentorship, and strategic projects to accelerate career growth and drive measurable impact." +title: 'Applied LLM Research & Career Growth: Long-Context Evaluation, Prototyping + & Industry Publishing' short: Build a Strong Career in Data season: 20 episode: 7 @@ -14,13 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/build-a-strong-career-in-data-lavanya-gupta/id1541710331?i=1000706988972 spotify: https://open.spotify.com/episode/2mJXd0lSZFPKJA0ZrG9iS2 youtube: https://www.youtube.com/watch?v=ekG5zJioyFs - -description: Discover long-context LLMs, chunking and retrieval for finance benchmarking—learn 32k–64k context limits, summarization tips, prototyping & career advice -intro: How do you evaluate and deploy long-context LLMs for real-world financial documents—when context windows stretch into tens of thousands of tokens? In this episode, Lavanya Gupta, a CMU LTI alum and Sr. AI/ML Applied Scientist at JPMorgan Chase’s MLCOE, walks through practical benchmarking and production strategies for long-context LLMs in finance. 
Drawing on her published work "Long Context LLMs on Financial Concepts" (EMNLP) and 5+ years of industrial research, Lavanya explains empirical findings around context-window performance (a notable droparound 32k–64k), and outlines the pragmatic trio of chunking, retrieval, and summarization for processing large documents. She also discusses industry research practices—publishing from corporate teams, dissemination via arXiv and endorsements—and rapid prototyping techniques like Streamlit for demos and feedback. Listeners will get concrete guidance on LLM benchmarking, context window trade-offs, dataset and licensing lessons from a Kaggle success, and actionable career advice on transitioning into ML roles, networking, portfolios, and interview prep. Tune in to learn how to benchmark long-context LLMs for financial NLP and translate research into production-ready workflows +description: Learn LLM research tactics, long-context evaluation approaches and prototyping + tips to boost your career, publish industry work, and ship impactful models. +intro: How do you evaluate and prototype long-context LLMs in a real-world setting + while advancing a career as an applied researcher? In this episode Lavanya Gupta + — a Carnegie Mellon Language Technologies Institute alum and Sr. AI/ML Applied Scientist + at JPMorgan Chase’s Machine Learning Center of Excellence — walks through practical + strategies for applied LLM research and career growth. With 5+ years of industrial + research experience, public talks at WiDS, PyData, TensorFlow User Group and reviewer + roles for NeurIPS 2024, ICLR 2025 and NAACL 2025, Lavanya connects technical practice + with professional development.

We cover core topics including long-context + evaluation methodologies for transformer models, rapid prototyping workflows for + LLM systems, and best practices for industry publishing and technical communication. + Listeners will get actionable guidance on setting up reproducible experiments, balancing + research rigor with product timelines, and positioning industry work for peer-reviewed + venues. This episode is for machine learning engineers, NLP researchers, and applied + scientists seeking concrete tactics for prototyping LLMs, conducting robust long-context + evaluations, and growing a research-oriented career in industry. dateadded: 2025-05-12 - duration: PT00H58M10S - quotableClips: - name: Episode Introduction & Topic Overview startOffset: 0 @@ -114,7 +127,6 @@ quotableClips: startOffset: 3466 url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3466 endOffset: 3490 - transcript: - header: Episode Introduction & Topic Overview - line: This week we'll talk about building a strong career in data and we have a @@ -1155,8 +1167,12 @@ transcript: sec: 3490 time: '58:10' who: Alexey +context: 'A practical, curiosity-driven bridge between research and engineering: relentlessly + iterate with hands‑on prototyping, rigorous evaluation, and open dissemination to + solve real-world ML problems (ex: long‑context LLMs), while leveraging community, + mentorship, and strategic projects to accelerate career growth and drive measurable + impact.' 
--- - Links: * [Linkedin](https://www.linkedin.com/in/lgupta18/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s17e04-bayesian-modeling-and-probabilistic-programming.md b/_podcast/bayesian-modeling-workflows-and-tools.md similarity index 96% rename from _podcast/to-update/s17e04-bayesian-modeling-and-probabilistic-programming.md rename to _podcast/bayesian-modeling-workflows-and-tools.md index 91a36198..1e51cc07 100644 --- a/_podcast/to-update/s17e04-bayesian-modeling-and-probabilistic-programming.md +++ b/_podcast/bayesian-modeling-workflows-and-tools.md @@ -1,7 +1,5 @@ --- -title: "Context: This episode centers on Rob Zinkov and the Hakaru probabilistic programming project, tracing his career shift into Bayesian machine learning, contrasting tools (Hakaru, PyMC, Stan), and practical techniques (priors, likelihoods, sampling, MCMC/HMC/NUTS) alongside the skills and learning resources needed to apply them. - -Central narrative: Probabilistic programming and the Bayesian workflow offer a practical, composable way to bring honest uncertainty quantification into real-world problems by turning statistical models into executable programs—models you can build incrementally, check, and refine. The core unifying idea is that encoding assumptions as programs makes intractable integrals manageable through numerical approximation (sampling and MCMC), lets you compose and reuse model parts, and shifts modeling toward an iterative, testable practice; doing this effectively requires foundational math and a mindset of principled model-building rather than chasing point estimates." 
+title: 'Bayesian Modeling: PyMC, Stan and Probabilistic Programming Workflows' short: Bayesian Modeling and Probabilistic Programming season: 17 episode: 4 @@ -16,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/bayesian-modeling-and-probabilistic-programming-rob/id1541710331?i=1000642253191 spotify: https://open.spotify.com/episode/5WUKDcTYv8ZvnqeHSQT7FF?si=K10siPBHQwmegCCXJ1VpIA youtube: https://www.youtube.com/watch?v=kcKvUSInm-M - -description: Master Bayesian modeling, MCMC/HMC/NUTS and probabilistic programming with Hakaru & PyMC—learn sampling, priors, posteriors and practical model building -intro: 'How do you move from point estimates to full Bayesian models and pick the right sampler for real problems? In this episode, Rob Zinkov — machine learning engineer, data scientist, and former lead developer of the Hakaru probabilistic programming language — walks through mastering Bayesian modeling and probabilistic programming, focusing on practical tools like MCMC, HMC/NUTS, sampling, Hakaru, and PyMC.

We cover the core Bayesian workflow: priors, likelihoods, and posterior distributions; why integrals become intractable and how numerical integration via sampling approximates expectations; and the fundamentals of Markov chain Monte Carlo for exploring high‑probability regions. Rob contrasts frequentist point estimates with Bayesian distributions, explains composability and incremental model building in probabilistic languages, and discusses language vs library design and Hakaru’s role in generating samplers. You’ll hear a concrete PyMC rainfall model example, strategies for interpreting posteriors, encoding spatial and hierarchical dependencies, and handling multimodality and uncertainty. The episode closes with practical learning resources (PyMC book, Statistical Rethinking) to support your self‑study. Tune in to get actionable guidance on building, sampling, and refining Bayesian models.' +description: 'Discover Bayesian modeling with PyMC and Stan: learn priors, MCMC/HMC + sampling, probabilistic programming workflows to build, debug and refine robust + models.' +intro: How do you move from point estimates to full uncertainty-aware models and choose + the right tools and workflows for Bayesian modeling? In this episode Rob Zinkov, + a machine learning engineer and former Indiana University research scientist who + led development of the Hakaru probabilistic programming language, walks through + practical Bayesian workflows and tool choices. We cover the core challenge of encoding + priors, likelihoods, and posteriors; why integrals become intractable and how numerical + integration and sampling (MCMC, Hamiltonian Monte Carlo, NUTS) approximate expectations; + and the trade-offs between probabilistic languages and libraries. Rob explains career + lessons on moving from software engineering to ML research, the essential math (calculus, + linear algebra, optimization), and self-study strategies for statistics. 
Concrete + topics include PyMC examples (a rainfall model and computational graph), Stan’s + advances in efficient sampling, composing hierarchical and spatial models, diagnosing + multimodality and uncertainty, and automating model tasks with probabilistic programming + (Hakaru). Listen to gain a clearer, practical understanding of Bayesian modeling, + when to use PyMC vs Stan, how samplers work, and recommended resources to build + your workflow. dateadded: '2024-01-22' - duration: PT01H05M05S - quotableClips: - name: Episode Introduction & Topic Overview startOffset: 0 @@ -124,7 +136,6 @@ quotableClips: startOffset: 3991 url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3991 endOffset: 3905 - transcript: - header: Episode Introduction & Topic Overview - header: 'Guest Introduction: Rob Zinkov and the Hakaru probabilistic programming @@ -1295,8 +1306,20 @@ transcript: sec: 4009 time: '1:06:49' who: Alexey ---- +context: 'Context: This episode centers on Rob Zinkov and the Hakaru probabilistic + programming project, tracing his career shift into Bayesian machine learning, contrasting + tools (Hakaru, PyMC, Stan), and practical techniques (priors, likelihoods, sampling, + MCMC/HMC/NUTS) alongside the skills and learning resources needed to apply them. + Central narrative: Probabilistic programming and the Bayesian workflow offer a practical, + composable way to bring honest uncertainty quantification into real-world problems + by turning statistical models into executable programs—models you can build incrementally, + check, and refine. The core unifying idea is that encoding assumptions as programs + makes intractable integrals manageable through numerical approximation (sampling + and MCMC), lets you compose and reuse model parts, and shifts modeling toward an + iterative, testable practice; doing this effectively requires foundational math + and a mindset of principled model-building rather than chasing point estimates.' 
+--- Links: * [Book 1](https://bayesiancomputationbook.com/welcome.html){:target="_blank"} diff --git a/_podcast/to-update/s16e09-become-data-freelancer.md b/_podcast/becoming-data-freelancer.md similarity index 97% rename from _podcast/to-update/s16e09-become-data-freelancer.md rename to _podcast/becoming-data-freelancer.md index 923edf45..2effdfb3 100644 --- a/_podcast/to-update/s16e09-become-data-freelancer.md +++ b/_podcast/becoming-data-freelancer.md @@ -1,5 +1,5 @@ --- -title: "This episode centers on one clear idea: transitioning from employee to sustainable data freelancer is not a leap of faith but a deliberate, staged business transformation—one that combines technical credibility with market research, proactive outreach, sound pricing and contract choices, client vetting, and financial/legal safeguards so you can manage risk, build repeatable pipelines, and turn independence into a reliable, purpose-driven career." +title: 'Becoming a Data Freelancer: Pricing, Client Acquisition and Contract Strategy' short: Become a Data Freelancer season: 16 episode: 9 @@ -14,14 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/become-a-data-freelancer-dimitri-visnadi/id1541710331?i=1000637962993 spotify: https://open.spotify.com/episode/5OJfRiQ64JtLUmIkvadohg?si=uUEdvZwARN2hVGEfz73URg youtube: https://www.youtube.com/watch?v=R_EnSa9aZtE - -description: 'Launch your data freelancer career: pricing, outreach & contracts tactics, client vetting, legal risk and runway tips to win steady projects.' -intro: 'How do you move from corporate data roles into a sustainable freelance data career while setting rates, winning clients, and managing legal risk? 
In this episode Dimitri Visnadi — an independent data consultant who has advised brands like Unilever, Ferrero, Heineken and Red Bull, worked in HP’s data teams and a Google‑partner consulting firm, and holds an MSc in Business Analytics from UCL — walks through the practical steps he used to launch The Data Freelancer.

We cover the full arc of transition: career pivot and early outreach, market research and recruiter channels, pricing strategy across platforms vs direct clients, subcontracting and cutting out middlemen, and the contract risks around dependent contractor status. Dimitri also breaks down client vetting, handling corporate payment delays, recommended runway before quitting, and common pitfalls like mispositioning and mispricing. Listeners will leave with concrete tactics for freelance data consulting — outreach scripts, benchmarking approaches for rates, contract checkpoints, and resources (courses, mentors, newsletters) to reduce risk and build a reliable pipeline. Ideal for aspiring data freelancers seeking practical guidance on pricing, outreach, contracts and risk.' +description: Master data freelancer pricing, client acquisition and contract strategy—learn + rate benchmarking, outreach tactics, client vetting and runway planning. +intro: 'How do you move from corporate analytics to independent data consulting while + pricing services fairly, finding steady clients, and avoiding contract pitfalls? + In this episode Dimitri Visnadi — an independent data consultant who has advised + brands like Unilever, Ferrero, Heineken and Red Bull, worked in HP’s data teams + and holds an MSc in Business Analytics from UCL — walks through the practical realities of becoming a data freelancer.

+ We cover the full transition: career pivoting from marketing to data, early startup + and corporate analytics experience, and the decision to resign and pursue freelancing. + Dimitri breaks down client acquisition tactics (cold outreach, recruiter channels, + proactive self-marketing), pricing strategy (platforms vs direct contracting, rate + benchmarking, project pricing), and contract strategy (dependent contractor risk, + platform terms vs direct agreements, subcontracting). He also addresses vetting + clients, payment delays, financial runway recommendations, common pitfalls like + mispositioning and mispricing, and learning resources including the Data Freelancer + newsletter.

Listen to get actionable guidance on pricing, client acquisition, + contract formats, and the business practices that help sustain a freelance data + consultancy.' dateadded: 2023-12-09 date: 2025-11-07 - duration: PT00H59M49S - quotableClips: - name: Podcast Introduction startOffset: 91 @@ -131,7 +144,6 @@ quotableClips: startOffset: 3667 url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3667 endOffset: 3589 - transcript: - header: Podcast Introduction - line: This week, we'll talk about doing data freelancing. We have a very special @@ -1295,6 +1307,12 @@ transcript: sec: 3680 time: '1:01:20' who: Dimitri +context: 'This episode centers on one clear idea: transitioning from employee to sustainable + data freelancer is not a leap of faith but a deliberate, staged business transformation—one + that combines technical credibility with market research, proactive outreach, sound + pricing and contract choices, client vetting, and financial/legal safeguards so + you can manage risk, build repeatable pipelines, and turn independence into a reliable, + purpose-driven career.' --- Links: diff --git a/_podcast/to-update/s22e03-from-biotechnology-to-bioinformatics-software.md b/_podcast/bioinformatics-worflows-tools-and-data-science.md similarity index 94% rename from _podcast/to-update/s22e03-from-biotechnology-to-bioinformatics-software.md rename to _podcast/bioinformatics-worflows-tools-and-data-science.md index 77fe7287..2cc66e73 100644 --- a/_podcast/to-update/s22e03-from-biotechnology-to-bioinformatics-software.md +++ b/_podcast/bioinformatics-worflows-tools-and-data-science.md @@ -1,6 +1,8 @@ --- -title: "At its core this episode is about how building open, reproducible computational infrastructure and workflows lets us translate messy biological data into scalable, actionable insight—bridging wet lab and dry lab work so researchers can ask better questions, run fewer experiments, and move faster. 
From genomics and metagenomics pipelines to network inference, molecular simulation, knowledge graphs, visualization, and AI assistants, the through‑line is empowering scientists with accessible tools, automation, and community-driven software that make complex biology interpretable, shareable, and useful in the real world." -short: From Biotechnology to Bioinformatics Software +title: 'Bioinformatics Workflows in Practice: Sequencing, Metagenomics, and Open-Source + Tools' +short: Applying Data Science Concepts, Tools, and Workflows to Accelerate Biological + Research season: 22 episode: 3 guests: @@ -14,13 +16,27 @@ links: apple: https://podcasts.apple.com/us/podcast/from-biotechnology-to-bioinformatics-software-sebastian/id1541710331?i=1000733347636 spotify: https://open.spotify.com/episode/3CohNIXZdooLYoIyIbr6EF youtube: https://www.youtube.com/watch?v=ZFrcrTtnB1Q - -description: Discover wastewater metagenomics knowledge graphs & AlphaFold-driven network inference using open-source bioinformatics tools to map microbes and cut lab tests -intro: How can wastewater metagenomics and knowledge graphs reveal microbial interactions while reducing wet‑lab experiments? In this episode, Sebastian Ayala Ruano — a bioinformatics software developer and Master’s student in Systems Biology — walks through his wastewater microbiome knowledge graph thesis and open‑source tooling for multi‑omics analysis.

We cover metagenomics workflows from sequencing and abundance tables to building microbial networks with co‑abundance and association inference (CC Lasso, correlations, thresholding), plus network inference best practices. Sebastian also explains molecular simulations, protein–ligand dynamics and the practical impact of AlphaFold on structure prediction. Hear about MCW2 Graph, VueGen and VueCore, knowledge graph exploration with Neo4j and Streamlit, report automation (Quarto exports), and the bioinformatics package ecosystem (Bioconda, Bioconductor).

Listeners will gain actionable approaches for integrating wastewater metagenomics, network science, and knowledge graphs, practical open‑source tools to automate analysis and visualization, and guidance on project portfolios, language tradeoffs (R vs Python), and applying AI/LLMs in bioinformatics workflows. Ideal for researchers and engineers wanting to turn sequencing data into reproducible network models and automated reports +description: Master bioinformatics workflows for sequencing & metagenomics with open-source + tools, streamline pipelines, boost reproducibility, and speed analyses. +intro: How do you build reproducible, scalable bioinformatics workflows for sequencing + and metagenomics using open-source tools? In this episode we explore practical answers + with Sebastian Ayala Ruano, a bioinformatics software developer and Master's student + in Systems Biology at Maastricht University. Sebastian has contributed to open-source + projects such as MicW2Graph, VueGen, and VueCore to simplify multi-omics data analysis + and has a background in cheminformatics, peptide discovery, and network-based analysis. +

We discuss real-world sequencing and metagenomics workflows, trade-offs + in pipeline design, and how open-source tools and educational software can accelerate + reproducible research. Sebastian also outlines how machine learning and network + science concepts inform analysis strategies for complex biological data. Key topics + include sequencing data processing, metagenomic analysis approaches, workflow automation, + and practical considerations for integrating multi-omics datasets.

Listeners + will gain concrete guidance for designing bioinformatics pipelines, selecting open-source + tools, and applying network- and ML-driven methods to improve interpretation. This + episode is useful for researchers and developers wanting actionable perspectives + on sequencing, metagenomics, and building reliable workflows backed by community + tools and resources. dateadded: 2025-10-27 - duration: PT00H55M13S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -118,7 +134,6 @@ quotableClips: startOffset: 3250 url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3250 endOffset: 3313 - transcript: - header: Podcast Introduction - line: Hi everyone, welcome to our event. This event is brought to you by Data Talks @@ -1066,8 +1081,15 @@ transcript: sec: 3313 time: '55:13' who: Sebastian +context: At its core this episode is about how building open, reproducible computational + infrastructure and workflows lets us translate messy biological data into scalable, + actionable insight—bridging wet lab and dry lab work so researchers can ask better + questions, run fewer experiments, and move faster. From genomics and metagenomics + pipelines to network inference, molecular simulation, knowledge graphs, visualization, + and AI assistants, the through‑line is empowering scientists with accessible tools, + automation, and community-driven software that make complex biology interpretable, + shareable, and useful in the real world. 
--- - Links: * [LinkedIn](https://www.linkedin.com/in/sayalaruano/){:target="_blank"} diff --git a/_podcast/to-update/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md similarity index 92% rename from _podcast/to-update/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md rename to _podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md index a2774148..eb00a1e1 100644 --- a/_podcast/to-update/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md +++ b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md @@ -1,21 +1,6 @@ --- -title: "Context: -The episode traces a practitioner’s journey from early ML and language work into building agentic systems for real-world SRE and productivity problems—covering definitions, architectures, planning, retrieval, tooling, frameworks, testing, and evaluation. - -Core (single unifying idea): -Pragmatic agent engineering: turning LLMs into reliable, task‑oriented autonomous systems by engineering around their capabilities and limits—designing objectives, orchestration, context/retrieval, tooling integrations, planning strategies, and rigorous evaluation so agents can safely, efficiently, and predictably perform real operational and enterprise tasks. - -Why this unifies the episode: -- Defines what an “agent” means in practice (autonomy + objectives + LLMs) and why design choices matter. -- Shows orchestration needs (tools, memory, knowledge stores) to ground LLM reasoning in real data and actions. -- Contrasts planning styles (single‑step, multi‑pass, self‑reflection) and implementation tradeoffs (prompts vs SDKs, code vs natural‑language agents) as engineering choices, not academic ones. -- Treats retrieval/RAG as an engineering component with latency/cost/GIGO constraints and explores agentic RAG when RAG alone falls short. 
-- Emphasizes integration abstractions and framework tradeoffs for production deployment (from bespoke stacks to marketplaces and SDKs). -- Centers testing and evaluation—mocking tools, regression tests, goal‑based benchmarks—to ensure outcomes over narrative plausibility. -- Highlights specialization and domain constraints: generic agents struggle; practical value comes from adapting agents to workflows, data, and operational requirements. - -Bottom line: -The episode’s through‑line is that successful agent projects are not just about large models: they are systems engineering problems requiring explicit choices about autonomy, grounding, tooling, planning, and measurement to deliver dependable, useful automation." +title: 'Building Agentic AI Systems: Pragmatic Agent Engineering, Tooling, Retrieval + & Evaluation' short: Building reliable AI products in the era of Gen AI and Agents season: 22 episode: 1 @@ -30,13 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/building-reliable-ai-products-in-the-era-of-gen/id1541710331?i=1000731199709 spotify: https://open.spotify.com/episode/7c22vqYNuNLKKYEfYGOos8?si=NBFT2e80S6WErW_tDDrijA youtube: https://www.youtube.com/watch?v=x2AAjqz2XmM - -description: Build autonomous LLM agents with RAG, orchestration & context engineering - master SRE automation, testing, evaluation metrics and latency/cost tradeoffs -intro: 'How do you build and evaluate truly autonomous LLM agents that balance retrieval, orchestration, and real-world SRE needs? In this episode, Ranjitha Gurunath Kulkarni — Staff ML Engineer at NeuBird.ai with earlier LLM and assistant work at Dropbox and Microsoft and an LTI master’s from Carnegie Mellon — walks through practical engineering trade-offs for autonomous LLM agents and retrieval-augmented generation (RAG).

We cover a clear agent definition (autonomy, objectives, LLMs), agent orchestration tools and memory/knowledge stores, planning strategies from single-step to self-reflection, and implementation choices: prompts, SDKs, tool wrappers, and the code‑vs‑natural‑language agent trade-offs. Ranjitha digs into context engineering techniques (chunking, metadata, wrappers), RAG realities (latency, cost, GIGO), and when retrieval alone suffices versus when full agents are needed. She also maps SRE workflows to agents (logs, metrics, remediation), integration abstractions, framework trade-offs (LangChain, OpenAI Agents SDK, Small Agents), and evaluation strategy: custom datasets, mocking tools, regression tests, and goal‑based outcome assertions.

Listen to learn practical guidance for building, testing, and deploying autonomous LLM agents, and which architectures and evaluation approaches work best for production systems.' +description: 'Discover agentic AI tactics: practical agent engineering and retrieval + strategies to build robust autonomous systems, boost performance and ensure reliability.' +intro: 'How do you build reliable, agentic AI systems that balance practical engineering, + tooling, retrieval, and robust evaluation? In this episode Ranjitha Kulkarni, Staff + Machine Learning Engineer at NeuBird.ai and former engineer on LLM- and agent-powered + product features at Dropbox Dash and Microsoft, explores pragmatic approaches to + agent design. Drawing on her work in speech recognition, language modeling, assistant + evaluation, and publications on voice query reformulation and automatic online evaluation, + Ranjitha discusses key elements of agent engineering: selecting and integrating + tools, designing effective retrieval pipelines, and establishing meaningful evaluation + metrics for intelligent assistants.

Listeners will get a grounded look + at the trade-offs of agentic AI in real products, how retrieval strategies impact + reasoning and performance, and practical evaluation frameworks to measure assistant + behavior. If you’re building LLM-powered agents, improving tool use, or defining + evaluation for agentic systems, this episode offers actionable perspectives rooted + in production experience and research. Keywords: agentic AI, agent engineering, + agent tooling, retrieval, agent evaluation, LLM-powered products.' dateadded: 2025-10-21 - duration: PT00H59M23S - quotableClips: - name: Event Introduction & Community Links startOffset: 0 @@ -154,7 +151,6 @@ quotableClips: startOffset: 3546 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3546 endOffset: 3563 - transcript: - header: Event Introduction & Community Links - line: Hi everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -991,8 +987,35 @@ transcript: sec: 3563 time: '59:23' who: Alexey ---- +context: 'Context: The episode traces a practitioner’s journey from early ML and language + work into building agentic systems for real-world SRE and productivity problems—covering + definitions, architectures, planning, retrieval, tooling, frameworks, testing, and + evaluation. + + Core (single unifying idea): Pragmatic agent engineering: turning LLMs into reliable, + task‑oriented autonomous systems by engineering around their capabilities and limits—designing + objectives, orchestration, context/retrieval, tooling integrations, planning strategies, + and rigorous evaluation so agents can safely, efficiently, and predictably perform + real operational and enterprise tasks. + Why this unifies the episode: - Defines what an “agent” means in practice (autonomy + + objectives + LLMs) and why design choices matter. - Shows orchestration needs + (tools, memory, knowledge stores) to ground LLM reasoning in real data and actions. 
+ - Contrasts planning styles (single‑step, multi‑pass, self‑reflection) and implementation + tradeoffs (prompts vs SDKs, code vs natural‑language agents) as engineering choices, + not academic ones. - Treats retrieval/RAG as an engineering component with latency/cost/GIGO + constraints and explores agentic RAG when RAG alone falls short. - Emphasizes integration + abstractions and framework tradeoffs for production deployment (from bespoke stacks + to marketplaces and SDKs). - Centers testing and evaluation—mocking tools, regression + tests, goal‑based benchmarks—to ensure outcomes over narrative plausibility. - Highlights + specialization and domain constraints: generic agents struggle; practical value + comes from adapting agents to workflows, data, and operational requirements. + + Bottom line: The episode’s through‑line is that successful agent projects are not + just about large models: they are systems engineering problems requiring explicit + choices about autonomy, grounding, tooling, planning, and measurement to deliver + dependable, useful automation.' +--- Links: * [Linkedin](https://www.linkedin.com/in/ranjitha-gurunath-kulkarni){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s16e08-ai-for-digital-health.md b/_podcast/building-ai-digital-health-startups.md similarity index 95% rename from _podcast/to-update/s16e08-ai-for-digital-health.md rename to _podcast/building-ai-digital-health-startups.md index 61d3700e..718d93a9 100644 --- a/_podcast/to-update/s16e08-ai-for-digital-health.md +++ b/_podcast/building-ai-digital-health-startups.md @@ -1,7 +1,5 @@ --- -title: "Context: The episode traces a founder’s shift from engineering to healthcare entrepreneurship, driven by the opportunity to digitize fragmented medical systems. It covers pragmatic founder tactics (immersion, rapid MVPs, cold outreach), an unusual AR MVP to collect engagement data, and a discovery that everyday lifestyle interactions reveal skin‑health signals. 
The conversation ties product experimentation and iterative pivots to building a digital clinic flow (diagnosis → prescription → telemedicine), while confronting legacy infrastructure, rural access gaps, ethical UX, and regional go‑to‑market limits. Growth topics — community‑first data strategies, personalization, retention, hiring, fundraising, and monetization via SaaS/partnerships — are framed alongside human considerations like leadership choices and work‑life integration. - -Core theme: Building an ethical, product‑first digital healthcare startup by using rapid experimentation and community‑driven engagement to bootstrap meaningful clinical data and align AI capabilities with real patient workflows and viable business models—solving legacy access and workflow problems regionally, iterating from MVP to product‑market fit, and scaling sustainably while keeping human needs and ethics central." +title: 'Building Digital Health Startups: MVP Strategy, AI Diagnosis and Telemedicine' short: AI for Digital Health season: 16 episode: 8 @@ -16,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/ai-for-digital-health-maria-bruckert/id1541710331?i=1000637212773 spotify: https://open.spotify.com/episode/2NE0vbiYwXxOuqychHIqBR?si=QdRyuJvSRE2V3bLwHaEv-Q youtube: https://www.youtube.com/watch?v=whpkDmVVGUE - -description: Discover how to build a digital clinic with AI skin health and telemedicine—learn go-to-market, data strategy, monetization, and hiring wins -intro: How do you build and scale a digital clinic that blends AI-driven skin health, telemedicine, and an AR MVP? In this episode, Maria-Liisa Bruckert, Co‑Founder and Co‑CEO of SQIN and recipient of the Google Female Founder Immersion 2020 and Google Play Best of 2020, walks through the practical steps she took to turn an engineering mindset into a digital health business.

We cover industry immersion and MVP development, why healthcare digitization matters, and real operational challenges like data gaps, rural access, and legacy workflows. Maria explains the AR lipstick try-on as a data collection and engagement tactic, how to surface skin health signals from everyday interactions, and aligning AI capabilities with clear business cases. You’ll also hear about building a digital clinic flow from diagnosis to prescription, telemedicine’s role in remote follow-up and efficiency, ethics and inclusive UX, regional go-to-market tactics, data strategy for bootstrapping datasets, and early hiring, fundraising, and monetization approaches.

Listeners interested in digital clinic design, AI skin health, telemedicine implementation, or launching an AR MVP will find actionable tactics and lessons to apply to product-market fit, data strategy, and go-to-market execution +description: Discover actionable digital health MVP strategy and telemedicine tactics + to build, validate and monetize a scalable healthcare startup with faster remote + care. +intro: How do you build a digital health startup that ships a focused MVP, uses AI + for diagnosis, and delivers care via telemedicine while overcoming data gaps and + legacy workflows? In this episode Maria-Liisa Bruckert, Co‑Founder and Co‑CEO of + SQIN and recipient of the Google Play Best of 2020 award and Google Female Founder + Immersion 2020, walks through her transition from electrical engineering to health + tech and the practical playbook she uses to de‑risk product development.

+ We cover MVP strategy and market research tactics—cold outreach, accelerators, clinical + meetings—and unconventional experiments like an AR “lipstick try‑on” to collect + engagement data. Maria explains how SQIN aligns AI diagnosis with concrete business + cases, builds a digital clinic flow from diagnosis to prescription, and uses telemedicine + for remote follow‑up and prescriptions. You’ll also hear about data strategy and + community bootstrapping, ethics and UX for sensitive AI messaging, go‑to‑market + choices for regional rollout, and monetization through SaaS integrations and partnerships. +

Listen for actionable insights on product‑market fit, hiring priorities + for AI and full‑stack roles, and practical steps to launch a digital health startup + that balances technical credibility with patient access. dateadded: 2023-12-03 - duration: PT00H52M27S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -136,7 +148,6 @@ quotableClips: startOffset: 3138 url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3138 endOffset: 3147 - transcript: - header: Podcast Introduction - line: This week, we will talk about AI for digital healthcare. We have a special @@ -1168,8 +1179,23 @@ transcript: sec: 3147 time: '52:27' who: Maria ---- +context: 'Context: The episode traces a founder’s shift from engineering to healthcare + entrepreneurship, driven by the opportunity to digitize fragmented medical systems. + It covers pragmatic founder tactics (immersion, rapid MVPs, cold outreach), an unusual + AR MVP to collect engagement data, and a discovery that everyday lifestyle interactions + reveal skin‑health signals. The conversation ties product experimentation and iterative + pivots to building a digital clinic flow (diagnosis → prescription → telemedicine), + while confronting legacy infrastructure, rural access gaps, ethical UX, and regional + go‑to‑market limits. Growth topics — community‑first data strategies, personalization, + retention, hiring, fundraising, and monetization via SaaS/partnerships — are framed + alongside human considerations like leadership choices and work‑life integration. + Core theme: Building an ethical, product‑first digital healthcare startup by using + rapid experimentation and community‑driven engagement to bootstrap meaningful clinical + data and align AI capabilities with real patient workflows and viable business models—solving + legacy access and workflow problems regionally, iterating from MVP to product‑market + fit, and scaling sustainably while keeping human needs and ethics central.' 
+--- Links: * [Maria's LinkedIn profile](https://www.linkedin.com/in/mariabruckert/){:target="_blank"} diff --git a/_podcast/building-data-products-lead-data-scientist.md b/_podcast/building-data-products-lead-data-scientist.md new file mode 100644 index 00000000..18bb96ec --- /dev/null +++ b/_podcast/building-data-products-lead-data-scientist.md @@ -0,0 +1,10 @@ +--- +description: Discover actionable podcast strategy and growth tactics to boost audience, + optimize episodes, and convert listeners into customers with measurable results. +--- + +Links: + +* [LinkedIn](https://www.linkedin.com/in/ioannis-mesionis/){:target="_blank"} +* [Github](https://github.com/ioannismesionis){:target="_blank"} +* [Website](https://ioannismesionis.github.io/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s18e07-building-domestic-risk-assessment-tool.md b/_podcast/building-domestic-risk-assessment-tool.md similarity index 65% rename from _podcast/to-update/s18e07-building-domestic-risk-assessment-tool.md rename to _podcast/building-domestic-risk-assessment-tool.md index 6074f9ec..99e2096a 100644 --- a/_podcast/to-update/s18e07-building-domestic-risk-assessment-tool.md +++ b/_podcast/building-domestic-risk-assessment-tool.md @@ -1,7 +1,6 @@ --- -title: "Context: The episode follows the end-to-end effort to create a domestic risk assessment tool—framing the problem, assembling and preparing data, designing and evaluating models, addressing privacy, ethics and legal needs, integrating with frontline workflows and interfaces, engaging stakeholders, monitoring performance, and planning for scale, funding, and reproducibility. 
- -Core theme: Designing and operationalizing a people-centered, data-driven domestic risk assessment that translates technical rigor into trustworthy, ethical, legally compliant, and user-friendly decision support—balancing accuracy, fairness, privacy, and sustainability so models meaningfully improve frontline triage and resource allocation in the real world." +title: 'Building a Domestic Risk Assessment Tool: Data Cleaning, Risk Scoring Models + and Privacy Compliance' short: Building a Domestic Risk Assessment Tool season: 18 episode: 7 @@ -16,12 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/building-a-domestic-risk-assessment-tool-sabina-firtala/id1541710331?i=1000662124309 spotify: https://open.spotify.com/episode/7bjORhGzTQoxtbv60mMtzW?si=p6UaBdZJTnGvlwbGb6AsFQ youtube: https://www.youtube.com/watch?v=CpWlBAmD9ok - -description: 'Learn to build a domestic risk assessment tool: data cleaning, risk scoring models, privacy and deployment strategies to improve triage and resource allocation.' -intro: 'How do you build a domestic risk assessment tool that meaningfully improves triage while protecting people’s privacy and avoiding bias? In this episode, Sabina Firtala from Frontline’s AI product development walks through the end-to-end process of building a domestic risk assessment tool for triage. Sabina brings hands-on experience across data wrangling, visualization, statistical testing, model training and validation, with a background in Natural Sciences and prior analyst roles in finance and SaaS, plus freelance work for mission-driven projects.

We cover problem framing and project scope, data sources (case management systems, public records, surveys), and data preparation: cleaning, linking and feature engineering. Sabina explains risk scoring and model architecture, evaluation metrics and bias assessment, and practical privacy, ethical and legal compliance measures. Deployment topics include integrating risk tools into frontline workflows, user interface and decision-support design, stakeholder training and trust, plus monitoring for model drift and alerts. The episode also addresses operational constraints, partnerships, funding and open resources. Listen for concrete guidance on building, evaluating and deploying a domestic risk assessment tool—focused on impact, fairness, privacy and sustainability.' +description: 'Discover how to build a domestic risk assessment tool: data cleaning, risk scoring + models, and privacy compliance to improve triage and reduce bias.' +intro: 'How do you build an accurate, privacy‑compliant domestic risk assessment tool + that frontline teams can actually use? In this episode Sabina Firtala — who leads + Frontline’s AI product development and brings experience in data wrangling, model + validation, and applied analytics from finance, SaaS, and mission‑driven projects + — walks through a practical roadmap.

We cover problem framing and project + scope; sources like case management, public records, and surveys; and hands‑on data + work: cleaning, linking, and feature engineering. Sabina explains risk scoring approaches + and model architecture, evaluation metrics and bias assessment, plus privacy, ethical + considerations, and legal data governance. You’ll also hear about deployment into + frontline workflows, user interface and decision‑support design, training and stakeholder + trust, ongoing monitoring and drift detection, and examples of impact on triage + and resource allocation. The conversation closes with collaboration strategies, + funding and scaling, open documentation for reproducibility, and concrete lessons + learned.

Listen for actionable guidance on data cleaning, building and + validating risk scoring models, and ensuring privacy compliance so you can design + responsible, usable domestic risk assessment tools.' dateadded: 2024-07-15 - - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -111,9 +123,18 @@ quotableClips: startOffset: 3840 url: https://www.youtube.com/watch?v=CpWlBAmD9ok&t=3840 endOffset: 3840 +context: 'Context: The episode follows the end-to-end effort to create a domestic + risk assessment tool—framing the problem, assembling and preparing data, designing + and evaluating models, addressing privacy, ethics and legal needs, integrating with + frontline workflows and interfaces, engaging stakeholders, monitoring performance, + and planning for scale, funding, and reproducibility. + Core theme: Designing and operationalizing a people-centered, data-driven domestic + risk assessment that translates technical rigor into trustworthy, ethical, legally + compliant, and user-friendly decision support—balancing accuracy, fairness, privacy, + and sustainability so models meaningfully improve frontline triage and resource + allocation in the real world.' 
--- - Links: * [LinkedIn](https://www.linkedin.com/company/frontline100/){:target="_blank"} diff --git a/_podcast/to-update/s16e02-bridging-data-science-and-healthcare.md b/_podcast/building-healthcare-machine-learning-systems.md similarity index 96% rename from _podcast/to-update/s16e02-bridging-data-science-and-healthcare.md rename to _podcast/building-healthcare-machine-learning-systems.md index f49f5089..616d3627 100644 --- a/_podcast/to-update/s16e02-bridging-data-science-and-healthcare.md +++ b/_podcast/building-healthcare-machine-learning-systems.md @@ -1,7 +1,6 @@ --- -title: "Context: A conversation with ML researcher Elena Stamatelou covering her path into healthcare data science, technical projects (from C‑arm imaging and cell sorting to ballistography and home‑pregnancy monitoring), data collection and annotation, low‑resource pediatric solutions, clinical use cases like sepsis prediction, and the practical constraints of validation, regulation, deployment, infrastructure, and clinician adoption. - -Core (single unifying theme): Building meaningful healthcare ML is not primarily about technical novelty but about purposefully bridging technical innovation with clinical reality—designing explainable, validated, and infrastructure‑aware systems through iterative, multidisciplinary collaboration and rigorous data collection so that models safely augment clinicians and improve patient outcomes (including equitable solutions for low‑resource settings)." 
+title: 'Building Healthcare ML Systems: From Sepsis Prediction to Low-Resource Clinical + Deployment' short: Bridging Data Science and Healthcare season: 16 episode: 2 @@ -16,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/bridging-data-science-and-healthcare-eleni-stamatelou/id1541710331?i=1000632040444 spotify: https://open.spotify.com/episode/5W6lfZVhjIKEmVzBuexfzE?si=0nUHr66eQa6oPVJDb3d0rw youtube: https://www.youtube.com/watch?v=pDOwlulDh0c - -description: Learn ML tactics for sepsis prediction and low-resource monitoring with clinical translation tips—deployment, validation, clinician adoption to accelerate impact -intro: 'How do you move machine learning from promising models to reliable tools that work in low-resource hospitals — and what does it take to predict conditions like sepsis from routinely collected vitals? In this episode, we speak with Eleni Stamatelou, a machine learning researcher focused on healthcare whose path spans the University of Patras, Erasmus exchanges, work at VUB/ULB, a Philips Healthcare internship and a doctorate in data science. Eleni’s work ranges from C‑arm 3D reconstruction and white blood cell image classification to home pregnancy monitoring and a vital‑sign system deployed for pediatric care in Malawi.

We dig into practical topics: designing sensors and linking them to lab outcomes, ballistography signal denoising and U‑Net heart‑rate estimation, the tradeoffs between signal‑processing and deep learning approaches, and a sepsis prediction use case built from vitals and clinical data. We also cover clinical translation challenges — annotation scarcity, explainability, validation timelines, population generalization, and on‑device versus cloud deployment constraints. Listen to understand the technical and clinical steps needed to build, validate and deploy ML in healthcare and how to navigate a career in healthcare data science.' +description: Learn to build reliable healthcare ML systems for sepsis prediction and + low-resource clinical deployment—improve patient outcomes, scalability, and trust. +intro: 'How do you build machine learning systems that can predict sepsis and actually + work in low-resource clinical settings? In this episode Eleni Stamatelou, a machine + learning researcher and educator focused on using data science to improve healthcare, + walks through the technical and practical steps of turning models into deployed + clinical tools. With expertise in signal processing, deep learning, and data-driven + design, Eleni frames the core challenges of healthcare ML: data quality and preprocessing, + model reliability for sepsis prediction, and the constraints of low-resource deployment. +

We cover key topics including designing robust models for noisy clinical + signals, evaluation and validation strategies suited to patient safety, and pragmatic + considerations for integrating ML into clinical workflows with limited infrastructure. + Listeners will gain actionable insights on bridging research and practice—how to + prioritize features, manage trade-offs between complexity and reliability, and make + deployment decisions that respect resource limitations.

If you work on + machine learning in healthcare, clinical AI, or sepsis prediction, this episode + provides concrete perspectives on building systems that are both scientifically + sound and practically deployable in low-resource environments.' dateadded: 2023-10-23 - duration: PT00H59M01S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -148,7 +160,6 @@ quotableClips: startOffset: 3539 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3539 endOffset: 3541 - transcript: - header: Podcast Introduction - header: 'Guest Overview: Elena Stamatelou — ML researcher focused on healthcare' @@ -1137,8 +1148,9 @@ transcript: sec: 3585 time: '59:45' who: Alexey +context: 'Context: A conversation with ML researcher Eleni Stamatelou covering her + path into healthcare data science, technical projects (from C‑arm imaging and cell + sorting to ballistography and home‑pregnancy monitoring), data collection and annotation, + low‑resource pediatric solutions, clinical use cases like sepsis prediction, and + the practical constraints of validation, regulation, deployment, infrastructure, + and clinician adoption. + Core (single unifying theme): Building meaningful healthcare ML is not primarily + about technical novelty but about purposefully bridging technical innovation with + clinical reality—designing explainable, validated, and infrastructure‑aware systems + through iterative, multidisciplinary collaboration and rigorous data collection + so that models safely augment clinicians and improve patient outcomes (including + equitable solutions for low‑resource settings).' --- - Links: * [LinkedIn](https://www.linkedin.com/in/elenistamatelou/){:target="_blank"} diff --git a/_podcast/to-update/s17e09-building-production-search-systems.md b/_podcast/building-production-search-systems.md similarity index 96% rename from _podcast/to-update/s17e09-building-production-search-systems.md rename to _podcast/building-production-search-systems.md index d6ebae3d..51c89b06 100644 --- a/_podcast/to-update/s17e09-building-production-search-systems.md +++ b/_podcast/building-production-search-systems.md @@ -1,5 +1,5 @@ --- -title: "Central narrative: Building effective, real-world search and retrieval is a systems engineering problem that pragmatically combines modern representation learning (dense, multimodal embeddings and specialized encoders) with classical IR techniques (inverted indexes, filters, recency, and ranking), wrapped in robust MLOps, evaluation, and product-oriented trade-offs. 
The episode’s through-line is that success comes from hybrid architectures and operational discipline—careful choices about embeddings, indexing, model versioning, pipeline design, vendor/tool selection, and business metrics—so teams can move fast from prototype (e.g., CLIP experiments) to scalable, maintainable, and measurable production search." +title: 'Building Search Systems: Dense Embeddings, MLOps and Evaluation Metrics' short: Building Production Search Systems season: 17 episode: 9 @@ -14,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/building-production-search-systems-daniel-svonava/id1541710331?i=1000650138905 spotify: https://open.spotify.com/episode/19R0rLA8hULTBZi9FhZuTs?si=xggb0OzfRHCFSmXtJWm7bA youtube: https://www.youtube.com/watch?v=gEmSrknGKDE - -description: Discover vector search, embeddings & vector database practices - indexing, hybrid retrieval, CLIP prototype and MLOps tips to boost relevance & ship faster -intro: How do you design and operate reliable vector search systems that balance embeddings, traditional indexing, and production MLOps? In this episode, Daniel Svonava — co-founder of Superlinked and VectorHub, former ML infrastructure tech lead for YouTube Ads with a 20‑year engineering background including competitive programming and research internships at Google and IBM — answers that question with practical detail.

We dig into the mechanics of indexing (inverted indexes, document chunking, candidate generation and ranking), the evolution from bag‑of‑words to dense embeddings, and the role of vector databases for nearest‑neighbor search. Daniel walks through vector compute tradeoffs (ingestion vs query‑time encoding), model versioning, pipeline challenges like recomputing embeddings, and hybrid retrieval strategies that combine vector similarity with filters, recency, and Lucene-style constraints. He also explains multi‑modal retrieval with CLIP, multi‑embedding designs, timestamp/positional encoding, and vendor selection criteria.

Listen to learn concrete guidance on prototyping with CLIP, when to use Lucene/Elasticsearch versus dedicated vector DBs, MLOps tradeoffs, and how to measure search impact through A/B testing and operational metrics — actionable insight for engineers building production search and recommender systems +description: Learn dense embeddings, vector databases & MLOps to productionize search—get + indexing, hybrid search, evaluation metrics and deploy tips to boost relevance. +intro: 'How do you build search systems that balance dense embeddings, MLOps, and + meaningful evaluation metrics? In this episode Daniel Svonava — an entrepreneurial + technologist with 20 years of experience (from competitive programming and research + internships to leading ML infrastructure at YouTube Ads) and co-founder of Superlinked/VectorHub + — walks through practical design and operational decisions for modern search and + retrieval.

We cover core topics: framing search as a decision problem, + representation learning from bag-of-words to dense vector embeddings, inverted index + mechanics, document chunking and ingestion, and when to use Lucene/Elasticsearch + versus dedicated vector databases. Daniel explains vector compute trade-offs (ingestion + vs query-time encoding), model versioning and recomputing embeddings, hybrid search + strategies, CLIP-style cross-modal retrieval, multi-embedding designs, and techniques + for encoding recency and timestamps. He also digs into MLOps concerns — pipeline + brittleness, configuration debt, and deployment trade-offs — plus evaluation: business + metrics, A/B testing, offline evaluation and operational metrics.

Listeners + will get concrete guidance on embedding strategy, vector database selection, indexing + and ranking trade-offs, and how to measure search impact so teams can prototype + faster and productionize reliable retrieval systems.' dateadded: 2024-03-25 - duration: PT01H05M23S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -162,7 +176,6 @@ quotableClips: startOffset: 4008 url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=4008 endOffset: 3923 - transcript: - header: Podcast Introduction - header: 'Guest Introduction: Daniel Svonava, Superlinked & VectorHub' @@ -1194,8 +1207,16 @@ transcript: sec: 4030 time: '1:07:10' who: Daniel +context: 'Central narrative: Building effective, real-world search and retrieval is + a systems engineering problem that pragmatically combines modern representation + learning (dense, multimodal embeddings and specialized encoders) with classical + IR techniques (inverted indexes, filters, recency, and ranking), wrapped in robust + MLOps, evaluation, and product-oriented trade-offs. The episode’s through-line is + that success comes from hybrid architectures and operational discipline—careful + choices about embeddings, indexing, model versioning, pipeline design, vendor/tool + selection, and business metrics—so teams can move fast from prototype (e.g., CLIP + experiments) to scalable, maintainable, and measurable production search.' 
--- - Links: * [VectorHub](https://superlinked.com/vectorhub/?utm_source=community&utm_medium=podcast&utm_campaign=datatalks){:target="_blank"} diff --git a/_podcast/to-update/s18e05-community-building-and-teaching-in-ai-tech.md b/_podcast/community-building-and-teaching-in-ai-tech.md similarity index 91% rename from _podcast/to-update/s18e05-community-building-and-teaching-in-ai-tech.md rename to _podcast/community-building-and-teaching-in-ai-tech.md index 74bcfdb5..bc199b1d 100644 --- a/_podcast/to-update/s18e05-community-building-and-teaching-in-ai-tech.md +++ b/_podcast/community-building-and-teaching-in-ai-tech.md @@ -1,7 +1,6 @@ --- -title: "Context: This episode follows Erum Afzal and Omdena Academy’s evolution—how global, project‑based AI collaborations and community organizing were systematized into accessible, tiered courses and local chapters to teach practical, ethical AI skills. - -Core theme: The unifying idea is that democratizing real‑world AI expertise requires a community‑first, project‑to‑course approach—turning collaborative problem‑solving into structured learning pathways, open instructor pipelines, regional sub‑communities, and integrity‑focused practices so diverse learners can rapidly gain practical skills, leadership opportunities, and ethical career pathways in AI." +title: 'Community Building and Teaching in AI & Tech: Project-to-Course Model for + AI Education' short: Community Building and Teaching in AI & Tech season: 18 episode: 5 @@ -16,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/community-building-and-teaching-in-ai-tech-erum-afzal/id1541710331?i=1000655187649 spotify: https://open.spotify.com/episode/4iAvz4Qu0l28fxXvaHdAPj?si=7MdKKu1fTrqxIGPQBT61Ag youtube: https://www.youtube.com/watch?v=7SLd5V7z3xQ - -description: 'Discover Omdena Academy''s project-to-course AI education for data science: learn Python, NLP, instructor paths, and gain real-world project experience.' 
-intro: 'How do you turn real-world AI project experience into repeatable courses that launch data science careers and train instructors? In this episode, Erum Afzal — lead ML engineer, Teaching Expert at Women in AI Academy, and PhD researcher in AI for teacher training — explains how Omdena Academy evolved from collaborative projects into a project-to-course model for AI education.

We cover the Academy’s shift from global Omdena projects to structured data science courses, foundational topics taught (Python, Pandas, NumPy, NLP), and the process for developing courses: instructor application, content review, delivery, and evaluation. Erum outlines access pathways—enrolling without prior Omdena membership, pathways into projects, and an open instructor pipeline—plus community and leadership development through regional chapters and sub-communities. You’ll hear about curriculum tiers (basic to advanced), boosting engagement with live sessions, and maintaining hiring integrity amid plagiarism and responsible ChatGPT use. Practical details include how to apply (Omdena.com/Omdena-Academy), scholarship and GitHub resources, and options for instructors to volunteer or monetize content.

Listen to learn actionable steps for joining, teaching, or designing project-based data science courses that prepare learners for careers in AI.' +description: Discover Omdena's project-to-course model for AI education and community + building—master curriculum, instructor pipeline, and pathways to join real AI projects. +intro: 'How can communities turn real-world AI projects into repeatable courses that + scale learning and careers? In this episode Erum Afzal — lead ML engineer, PhD researcher + in AI for teacher training, and head of Omdena Academy — walks through a project-to-course + model for AI education rooted in community collaboration.

We cover Omdena’s + evolution from global, problem-focused projects into structured courses, the design + of foundational data science curricula (Python, Pandas, NumPy, NLP), and the practical + steps for course development: instructor application, content review, delivery, + and engagement strategies (live sessions, selection, graduation). Erum explains + the instructor pipeline and open applications, access pathways for learners, the + academy’s free-course business model with organizational partnerships, and options + for monetization or volunteer teaching.

Listeners will get concrete guidance + on curriculum tiers (basic to advanced), community growth tactics (start small, + empower sub-communities), ethical concerns around hiring integrity and responsible + ChatGPT use, and where to apply to teach (Omdena.com/Omdena-Academy). This episode + is for educators, community builders, and early-career practitioners who want actionable + models for teaching, curriculum design, and building inclusive AI learning communities.' dateadded: 2024-05-12 - duration: PT00H57M03S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -134,7 +146,6 @@ quotableClips: startOffset: 3466 url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3466 endOffset: 3423 - transcript: - header: Podcast Introduction - header: 'Guest Introduction: Erum Afzal — AI for education & Omdena Academy' @@ -677,8 +688,17 @@ transcript: sec: 3508 time: '58:28' who: Alexey ---- +context: 'Context: This episode follows Erum Afzal and Omdena Academy’s evolution—how + global, project‑based AI collaborations and community organizing were systematized + into accessible, tiered courses and local chapters to teach practical, ethical AI + skills. + Core theme: The unifying idea is that democratizing real‑world AI expertise requires + a community‑first, project‑to‑course approach—turning collaborative problem‑solving + into structured learning pathways, open instructor pipelines, regional sub‑communities, + and integrity‑focused practices so diverse learners can rapidly gain practical skills, + leadership opportunities, and ethical career pathways in AI.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/erum-afzal-64827b24/){:target="_blank"} diff --git a/_podcast/to-update/s20e09-taking-your-freelance-career-to-next-level.md b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md similarity index 92% rename from _podcast/to-update/s20e09-taking-your-freelance-career-to-next-level.md rename to _podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md index ae6ac6b7..252b1a6d 100644 --- a/_podcast/to-update/s20e09-taking-your-freelance-career-to-next-level.md +++ b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md @@ -1,7 +1,5 @@ --- -title: "Context: Through Dimitri’s journey and practical segments on job data, client acquisition, pricing, AI tools, and scaling, the episode maps the real-world mechanics of going freelance in data. - -Core narrative: The unifying idea is that a sustainable, scalable freelance data career is built by starting with market demand—validate financial targets, specialize around high‑impact problems, productize repeatable analytics offerings, leverage productivity tools (including AI) to deliver efficiently, and adopt deliberate pricing and client‑retention models (projects, subscriptions, or agency paths) so you can reliably land clients, capture value, and grow on your own terms." 
+title: "Building a Sustainable Data Freelancing Career: Market Validation, Client Acquisition & Strategic Positioning" short: Taking your Freelance Career to the Next Level season: 20 episode: 9 @@ -16,9 +14,25 @@ links: apple: https://podcasts.apple.com/us/podcast/can-you-quit-your-job-and-still-succeed-as-a-data-freelancer/id1541710331?i=1000718997257 spotify: https://open.spotify.com/episode/3BknrKqhLggx1G5ZbrfgFc youtube: https://www.youtube.com/watch?v=S93V8RgwBig - -description: Master data freelancer tactics, pricing strategies and AI tools to land clients, price services confidently, and boost productivity for higher income -intro: How do you move from employed data pro to a sustainable data freelancer who consistently lands clients, prices services well, and uses AI to boost productivity? In this episode, Dimitri Visnadi — an independent data consultant focused on data strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles at HP and a Google-partnered firm, and holds a Masters in Business Analytics & Computer Science from UCL — walks through a practical playbook for data freelancers.

Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus problem-solving, and how to validate freelance viability with financial targets. He explains how to land initial clients through recruiters and LinkedIn, the idea behind a data-freelancer job board, market-driven specialization, and insights on rates, top skills and data management. You’ll hear about scaling choices (lifestyle business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course and community approaches for branding and marketing, subscription models and client relationship management, high-impact small analyses, pricing strategies (hourly vs packages), and transition planning.

Listen to get concrete guidance on landing clients, setting prices, structuring offers, and using AI tools to increase productivity as a freelance data consultant +description: Master data freelancer tactics, pricing strategies and AI tools to land + clients, price services confidently, and boost productivity for higher income +intro: How do you move from employed data pro to a sustainable data freelancer who + consistently lands clients, prices services well, and uses AI to boost productivity? + In this episode, Dimitri Visnadi — an independent data consultant focused on data + strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles + at HP and a Google-partnered firm, and holds a Masters in Business Analytics & Computer + Science from UCL — walks through a practical playbook for data freelancers.

+ Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus + problem-solving, and how to validate freelance viability with financial targets. + He explains how to land initial clients through recruiters and LinkedIn, the idea + behind a data-freelancer job board, market-driven specialization, and insights on + rates, top skills and data management. You’ll hear about scaling choices (lifestyle + business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course + and community approaches for branding and marketing, subscription models and client + relationship management, high-impact small analyses, pricing strategies (hourly + vs packages), and transition planning.

Listen to get concrete guidance + on landing clients, setting prices, structuring offers, and using AI tools to increase + productivity as a freelance data consultant topics: - Freelance - Career Growth @@ -29,9 +43,7 @@ topics: - Business Development dateadded: 2025-07-28 date: 2025-11-07 - duration: PT01H05M29S - quotableClips: - name: Episode Opening & Dimitri’s Data Journey startOffset: 0 @@ -97,7 +109,6 @@ quotableClips: startOffset: 3929 url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3929 endOffset: 3929 - transcript: - header: Episode Opening & Dimitri’s Data Journey - header: Episode Opening & Dimitri’s Data Journey @@ -665,8 +676,17 @@ transcript: sec: 3929 time: '1:05:29' who: Alexey ---- +context: 'Context: Through Dimitri’s journey and practical segments on job data, client + acquisition, pricing, AI tools, and scaling, the episode maps the real-world mechanics + of going freelance in data. + Core narrative: The unifying idea is that a sustainable, scalable freelance data + career is built by starting with market demand—validate financial targets, specialize + around high‑impact problems, productize repeatable analytics offerings, leverage + productivity tools (including AI) to deliver efficiently, and adopt deliberate pricing + and client‑retention models (projects, subscriptions, or agency paths) so you can + reliably land clients, capture value, and grow on your own terms.' 
+--- Links: * [Previous podcast episode](https://datatalks.club/podcast/s16e09-become-data-freelancer.html){:target="_blank"} diff --git a/_podcast/to-update/s18e01-inclusive-data-leadership-coaching.md b/_podcast/data-leadership-coaching.md similarity index 96% rename from _podcast/to-update/s18e01-inclusive-data-leadership-coaching.md rename to _podcast/data-leadership-coaching.md index 0564d34f..39e440ab 100644 --- a/_podcast/to-update/s18e01-inclusive-data-leadership-coaching.md +++ b/_podcast/data-leadership-coaching.md @@ -1,5 +1,5 @@ --- -title: "Helping technical professionals—especially data practitioners—become high-impact, inclusive leaders by combining mindset shifts and practical routines: making invisible technical work visible with a product/value lens, cultivating psychological safety and feedback skills, managing sustainable team scope, and using empathetic communication and stakeholder framing to influence across functions." +title: 'Data Leadership Coaching: Transition to Manager, Stakeholder Skills and Team Impact' short: Inclusive Data Leadership Coaching season: 18 episode: 1 @@ -7,23 +7,32 @@ guests: - terezaiofciu image: images/podcast/s18e01-inclusive-data-leadership-coaching.jpg ids: - anchor: 'on has three major components, this includes the main AI framework which - is the “TermAIte”, the main database, and the mobile application. These three - components work together to process the data that is inputted into the system - by the user. The data includes the images of the wood and the different environmental - conditions readings: temperature, humidity, and wood moisture. 
' youtube: Z4vOTgzLkJQ links: apple: https://podcasts.apple.com/us/podcast/inclusive-data-leadership-coaching-tereza-iofciu/id1541710331?i=1000650865043 spotify: https://open.spotify.com/episode/3zVzlQ0NmAVCtaFQXbqvHE?si=sSZhU-KXRamv2x5YZCDxAg youtube: https://www.youtube.com/watch?v=Z4vOTgzLkJQ - -description: Learn data leadership, feedback skills and influencing without authority to transition to manager, increase impact and lead cross-functional teams -intro: How do you move from an individual contributor to an effective data leader while coaching teams, giving constructive feedback, and influencing without formal authority? In this episode, Tereza Iofciu—an experienced data practitioner who has worked as a data scientist, data engineer, product manager, leads a coaching team, and teaches data science at neuefische—walks through the practical challenges of that transition. She shares her career journey from a computer science PhD to data lead and coach, early coaching experiments, and why managers need teammates who can solve problems independently.

Key topics include transition-to-manager tactics, building feedback skills and psychological safety, designing sustainable team span-of-control (the “pizza” metaphor), making foundational data work visible with product-minded KPIs, and influencing without authority through stakeholder framing, active listening, and empathy. Tereza also covers coaching delivery formats—one-shot sessions, CV reviews, and community initiatives like PyLadies and conference newcomer talks—and how to blend coaching, mentoring, and practical advice.

If you’re stepping into a lead role or coaching data teams, listen for actionable frameworks, feedback routines, and inclusive leadership practices to increase impact, visibility, and promotion readiness. Closing notes include how to reach Tereza and schedule time via Calendly +description: 'Master data leadership coaching: transition to manager, build stakeholder + skills, and boost team impact with feedback, visibility, and influence strategies.' +intro: How do you move from a strong individual contributor into a data leader who + can influence stakeholders, grow team impact, and build inclusive practices? In + this episode Tereza Iofciu—data science manager, data scientist, data engineer, + product manager, coach and community organizer—walks through her transition from + a PhD in computer science to leading teams and running data leadership coaching. +

We cover the practical challenges of the manager transition, experiments + that shaped her coaching approach, and ways to scale manager bandwidth using the + “pizza” span-of-control metaphor. Tereza breaks down feedback skills, psychological + safety, and routines for team feedback training, plus leadership learning through + workshops and frameworks. You’ll hear actionable guidance on increasing impact and + promotions, making foundational data work visible with product mindsets and KPIs, + and influencing without authority by framing projects to stakeholders’ priorities. + The conversation also explores cross-functional and inclusive leadership, self-promotion + versus bragging, and concrete coaching formats like one-shot sessions, CV reviews, + and Calendly-driven delivery.

Listen if you want practical data leadership + coaching on managing the IC-to-manager shift, stakeholder skills, and boosting your + team’s measurable impact. dateadded: 2024-03-31 - duration: PT00H56M35S - quotableClips: - name: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) startOffset: 86 @@ -131,7 +140,6 @@ quotableClips: startOffset: 3468 url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3468 endOffset: 3395 - transcript: - header: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) @@ -1210,8 +1218,12 @@ transcript: sec: 3481 time: '58:01' who: Tereza +context: 'Helping technical professionals—especially data practitioners—become high-impact, + inclusive leaders by combining mindset shifts and practical routines: making invisible + technical work visible with a product/value lens, cultivating psychological safety + and feedback skills, managing sustainable team scope, and using empathetic communication + and stakeholder framing to influence across functions.' --- - Links: * [LinkedIn](https://www.linkedin.com/in/tereza-iofciu/){:target="_blank"} diff --git a/_podcast/to-update/s18e09-dataops-observability-and-cure-for-data-team-blues.md b/_podcast/dataops-for-data-engineering.md similarity index 90% rename from _podcast/to-update/s18e09-dataops-observability-and-cure-for-data-team-blues.md rename to _podcast/dataops-for-data-engineering.md index ed8b805f..1524f049 100644 --- a/_podcast/to-update/s18e09-dataops-observability-and-cure-for-data-team-blues.md +++ b/_podcast/dataops-for-data-engineering.md @@ -1,5 +1,6 @@ --- -title: "DataOps is the episode’s unifying idea: treating data and ML work as engineered, production-ready products by applying software best practices—automation, CI/CD, testing and test data, immutable versioning, and observability—plus cultural change and leadership to remove fear, reduce rework and burnout, and shorten cycle time. 
The through-line argues that operationalizing the full lifecycle (day‑one provisioning through day‑two reliability and day‑three evolution) turns pockets of heroic, ad‑hoc data work into consistent, reliable delivery that enables safe, scalable use of AI and analytics." +title: 'DataOps for Data Engineering: Automation, Observability, CI/CD & Reliable + ML Deployments' short: DataOps, Observability, and The Cure for Data Team Blues season: 18 episode: 9 @@ -14,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/dataops-observability-and-the-cure-for-data-team/id1541710331?i=1000665429770 spotify: https://open.spotify.com/episode/02VoOk5UkMcvfq7VkSOegb youtube: https://www.youtube.com/watch?v=HzGpIxV8HtA - -description: Learn DataOps best practices for observability, CI/CD and deployment automation to reduce rework, boost model reliability and speed analytics delivery -intro: How do you move data teams from fragile, firefighting workloads to reliable, automated production? In this episode, Christopher Bergh of DataKitchen walks through his career journey from software engineering to data entrepreneurship and tackles that exact challenge through the lens of DataOps.

You’ll hear a clear definition of DataOps and why it matters—covering pre-cloud data engineering pain points, early DevOps lessons, and workforce burnout tied to poor deployment culture. Key topics include core DataOps practices (automation, observability, productivity), operational lifecycle thinking (Day One/Two/Three), model reliability and on‑call readiness for data science, CI/CD pipelines, regression testing and test data for analytics, and data versioning strategies. The conversation also addresses MLOps and LLMs, the limits of AI generation versus process improvement, containers versus serverless tradeoffs, and how observability-first monitoring drives real change.

Listeners will come away with practical starting steps for individual contributors and leaders to reduce rework and cycle time, improve deployment automation, and create sustainable data engineering and ML practices that lower turnover and increase reliability +description: Master DataOps, data engineering, and CI/CD to deploy reliable ML, cut + cycle time, reduce rework, and build production-ready tests for on-call readiness. +intro: How do you transform fragile data pipelines and unreliable ML deployments into + automated, observable, production-ready systems? In this episode Christopher Bergh, + CEO of DataKitchen and co-author of the DataOps Cookbook and DataOps Manifesto, + walks through practical DataOps for data engineering—drawing on 25+ years across + research, software engineering, and analytics.

We trace his career from + pre‑cloud SQL Server scaling challenges to early DevOps lessons, then dig into what + DataOps means for teams facing burnout, deployment fear, and inconsistent processes. + Key topics include automation, observability, CI/CD pipelines, regression tests + and test data for analytics, model reliability and on‑call readiness, end‑to‑end + deployment automation, data versioning, and the differences between containers and + serverless. The episode also clarifies MLOps and LLM buzzwords, explores day‑one/day‑two/day‑three + operational lifecycle practices, and outlines concrete steps to reduce rework and + cycle time.

If you’re a data engineer, data scientist, or engineering leader + looking to improve analytics delivery, this conversation offers actionable guidance + on implementing DataOps practices — automation, monitoring, CI/CD, and culture changes + — to make ML deployments more reliable and repeatable. dateadded: 2024-09-04 - duration: PT01H01M55S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -114,7 +128,6 @@ quotableClips: startOffset: 3847 url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3847 endOffset: 3715 - transcript: - header: Podcast Introduction - header: 'Guest Introduction: Christopher Bergh & DataKitchen' @@ -458,4 +471,12 @@ transcript: sec: 3847 time: '1:04:07' who: Alexey +context: 'DataOps is the episode’s unifying idea: treating data and ML work as engineered, + production-ready products by applying software best practices—automation, CI/CD, + testing and test data, immutable versioning, and observability—plus cultural change + and leadership to remove fear, reduce rework and burnout, and shorten cycle time. + The through-line argues that operationalizing the full lifecycle (day‑one provisioning + through day‑two reliability and day‑three evolution) turns pockets of heroic, ad‑hoc + data work into consistent, reliable delivery that enables safe, scalable use of + AI and analytics.' 
--- diff --git a/_podcast/to-update/s16e01-datatalks-club-anniversary-interview.md b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md similarity index 97% rename from _podcast/to-update/s16e01-datatalks-club-anniversary-interview.md rename to _podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md index 8846b08e..936fd0a3 100644 --- a/_podcast/to-update/s16e01-datatalks-club-anniversary-interview.md +++ b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md @@ -1,5 +1,6 @@ --- -title: "Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth and Evolution" +title: 'Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth and + Evolution' short: DataTalks.Club Anniversary Interview season: 16 episode: 1 @@ -15,12 +16,29 @@ links: apple: https://podcasts.apple.com/us/podcast/datatalks-club-anniversary-interview-alexey-grigorev/id1541710331?i=1000631114088 spotify: https://open.spotify.com/episode/0j1eKj9NbK3oAXHXHyaNae?si=M7rw9WixTvWw-BfKPXPwVg youtube: https://www.youtube.com/watch?v=nCqwZT9zA0M - -intro: How do you build and sustain a data community that helps people switch into machine learning careers while adapting to rapid AI change? In this anniversary episode of DataTalks.Club, contributors who transitioned from roles like Java development into machine learning and Python—and who now work full‑time on community and engineering efforts—reflect on that exact challenge. They cover practical topics including sustainability and monetization strategies, the roles of community and marketing leads, and decisions around building courses (LLM/AI content versus volatile material).

You’ll hear a detailed discussion of GPTs and LLMs and their effects on data workflows, hiring and take‑home tests; community programs like Project of the Week, competitions, and portfolio building; moderation and safety practices; and the evolution from a Slack community to instructor‑led Zoomcamps and a Machine Learning Bookcamp. The episode also examines outcomes—career switches, internships, and student success—plus metrics that matter (newsletter performance, active users, sponsors). Listen to learn concrete ideas for running a community‑driven learning program, designing resilient courses in an AI era, and measuring long‑term impact +intro: 'How do you build a sustainable data community that endures beyond meetup hype + and founder energy? In this episode Alexey Grigorev, founder of DataTalks.Club, + and Johanna Bayer, a researcher about to complete her PhD in machine learning for + clinical neuroimaging at the University of Melbourne, discuss three years of community + growth and evolution.

Alexey brings the perspective of launching and running + a global data community, while Johanna contributes her background in psychology, + computational neuroscience, and research software engineering, plus advocacy for + open source and open science. Together they explore core topics around sustainable + data community building: membership growth, volunteer and contributor roles, the + intersection of research software engineering with community practice, and how open + source and open science principles support longevity.

Listeners will come + away with concrete considerations for creating and maintaining a data-focused community—practical + lessons on community governance, contributor engagement, and aligning technical + and social infrastructure—making this episode valuable for anyone building a dataTalks-style + group, open source project, or research software community.' +topics: +- community building +- machine learning +- data science +- data engineering +- MLOps dateadded: 2023-10-16 - duration: PT01H02M57S - quotableClips: - name: Episode Opening & DataTalks.Club 3rd Anniversary startOffset: 0 @@ -106,7 +124,6 @@ quotableClips: startOffset: 3726 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3726 endOffset: 3777 - transcript: - header: Episode Opening & DataTalks.Club 3rd Anniversary - line: Welcome everyone. My name is Johanna and Welcome to DataTalks.Club. DataTalks.Club @@ -1202,4 +1219,8 @@ transcript: sec: 3777 time: '1:02:57' who: Johanna +context: 'Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth + and Evolution' +description: Discover DataTalks.Club's 3-year playbook to build a sustainable data + community—scaling, engagement & retention tactics that grow your network. 
--- diff --git a/_podcast/to-update/s19e03-datatalks-club-anniversary-podcast.md b/_podcast/datatalksclub-scaling-and-free-courses.md similarity index 95% rename from _podcast/to-update/s19e03-datatalks-club-anniversary-podcast.md rename to _podcast/datatalksclub-scaling-and-free-courses.md index d2c6fd2b..8a939e0a 100644 --- a/_podcast/to-update/s19e03-datatalks-club-anniversary-podcast.md +++ b/_podcast/datatalksclub-scaling-and-free-courses.md @@ -1,7 +1,6 @@ --- -title: "Context: Born during COVID as a volunteer meetup, DataTalks.Club scaled organically into a free-to-learn, community-first education platform—driven by practical courses (data engineering, ML, MLOps, LLMs), events, mentorship, and hands-on student success—while the founders stayed technically engaged and navigated financial, operational, and growth trade-offs. - -Core: The unifying idea is that sustainable, founder-led communities that combine technical experimentation and product-building with human-centered connection (mentorship, events, accessible learning) create lasting impact and resilience—allowing thoughtful stewardship to adapt to AI-driven change, achieve product-market fit, and scale education without sacrificing community values." +title: 'Inside Scaling DataTalks.Club: How We Built Free Data Engineering, MLOps & + LLM Courses' short: DataTalks.Club Anniversary Podcast season: 19 episode: 3 @@ -16,12 +15,10 @@ links: apple: https://podcasts.apple.com/us/podcast/datatalks-club-4th-anniversary-ama-podcast-alexey-grigorev/id1541710331?i=1000674473200 spotify: https://open.spotify.com/episode/50wIZxjq6goREu9pwXYITP?si=mPW0v5fBQxuBpg622CpCEA youtube: https://www.youtube.com/watch?v=GHbeXIKnkLQ - -description: How do you grow an open, free-to-learn data community into a sustainable education platform? 
In this episode, Alexey Grigorev — who founded DataTalks.Club during COVID and later transitioned to running it full-time — walks through the practical decisions and trade-offs behind building courses, community, and a product. We cover the course portfolio (machine learning, data engineering, MLOps, LLMs, stock analytics), organic growth from the Data Engineering Zoomcamp, and the technical stack for scaling (a Django-based course platform). Johanna shares lessons on sponsorship dynamics, prepaid tax realities in Germany, protecting community safety from scams, and staying technical through pet projects, LLM experiments and an automated storytelling pipeline. You’ll also hear about launching an LLM course and RAG experiments, early validation and product-market fit, scaling challenges and loneliness, plus concrete ways to contribute—guesting, mentoring, or joining project weeks. Listen for actionable insights on running an online data community, course productization, community moderation, and practical next steps if you want to build or support a data education ecosystem +description: 'Discover how DataTalks.Club built free Data Engineering, MLOps & LLM + courses: scaling open-source curriculum, community growth, and career-ready projects.' dateadded: 2024-11-08 - duration: PT01H03M17S - quotableClips: - name: Podcast Welcome & AMA Format (community links and live questions) startOffset: 0 @@ -146,7 +143,6 @@ quotableClips: startOffset: 3761 url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3761 endOffset: 3797 - transcript: - header: Podcast Welcome & AMA Format (community links and live questions) - line: Hi, everyone. Welcome to our event. 
This event is brought to you by DataTalks.Club, @@ -1191,4 +1187,30 @@ transcript: sec: 3797 time: '1:03:17' who: Johanna +context: 'Context: Born during COVID as a volunteer meetup, DataTalks.Club scaled + organically into a free-to-learn, community-first education platform—driven by practical + courses (data engineering, ML, MLOps, LLMs), events, mentorship, and hands-on student + success—while the founders stayed technically engaged and navigated financial, operational, + and growth trade-offs. + + Core: The unifying idea is that sustainable, founder-led communities that combine + technical experimentation and product-building with human-centered connection (mentorship, + events, accessible learning) create lasting impact and resilience—allowing thoughtful + stewardship to adapt to AI-driven change, achieve product-market fit, and scale + education without sacrificing community values.' +intro: How do you scale a volunteer-run learning community into a sustainable platform + offering free data engineering, MLOps, and LLM courses? In this episode Alexey Grigorev, + founder of DataTalks.Club, walks through the origin story of the project, the leap + to running it full‑time, and the practical tradeoffs of building free data engineering + courses at scale.

Alexey’s background as the founder guides discussions + on course portfolio decisions (Machine Learning, Data Engineering, MLOps, LLMs, + Stock Analytics), organic growth strategies like Zoomcamp word‑of‑mouth, and technical + choices—building the course platform in Django. We cover community safety and moderation, + revenue volatility from sponsorships, tax and cashflow considerations in Germany, + and how staying technical through pet projects and LLM experiments informed their + curriculum (including RAG and LLM course development).

Listeners will get + concrete takeaways on scaling online education, community-driven learning, course + product work, and practical ways to help—mentoring, guesting, or joining projects + and events. Useful for educators, course builders, and data practitioners wondering + how to create and sustain free, high‑quality data science and MLOps training. --- diff --git a/_podcast/to-update/s19e09-linguistics-and-fairness.md b/_podcast/fairness-in-ai-ml-engineering.md similarity index 94% rename from _podcast/to-update/s19e09-linguistics-and-fairness.md rename to _podcast/fairness-in-ai-ml-engineering.md index 43531ca4..cee83509 100644 --- a/_podcast/to-update/s19e09-linguistics-and-fairness.md +++ b/_podcast/fairness-in-ai-ml-engineering.md @@ -1,9 +1,6 @@ --- -title: "Context: -This episode follows Tamara’s journey from software and music‑tech engineering into computational linguistics and open‑source stewardship, and uses concrete case studies (credit‑scoring fairness, moderation systems) plus tool discussions (Fairlearn, interpretability packages, secure model serialization) to examine how technical choices, metrics, and developer practices translate into real societal outcomes. Recurring threads include tradeoffs in fairness metrics, the necessity of domain expertise and human‑in‑the‑loop processes, the engineering challenges of interoperable, secure ML tooling, and the role of community and practitioner education in shaping responsible ML. - -Core: -The unifying idea is that building fair, trustworthy AI is a sociotechnical engineering task: it requires not just algorithms but pragmatic, community‑driven tools, secure software practices, clear interpretability, and organizational processes that embed human judgment and domain knowledge so technical models produce just, accountable outcomes in the real world." 
+title: 'Fairness in AI/ML Engineering: Interpretability, Metrics and Sociotechnical + Design' short: Linguistics and Fairness season: 19 episode: 9 @@ -18,13 +15,27 @@ links: apple: https://podcasts.apple.com/us/podcast/linguistics-and-fairness-tamara-atanasoska/id1541710331?i=1000684411354 spotify: https://open.spotify.com/episode/6S4a85iiRzl7NU1HykXeKT?si=FNoDtj74T2ujQKzKdDWwzA youtube: https://www.youtube.com/watch?v=sXU9vMDBjmk - -description: 'Discover how to use Fairlearn to mitigate credit scoring bias and build explainable models: practical tools, human-in-the-loop tips, and evaluation tradeoffs.' -intro: 'How do you reduce bias in credit scoring models without sacrificing explainability? In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., Fairlearn maintainer, and contributor to scikit-learn and skops with a background in software engineering and computational linguistics — walks through practical approaches to fairness in AI. We dig into a real credit scoring use case, empirical findings on gender disparities, and the societal harms of biased models such as debt and repossession.

Tamara explains Fairlearn’s group fairness tools, visualization and mitigation methods, and the tradeoffs between false positives, false negatives, and demographic parity. She discusses how to choose sensitive groups in domain‑specific settings, the limits of automation, the need for human‑in‑the‑loop systems, and who in an organization should decide fairness tradeoffs. The episode also covers interpretability and explainable models — inspection tools, partial dependence, and cross‑library integration with scikit‑learn and estimator APIs — plus practical concerns like secure model serialization and community contribution paths.

Listen to learn actionable guidance on auditing and mitigating credit scoring bias, building explainable models, and integrating Fairlearn into real‑world ML workflows' +description: Learn fairness, interpretability, and metrics in AI/ML engineering—practical + sociotechnical design steps to evaluate bias, improve transparency, protect users. +intro: How do you reduce bias in credit scoring models without sacrificing explainability? + In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., + Fairlearn maintainer, and contributor to scikit-learn and skops with a background + in software engineering and computational linguistics — walks through practical + approaches to fairness in AI. We dig into a real credit scoring use case, empirical + findings on gender disparities, and the societal harms of biased models such as + debt and repossession.

Tamara explains Fairlearn’s group fairness tools, + visualization and mitigation methods, and the tradeoffs between false positives, + false negatives, and demographic parity. She discusses how to choose sensitive groups + in domain‑specific settings, the limits of automation, the need for human‑in‑the‑loop + systems, and who in an organization should decide fairness tradeoffs. The episode + also covers interpretability and explainable models — inspection tools, partial + dependence, and cross‑library integration with scikit‑learn and estimator APIs — + plus practical concerns like secure model serialization and community contribution + paths.

Listen to learn actionable guidance on auditing and mitigating credit + scoring bias, building explainable models, and integrating Fairlearn into real‑world + ML workflows. dateadded: 2025-02-24 - duration: PT00H59M14S - quotableClips: - name: Podcast Introduction & Episode Overview startOffset: 0 @@ -159,7 +170,6 @@ quotableClips: startOffset: 3494 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3494 endOffset: 3554 - transcript: - header: Podcast Introduction & Episode Overview - line: This week, we’ll talk about linguistic fairness and a sociotechnical perspective @@ -1062,8 +1072,22 @@ transcript: sec: 3554 time: '59:14' who: Alexey ---- +context: 'Context: This episode follows Tamara’s journey from software and music‑tech + engineering into computational linguistics and open‑source stewardship, and uses + concrete case studies (credit‑scoring fairness, moderation systems) plus tool discussions + (Fairlearn, interpretability packages, secure model serialization) to examine how + technical choices, metrics, and developer practices translate into real societal + outcomes. Recurring threads include tradeoffs in fairness metrics, the necessity + of domain expertise and human‑in‑the‑loop processes, the engineering challenges + of interoperable, secure ML tooling, and the role of community and practitioner + education in shaping responsible ML. + Core: The unifying idea is that building fair, trustworthy AI is a sociotechnical + engineering task: it requires not just algorithms but pragmatic, community‑driven + tools, secure software practices, clear interpretability, and organizational processes + that embed human judgment and domain knowledge so technical models produce just, + accountable outcomes in the real world.' 
+--- Links: * [Linkedin](https://www.linkedin.com/in/tamaraatanasoska/){:target="_blank"} diff --git a/_podcast/to-update/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md b/_podcast/finops-for-data-engineers.md similarity index 95% rename from _podcast/to-update/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md rename to _podcast/finops-for-data-engineers.md index 7c53361c..3cb343a4 100644 --- a/_podcast/to-update/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md +++ b/_podcast/finops-for-data-engineers.md @@ -1,6 +1,5 @@ --- -title: "Context: Eddy’s journey from industrial engineering and analyst tools to staff data engineer frames conversations about modern data stacks, digital warehouses, and FinOps as practical responses to real business problems. -Core: The episode’s unifying idea is that building impactful data systems requires translating domain and analyst expertise into operational, scalable, and cost-conscious engineering—combining the right tools (ELT, dbt, cloud platforms), disciplined practices (testing, monitoring, CI/CD), and FinOps accountability—to deliver trusted metrics, align technical work with business value, and enable continuous learning and adaptation." +title: 'FinOps for Data Engineers: Optimize Cloud Costs, BigQuery & Modern Data Stack' short: From Supply Chain Management to Digital Warehousing and FinOps season: 20 episode: 6 @@ -15,13 +14,25 @@ links: apple: https://podcasts.apple.com/us/podcast/from-supply-chain-management-to-digital-warehousing/id1541710331?i=1000702233986 spotify: https://open.spotify.com/episode/33YZpX7zE6YcBGbQK9Iclp youtube: https://www.youtube.com/watch?v=7ePp6wuxM5s - -description: Master FinOps for data engineers—optimize BigQuery costs with dbt, cloud cost modeling, tagging and forecasting to cut spend and boost pipeline efficiency -intro: How can data teams optimize cloud costs for analytics without slowing down delivery? 
In this episode, Eddy Zulkifly, Staff Data Engineer at Kinaxis, walks through practical FinOps strategies for data engineers working with the modern data stack. Eddy brings a decade of experience across Google Cloud, Azure, and AWS, plus prior roles at Home Depot and ongoing graduate studies at Georgia Tech, and explains how his background in supply chain and analytics shapes cost-aware engineering.

We cover building a digital data warehouse using ELT, dbt, BigQuery and orchestration; operational differences like change velocity, monitoring, and tests; and translating business needs into metric trees and data specs for FinOps. Eddy breaks down cloud cost modeling—VM sizing, storage tiers, reservation instances, and multi-cloud comparisons—alongside cost-tagging, OUCS and standardized reporting across AWS/GCP/Azure. He also shares vendor negotiation tactics, demand-forecasting analogies for capacity planning, and the strategic responsibilities of senior data engineers.

Listen to learn actionable approaches to cloud cost optimization, practical dbt and BigQuery patterns, and how to embed FinOps practices into your data platform and team workflows +description: 'Master FinOps for data engineers: optimize cloud costs with BigQuery + best practices, query tuning and governance to cut spend and boost performance.' +intro: How can data engineers bring FinOps practices into their day-to-day work to + control cloud spend across BigQuery and the modern data stack? In this episode, + Eddy Zulkifly — Staff Data Engineer at Kinaxis with a decade of experience building + data platforms on Google Cloud, Azure, and AWS — breaks down practical ways to make + cost optimization part of platform design and operations.

We explore core + topics including cloud cost optimization for data teams, BigQuery cost controls + and query efficiency, cost-aware architecture in the modern data stack, multi-cloud + considerations, and monitoring and governance for predictable spend. Eddy draws + on experience from Home Depot e-commerce and supply chain analytics, mentoring and + teaching roles, and his work on open-source data projects to translate FinOps principles + into engineering choices.

If you’re a data engineer or platform owner responsible + for budgets and performance, you’ll get actionable guidance on reducing unnecessary + cloud costs, improving visibility into usage, and designing pipelines that balance + performance with price. Listen to learn practical steps to align data engineering + practices with FinOps goals and make cloud spend more predictable. dateadded: 2025-04-30 - duration: PT00H59M54S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -119,7 +130,6 @@ quotableClips: startOffset: 3572 url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3572 endOffset: 3594 - transcript: - header: Podcast Introduction - line: Let’s get started. This week, we’ll discuss Digital Data Warehousing and FinOps. @@ -1076,8 +1086,15 @@ transcript: sec: 3594 time: '59:54' who: Eddy +context: 'Context: Eddy’s journey from industrial engineering and analyst tools to + staff data engineer frames conversations about modern data stacks, digital warehouses, + and FinOps as practical responses to real business problems. Core: The episode’s + unifying idea is that building impactful data systems requires translating domain + and analyst expertise into operational, scalable, and cost-conscious engineering—combining + the right tools (ELT, dbt, cloud platforms), disciplined practices (testing, monitoring, + CI/CD), and FinOps accountability—to deliver trusted metrics, align technical work + with business value, and enable continuous learning and adaptation.' 
--- - Links: * [Twitter](https://x.com/eddarief){:target="_blank"} diff --git a/_podcast/to-update/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md b/_podcast/from-academic-research-to-data-engineering-freelancing.md similarity index 94% rename from _podcast/to-update/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md rename to _podcast/from-academic-research-to-data-engineering-freelancing.md index 0d8bc1f8..d1227b3e 100644 --- a/_podcast/to-update/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md +++ b/_podcast/from-academic-research-to-data-engineering-freelancing.md @@ -1,7 +1,6 @@ --- -title: "Context: An electrical-engineering researcher turned founder and freelancer describes moving from simulation-driven academia into startups and consulting, recounting a pivot to synthetic medical imaging, building IoT/data prototypes, client acquisition, and practical tooling and workflows while balancing technical depth, cashflow risks, and continuous learning. - -Core: The episode’s unifying idea is a scientific, problem-first approach to data engineering and product development—validate hypotheses quickly with minimal viable (often manual) solutions, iterate fast using domain specialization and systems thinking, and pragmatically balance technical rigor with business constraints to turn research into real, sustainable products and freelance work." 
+title: 'From Academic Research to Lean Data Consulting: MVP Strategy, Problem-First + Thinking & Freelance Practice Building' short: From Simulation Algorithms to Production-Grade Data Systems season: 21 episode: 1 @@ -16,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/from-simulations-to-freelance-data-engineering-orells/id1541710331?i=1000720245457 spotify: https://open.spotify.com/episode/5HCSIO0mO8Pr5Yv9puZ72R youtube: https://www.youtube.com/watch?v=pkcpH5N-GP8 - -description: 'Learn synthetic medical imaging & data engineering: build MVPs, integrate simulation-HPC, optimize ETL, and shift to freelance with client-acquisition tactics.' -intro: 'How do you turn simulation research into usable synthetic medical imaging data for AI, build a minimal viable data pipeline, and pivot into freelance consulting? In this episode, Orell Garten — an electrical engineer trained in simulation algorithms who left a PhD during COVID and explored productization through a government-funded startup program — walks through that journey. We cover his simulation work in RF and wave propagation, the startup pivot to synthetic medical imaging data for AI, and the go-to-market lesson of problem-first versus technology-first.

Listen for practical data engineering guidance: minimal viable data work, simulation–HPC integration, secure data management, and an MVP workflow built on manual extraction, CSVs, and local analysis. Orell also discusses scientific-method product discovery, preventing overengineering with weekly feedback, and tool choices (Python, C++, DBT, Docker, DuckDB). He explains launching a freelance practice via LinkedIn, prototype delivery for IoT data engineering, client acquisition, and managing runway and cashflow. If you’re building synthetic data pipelines, medical imaging datasets, or transitioning to freelance data engineering, this episode delivers concrete tactics, risks to plan for, and hands-on techniques you can apply immediately.' +description: Learn lean data, MVP strategy, and problem-first thinking to build a + freelance consulting practice—turn research into actionable services and win clients + fast. +intro: How do you turn academic research and simulation expertise into a lean data + consulting practice without getting bogged down in perfect solutions? In this episode + we talk with Orell Garten, an electrical engineering graduate who focused on simulation + algorithms, left a PhD during COVID, and learned through a government-funded startup + program how to translate scientific research into real products.

Orell + breaks down problem-first thinking, MVP strategy for data and simulation projects, + and the practical steps involved in freelance practice building after academia. + We explore how to apply rigorous simulation methods to client problems, prioritize + minimal viable products over perfection, and navigate the transition from lab-based + research to lean data consulting.

Listeners will come away with a clearer + framework for deciding what to build first, how to validate assumptions with lightweight + experiments, and how to position technical skills for consulting engagements. This + episode is for researchers and engineers considering freelance work, consultants + refining their MVP approach, and anyone interested in applying simulation methods + and problem-first thinking to deliver practical data-driven solutions. dateadded: 2025-08-05 - duration: PT01H03M31S - quotableClips: - name: Episode Introduction & Overview startOffset: 0 @@ -112,7 +124,6 @@ quotableClips: startOffset: 3811 url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3811 endOffset: 3811 - transcript: - header: Episode Introduction & Overview - line: This week, we'll talk about many different things. We will discuss our guest’s @@ -881,8 +892,18 @@ transcript: sec: 3811 time: '1:03:31' who: Alexey ---- +context: 'Context: An electrical-engineering researcher turned founder and freelancer + describes moving from simulation-driven academia into startups and consulting, recounting + a pivot to synthetic medical imaging, building IoT/data prototypes, client acquisition, + and practical tooling and workflows while balancing technical depth, cashflow risks, + and continuous learning. + Core: The episode’s unifying idea is a scientific, problem-first approach to data + engineering and product development—validate hypotheses quickly with minimal viable + (often manual) solutions, iterate fast using domain specialization and systems thinking, + and pragmatically balance technical rigor with business constraints to turn research + into real, sustainable products and freelance work.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/ogarten/){:target="_blank"} diff --git a/_podcast/to-update/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md similarity index 95% rename from _podcast/to-update/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md rename to _podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md index ca8c2948..a61f7ef2 100644 --- a/_podcast/to-update/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md +++ b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md @@ -1,7 +1,6 @@ --- -title: "Context: Isabella Bicalho’s episode traces a career arc from biology to machine learning—through internships (INRIA), freelancing, open-source contributions, teaching, and community engagement—illustrating practical projects, networking, and pedagogical work as the vehicles for growth. - -Core: The unifying idea is that continuous, community‑centered, project‑based learning—combining hands‑on applied work, open‑source contribution, mentorship, clear communication, and judicious use of AI tools—serves as the most effective pathway to build job‑ready skills, bridge disciplines, and create real-world impact in data science and ML." 
+title: 'From Biology to ML: Build a Data Science Portfolio with Open-Source, Computer + Vision & Transformers' short: Career advice, learning, and featuring women in ML and AI season: 19 episode: 7 @@ -16,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/career-advice-learning-and-featuring-women-in-ml-and/id1541710331?i=1000680294201 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=rx69Xf98QZqGqgpEQgzX2w youtube: https://www.youtube.com/watch?v=GifY8Zn-pnU - -description: Build a data science portfolio with open-source computer vision projects, gain real job-ready experience, networking tactics and freelance tips -intro: 'How do you pivot from biology into machine learning and build a job-ready data science portfolio using open-source, computer vision and transformers? In this episode Isabella Bicalho — a Machine Learning Engineer and Data Scientist with three years of hands-on AI development and prior computational research — walks through her path from Biology (University of Maranhão, University of Marseille) to ML, including an INRIA internship on biomarkers and immunotherapy prediction.

We cover practical steps for portfolio building: using open-source contributions and community courses (Hugging Face) to get experience, real project examples like green space segmentation with Sentinel-2 and the trade-offs between CNNs and transformers, and applied freelance work such as recommendation systems and knowledge graph automation. Isabella also explains how statistics became her gateway to transformers, how to find low-barrier open-source projects (docs, data, applied code), and how collaboration builds soft skills recruiters value.

Listen to learn concrete strategies for creating a data science portfolio, where to find computer vision and transformer projects, how to leverage community and mentorship, and how to communicate your work to land roles in machine learning.' +description: Build a data science portfolio with open-source computer vision & transformers—gain + hands-on projects, GitHub proof, and interview-ready ML skills. +intro: How do you move from a biology background into machine learning and build a + data science portfolio that actually gets noticed? In this episode, Isabella Bicalho + — a machine learning engineer and data scientist with three years of hands‑on AI + development and roots in computational research — walks through practical approaches + for showcasing skills with open-source, computer vision, and transformer projects. +

We cover how to translate domain knowledge from biology into ML problem + framing, the role of open-source contributions in a data science portfolio, and + project ideas that demonstrate computer vision and transformer expertise. Isabella + also discusses how to document work, choose reproducible experiments, and highlight + impact for hiring managers or collaborators. She runs a newsletter dedicated to + showcasing women’s accomplishments in data science, bringing an equity-minded perspective + to building visible work.

If you’re building a data science portfolio, + shifting careers into ML, or want concrete ways to leverage open-source and modern + architectures like transformers and computer vision models, this conversation offers + practical guidance, realistic project priorities, and tips for making your work + discoverable to recruiters and the community. dateadded: 2024-12-17 - duration: PT01H03M42S - quotableClips: - name: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella Bicalho)' @@ -121,7 +133,6 @@ quotableClips: startOffset: 3822 url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3822 endOffset: 3822 - transcript: - header: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella Bicalho)' @@ -1072,8 +1083,16 @@ transcript: sec: 3822 time: '1:03:42' who: Alexey ---- +context: 'Context: Isabella Bicalho’s episode traces a career arc from biology to + machine learning—through internships (INRIA), freelancing, open-source contributions, + teaching, and community engagement—illustrating practical projects, networking, + and pedagogical work as the vehicles for growth. + Core: The unifying idea is that continuous, community‑centered, project‑based learning—combining + hands‑on applied work, open‑source contribution, mentorship, clear communication, + and judicious use of AI tools—serves as the most effective pathway to build job‑ready + skills, bridge disciplines, and create real-world impact in data science and ML.' 
+--- Links: * [Github](https://github.com/bellabf){:target="_blank"} diff --git a/_podcast/to-update/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md similarity index 94% rename from _podcast/to-update/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md rename to _podcast/from-computer-vision-research-to-autonomous-driving-ai.md index d46ea9ef..63052d89 100644 --- a/_podcast/to-update/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md +++ b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md @@ -1,7 +1,6 @@ --- -title: "Context — This episode moves from the guest’s finance-to-self-driving AI career and research in computer vision to concrete projects (AI Guide Dog, malaria mapping), deep dives on sensor and model tradeoffs (LiDAR vs cameras, on-vehicle inference, model compression), operational realities (data collection, labeling, validation pipelines, staged releases, edge cases), system-level questions (reinforcement learning vs perception, multimodal LLMs), and practical career/project advice. - -Core — Building trustworthy, real‑world AI is an engineering-driven cycle that tightly couples pragmatic sensor and model choices, efficient on‑device inference, rigorous data and validation pipelines, staged safe deployment, and ethical/social purpose: the episode’s unifying idea is that successful AI systems aren’t just about better algorithms but about integrating perception, hardware constraints, data practices, testing, and human-centered impact into a continuous, safety‑first development process that scales across domains from autonomous vehicles to assistive tech and public‑health applications." 
+title: Applying Computer Vision Research to Building Production-Ready AI Systems for + Real-World Deployment short: 'Lessons from Applied AI: Tesla, Waymo, and Beyond' season: 22 episode: 2 @@ -16,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/lessons-from-applied-ai-tesla-waymo-and-beyond/id1541710331?i=1000731200298 spotify: https://open.spotify.com/episode/0h9eX7m6H2TPqOjUwb3Jw6?si=I4rKrHXpQTmS7cJBMJbUMA youtube: https://www.youtube.com/watch?v=vK_SxyqIfwk - -description: Discover LiDAR vs camera tradeoffs and model compression for on-vehicle inference in autonomous driving - learn quantization, edge speedups, testing tips -intro: How should self-driving systems balance LiDAR, cameras and edge compute to deliver safe, real-time perception? In this episode, Aishwarya Jadhav — a machine learning engineer with a Master's from Carnegie Mellon and four years deploying multimodal LLMs, generative AI and computer vision — walks through the practical tradeoffs in autonomous driving AI. Drawing on her assistive-tech work (AI Guide Dog) and research background, she explains LiDAR vs camera principles, radar and cost constraints, and Tesla’s camera-first approach for 360° vision.

We cover on-vehicle inference limits, model compression techniques like quantization and speedups for edge inference, plus validation pipelines from simulation to closed tracks and on-road testing. You’ll also hear about sensor data management, labeling strategies, multimodal LLM challenges in autonomy, gesture recognition for traffic control, and cross-domain transfer to robotics and drones. The conversation closes with real-world complexity, testing sensitive cases, and actionable career pathways and projects.

If you want concrete guidance on sensor fusion, model compression, and deployment-ready perception systems — plus practical testing and data strategies for self-driving AI — this episode delivers grounded, technical insight +description: Master computer vision to build production-ready AI systems - learn deployment, + scaling, validation and monitoring to launch reliable real-world models. +intro: 'How do you take computer vision research out of the lab and turn it into production-ready + AI that actually works in the real world? In this episode Aishwarya Jadhav, a Machine + Learning Engineer with over four years of industry experience and a Master’s from + Carnegie Mellon University, walks through the challenges of applying computer vision + research to production systems. Her background spans multimodal LLMs, generative + AI, and computer vision, with research experience in multimodal deep learning and + text information extraction and projects including assistive technologies for the + visually impaired.

We cover the bridge between applied research and engineering: + translating prototypes into robust, deployable models, integrating multimodal pipelines, + balancing model accuracy with latency and scalability, and practical considerations + for production-ready AI and real-world deployment. Listeners will gain concrete + perspectives on how research informs product choices, what to prioritize when deploying + computer vision systems, and how multimodal approaches and generative models fit + into end-to-end solutions. This episode is useful for ML engineers, researchers, + and product teams focused on building reliable, deployable computer vision and multimodal + AI systems.' dateadded: 2025-10-21 - duration: PT00H59M01S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -136,7 +148,6 @@ quotableClips: startOffset: 3515 url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3515 endOffset: 3541 - transcript: - header: Podcast Introduction - line: Hey everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -1188,8 +1199,22 @@ transcript: sec: 3541 time: '59:01' who: Alexey ---- +context: 'Context — This episode moves from the guest’s finance-to-self-driving AI + career and research in computer vision to concrete projects (AI Guide Dog, malaria + mapping), deep dives on sensor and model tradeoffs (LiDAR vs cameras, on-vehicle + inference, model compression), operational realities (data collection, labeling, + validation pipelines, staged releases, edge cases), system-level questions (reinforcement + learning vs perception, multimodal LLMs), and practical career/project advice. 
+ Core — Building trustworthy, real‑world AI is an engineering-driven cycle that tightly + couples pragmatic sensor and model choices, efficient on‑device inference, rigorous + data and validation pipelines, staged safe deployment, and ethical/social purpose: + the episode’s unifying idea is that successful AI systems aren’t just about better + algorithms but about integrating perception, hardware constraints, data practices, + testing, and human-centered impact into a continuous, safety‑first development process + that scales across domains from autonomous vehicles to assistive tech and public‑health + applications.' +--- Links: * [Linkedin](https://www.linkedin.com/in/aishwaryajadhav8/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md b/_podcast/from-data-freelancer-to-startup-open-source-products.md similarity index 97% rename from _podcast/to-update/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md rename to _podcast/from-data-freelancer-to-startup-open-source-products.md index c470d923..256483b3 100644 --- a/_podcast/to-update/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md +++ b/_podcast/from-data-freelancer-to-startup-open-source-products.md @@ -1,5 +1,5 @@ --- -title: "Turning hands‑on consulting and hard‑won data engineering experience into a library‑first, open‑source company that solves a concrete pain—declarative JSON→relational transformations for Python users—by validating through workshops and docs, iterating with real user feedback, and scaling via bottom‑up adoption, ecosystem integrations, and paid complementary offerings rather than agency growth or platform lock‑in." 
+title: 'From Data Freelancer to Startup: Open-Source Products and Bottom-Up Adoption' short: 'The Entrepreneurship Journey: From Freelancing to Starting a Company' season: 17 episode: 1 @@ -14,9 +14,23 @@ links: apple: https://podcasts.apple.com/us/podcast/the-entrepreneurship-journey-from-freelancing-to/id1541710331?i=1000638715212 spotify: https://open.spotify.com/episode/7wBmJHSXPHoW0mEIbNDgqr?si=z7klLtveT1ioGi6bg8hR7Q youtube: https://www.youtube.com/watch?v=vOpEQiCsaLw - -description: Discover building open-source JSON-to-Relational data pipelines in Python, practical DLT patterns, anti-pattern fixes, bootstrap tips to speed adoption -intro: 'How do you build an open-source data company that helps Python developers turn messy JSON into reliable relational tables? In this episode, Adrian Brudaru — an economics-trained, Berlin-based founder who moved from startups to freelancing and now co‑founded a data tooling company — walks through the journey of launching developer-focused open‑source software for data engineering.

We cover why dumping JSON into data warehouses is an anti‑pattern and introduce the core DLT concept: a declarative JSON→relational transformation engine aimed at Python devs. Adrian explains product iteration (engine, abstractions, user feedback), running workshops as a validation loop, treating documentation as a product asset, and practical bootstrapping strategies (savings, consulting revenue, scrappy operations). He also discusses team formation via projects, go‑to‑market tactics with a bottom‑up, library‑first approach, ecosystem partnerships (DocDB integration and joint demos), roadmap plans for a paid complement to the open‑source library, and experiments with source generation like OpenAPI generators for pipelines.

Listen if you want concrete technical and GTM guidance on building an open‑source data company, implementing declarative JSON→relational workflows for Python, and how to validate and scale developer tooling without prematurely becoming a platform.' +description: 'Discover how to build an open-source data product for Python devs: bootstrap, + ship DLT transforms, and drive bottom-up adoption to find PMF.' +intro: How do you move from freelancing to building an open‑source data company that + wins via bottom‑up adoption? In this episode Adrian Brudaru — an economics graduate + who pivoted to business analysis in Berlin, then spent years freelancing before + co‑founding a data startup — walks through that transition and the practical tradeoffs + he encountered.

We cover lessons from freelancing and agency work, why + they chose product over agency growth, and the recurring pain of stakeholder alignment + versus technical setup. Adrian explains DLT — a declarative JSON→relational transformation + for data pipelines — and why the product targets Python users as a developer‑focused + library. Hear how workshops, documentation, and live support doubled as product + validation, how scrappy bootstrapping and consulting revenue funded early payroll, + and what signals indicate product–market fit for open‑source tooling.

If + you’re building open‑source data tools, developer tooling, or plotting a bottom‑up + go‑to‑market, this episode offers concrete tactics on iteration, docs-as-product, + ecosystem partnerships, and positioning against platforms like Airbyte/Fivetran + — helping you prioritize engineering, adoption, and sustainable monetization. topics: - entrepreneurship - freelance @@ -27,9 +41,7 @@ topics: - consulting dateadded: 2023-12-18 date: 2025-11-07 - duration: PT00H59M43S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -151,7 +163,6 @@ quotableClips: startOffset: 3656 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3656 endOffset: 3583 - transcript: - header: Podcast Introduction - header: 'Episode Overview: Building an Open‑Source Data Company' @@ -1298,6 +1309,11 @@ transcript: sec: 3696 time: '1:01:36' who: Alexey +context: Turning hands‑on consulting and hard‑won data engineering experience into + a library‑first, open‑source company that solves a concrete pain—declarative JSON→relational + transformations for Python users—by validating through workshops and docs, iterating + with real user feedback, and scaling via bottom‑up adoption, ecosystem integrations, + and paid complementary offerings rather than agency growth or platform lock‑in. 
--- Links: diff --git a/_podcast/to-update/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md similarity index 95% rename from _podcast/to-update/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md rename to _podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md index 77cfbd84..4e9c9c91 100644 --- a/_podcast/to-update/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md +++ b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md @@ -1,9 +1,6 @@ --- -title: "Context: The episode traces a journey from hands-on technical beginnings (trade school, web design, C++, DevOps, automation) through burnout and volunteering, into community management, open source, NGO founding, career coaching, and a lifestyle experiment (off-grid living), while exploring personality, team fit, product focus, and practical processes applied across corporate and nonprofit settings. - -Core: This episode centers on intentionally aligning technical skills, systems-thinking problem solving, and personal values—leveraging automation, community, and experimentation—to design a sustainable, impact-driven career and life that bridges corporate, volunteer, and personal worlds. - -Key themes: transferable problem-solving and automation; values-driven career design; community & open source as leverage; translating corporate processes to NGOs; personality and team-fit for role choice; experimentation in lifestyle and governance." 
+title: 'From DevOps to Data Engineering: Automation, Open Source & Volunteering for + Career Transitions' short: Career choices, transitions and promotions in and out of tech season: 19 episode: 8 @@ -18,13 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/career-choices-transitions-and-promotions-in-and-out/id1541710331?i=1000683499310 spotify: https://open.spotify.com/episode/0UW7fLgm9fqMG64GQwvgIN?si=ZixbzDcZT2mNkVrJjZVbeA youtube: https://www.youtube.com/watch?v=QKWu5-6_6TE - -description: 'Learn DevOps-to-Data-Engineering career tactics: automation, open source & volunteering to build skills, earn rapid promotions, and lead projects.' -intro: How do you move from DevOps into data engineering while using automation, open source contributions, and volunteering to shape your career? In this episode, Agita Jaunzeme — a DevOps/DataOps engineer, community manager, educator and NGO founder focused on inclusion in Porto — walks through that exact path.

We trace her journey from trade school and early programming to configuration management and rapid promotion through scripting repetitive tasks, then into burnout, Erasmus+ volunteering, and community work at VMware. Key topics include automation case studies, building and contributing to open source (Versatile Data Kit), applying corporate processes and agile documentation to NGOs, volunteer management versus employment, spotting volunteer-to-career opportunities, and the practical differences between data scientists and data engineers. We also cover community management, career coaching, founding an NGO, meetup activities, and even an off-grid living experiment.

Listen for actionable guidance on automation best practices, how open source community work can reopen corporate doors, designing volunteer processes, and aligning technical career moves with personal values — practical takeaways for anyone navigating a career pivot into data engineering or community-driven tech work +description: 'Discover DevOps to Data Engineering strategies: open source contributions + and volunteering to build pipelines, projects and a hireable portfolio.' +intro: How do you pivot from DevOps to data engineering without starting over? In + this episode Agita Jaunzeme — a DevOps/DataOps engineer, manager, community builder + and NGO founder — breaks down practical strategies for career transitions that center + on automation, open source participation, and volunteering.

Agita draws + on experience across corporate, startup, open source and non‑governmental sectors + and shares how automation and DevOps practices translate to data engineering and + DataOps. We discuss using open source projects to build credibility, volunteering + and community work to gain hands‑on experience and networks, and concrete approaches + to getting promoted or making purposeful career pivots. Agita also talks about designing + work that aligns with passion and purpose, including founding an NGO to support + expats and locals in Porto.

Listeners will come away with actionable ideas + for bridging skill gaps, leveraging automation and open source contributions, and + using volunteering as a pathway into data engineering roles. This episode is for + DevOps professionals, aspiring data engineers, and career changers seeking pragmatic + advice on transitions, promotions, and aligning work with meaningful impact. dateadded: 2025-01-12 - duration: PT01H01M46S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -142,7 +151,6 @@ quotableClips: startOffset: 3657 url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3657 endOffset: 3706 - transcript: - header: Podcast Introduction - line: We have a special guest today, Agita. She has done a lot in her career, including @@ -1163,8 +1171,21 @@ transcript: sec: 3706 time: '1:01:46' who: Agita ---- +context: 'Context: The episode traces a journey from hands-on technical beginnings + (trade school, web design, C++, DevOps, automation) through burnout and volunteering, + into community management, open source, NGO founding, career coaching, and a lifestyle + experiment (off-grid living), while exploring personality, team fit, product focus, + and practical processes applied across corporate and nonprofit settings. + Core: This episode centers on intentionally aligning technical skills, systems-thinking + problem solving, and personal values—leveraging automation, community, and experimentation—to + design a sustainable, impact-driven career and life that bridges corporate, volunteer, + and personal worlds. + + Key themes: transferable problem-solving and automation; values-driven career design; + community & open source as leverage; translating corporate processes to NGOs; personality + and team-fit for role choice; experimentation in lifestyle and governance.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/agita/){:target="_blank"} diff --git a/_podcast/to-update/s21e07-lessons-from-two-decades-of-ai.md b/_podcast/from-game-ai-to-modern-ai-agents.md similarity index 94% rename from _podcast/to-update/s21e07-lessons-from-two-decades-of-ai.md rename to _podcast/from-game-ai-to-modern-ai-agents.md index 36c3384b..e6b05900 100644 --- a/_podcast/to-update/s21e07-lessons-from-two-decades-of-ai.md +++ b/_podcast/from-game-ai-to-modern-ai-agents.md @@ -1,7 +1,6 @@ --- -title: "Context: The episode follows a two-decade arc from game-AI research and evolutionary/RL methods through industry product leadership to present work on LLM-driven multi‑agent assistants—covering technical deep dives (prompt engineering, orchestration vs flow, sequential thinking servers, code generation, procedural content), tooling and deployment challenges (local models, model specialization, monitoring), and career/publishing lessons. - -Core narrative: The unifying idea is that practical, production‑ready AI agents are built by applying game‑AI engineering principles—minimal, modular task decomposition; evolutionary and learning‑based search; and clear orchestration patterns—to modern LLMs and multi‑agent systems, balancing creative capabilities with efficiency, tooling, and real‑world deployability." 
+title: 'From Game AI to LLM Agents: 20-Year Evolution of Multi-Agent Systems, Evolutionary + Algorithms & Modern AI Tooling' short: Lessons from Two Decades of AI season: 21 episode: 7 @@ -16,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/lessons-from-two-decades-of-ai-micheal-lanham/id1541710331?i=1000728604349 spotify: https://open.spotify.com/episode/7uhe5ZysRi07S6mb14nnox youtube: https://www.youtube.com/watch?v=DSxqUlumM3A - -description: Discover multi-agent AI, evolutionary algorithms and LLM tooling—learn agent workflows, prompt engineering, game AI examples, code patterns & career tips -intro: How do you design practical multi-agent AI assistants that scale from game AI experiments to real-world LLM tooling? In this episode, Micheal Lanham — best‑selling author and AI engineer with two decades of work across games, graphics, GIS and machine learning — traces the path from game AI and reinforcement learning to evolutionary algorithms and modern agent architectures. We explore his research on games for cognitive testing, evolutionary deep learning for hyperparameter and architecture search, and how those methods inform prompt engineering and multi-agent workflows.

Key topics include minimalist agent workflow design and task decomposition, flow versus orchestration, parallel collaboration patterns, agent tooling such as the OpenAI Agent SDK and MCP integration, sequential “thinking” servers and scratchpads, plus practical code examples from game development and GPT-5 Pro case studies. The conversation also covers generative AI in games, local and open‑source LLM trends, model specialization, and evaluation/monitoring pipelines. Whether you’re building AI assistants, experimenting with evolutionary algorithms, or integrating LLM tooling into products, this episode offers concrete techniques, tooling insights, and career guidance for AI engineers +description: Discover 20 years of Game AI, Evolutionary Algorithms, and LLM agents—practical + AI tooling, architecture tips, and faster deployment for real projects. +intro: How did techniques born in game AI become the foundation for today's LLM-driven + agents, and what practical lessons does that 20-year evolution offer to engineers + and researchers? In this episode, AI engineer and best-selling author Micheal Lanham + walks through the lineage from game AI and multi-agent systems to modern LLM agents, + evolutionary algorithms, and contemporary AI tooling.

Micheal brings hands-on + experience across games, graphics, GIS, enterprise software, and machine learning, + and is the author of Evolutionary Deep Learning, Hands-On Reinforcement Learning + for Games, and AI Agents in Action. He discusses how deep reinforcement learning, + evolutionary methods, and generative AI intersect to build intelligent systems, + and how industry practices from oil and gas to fintech shaped tooling and architectures + for multi-agent systems.

Listeners will come away with a clearer view of + the technical continuity between game AI and current agent design, practical considerations + when applying evolutionary algorithms and reinforcement learning, and what modern + AI tooling enables for deploying LLM agents. This episode is useful for AI practitioners, + game developers, and anyone interested in the evolution of multi-agent systems, + evolutionary algorithms, and agent-based AI. dateadded: 2025-10-01 - duration: PT01H48S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -140,7 +152,6 @@ quotableClips: startOffset: 3623 url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3623 endOffset: 3648 - transcript: - header: Podcast Introduction - line: Hi everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -1035,8 +1046,19 @@ transcript: sec: 3648 time: '1:00:48' who: Michael ---- +context: 'Context: The episode follows a two-decade arc from game-AI research and + evolutionary/RL methods through industry product leadership to present work on LLM-driven + multi‑agent assistants—covering technical deep dives (prompt engineering, orchestration + vs flow, sequential thinking servers, code generation, procedural content), tooling + and deployment challenges (local models, model specialization, monitoring), and + career/publishing lessons. + Core narrative: The unifying idea is that practical, production‑ready AI agents + are built by applying game‑AI engineering principles—minimal, modular task decomposition; + evolutionary and learning‑based search; and clear orchestration patterns—to modern + LLMs and multi‑agent systems, balancing creative capabilities with efficiency, tooling, + and real‑world deployability.' 
+--- Links: * [Linkedin](https://www.linkedin.com/in/micheal-lanham-189693123/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s19e05-large-hadron-collider-and-mentorship.md b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md similarity index 95% rename from _podcast/to-update/s19e05-large-hadron-collider-and-mentorship.md rename to _podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md index b7c29e73..e13e35d0 100644 --- a/_podcast/to-update/s19e05-large-hadron-collider-and-mentorship.md +++ b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md @@ -1,7 +1,6 @@ --- -title: "Context: A physicist’s journey from building and analyzing massive collider experiments to applying those technical, collaborative, and software-engineering skills in industry—culminating in a deliberate turn toward mentoring others through career transitions and leadership challenges. - -Core theme: Experimental physics training—rooted in tackling large-scale data, complex systems, rigorous software and teamwork practices—is a powerful, transferable foundation, and mentorship is the essential bridge that translates that expertise into effective industry roles, career progression, and leadership." 
+title: 'From Collider Physics to Data Science: Research Software Engineering, Interview + Prep & Mentorship' short: Large Hadron Collider and Mentorship season: 19 episode: 5 @@ -16,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/large-hadron-collider-and-mentorship-anastasia-karavdina/id1541710331?i=1000677930293 spotify: https://open.spotify.com/episode/6AZ26Q8O4VBkC9YtUNzhab?si=75154323e14d4dca youtube: https://www.youtube.com/watch?v=kV0ZDy2UtJA - -description: Discover research software engineering, data science, and mentorship strategies from a collider physicist - interview prep, CI/CD practices, and career-shift tips -intro: 'How do you pivot from collider physics to industry data science while mastering research software engineering, interview prep, and mentorship? In this episode, Anastasia Karavdina — a particle physicist turned data scientist with experience at Large Hadron Collider experiments, Blue Yonder, and Kaufland e‑commerce — walks through that transition and the concrete skills that made it possible.

We unpack collider physics basics (particle acceleration, detector imaging, event volumes, statistical analysis), roles in large research collaborations, and how hardware development and data analysis intersect. Anastasia explains how research software engineering practices — version control, CI/CD, and reproducible workflows — translate into enterprise machine learning and supply chain AI. She also covers interview challenges (position fit, evolving hiring expectations, behavioral interviews and cultural fit in Germany), how to prepare leadership stories, and practical tactics for moving into ML engineer and data science roles. Finally, she discusses mentoring: motivation, boundaries, paid vs free options, and platforms like MentorCruise.

Listen to gain actionable guidance on translating high‑energy physics expertise into data science, improving technical interview performance, and building effective mentorship relationships.' +description: Discover how collider physics skills power data science careers and research + software engineering; gain interview prep tactics, mentorship tips, and growth. +intro: How do you move from collider physics to industry data science while keeping + rigorous research software engineering practices, succeeding in interviews, and + giving or getting effective mentorship? In this episode Anastasia Karavdina — a + particle physicist turned data scientist who worked on Large Hadron Collider experiments + and later built AI solutions at Blue Yonder and Kaufland e‑commerce — walks through + that journey.

We start with collider basics (particle acceleration, detector + imaging, event volumes, and roles in large collaborations) to show the data scale + and statistical thinking that map to industry. Anastasia explains dual hardware‑and‑analysis + roles, how multivariate analysis translates to machine learning, and concrete research + software engineering practices like version control and CI/CD. She also covers interview + prep (technical fit, behavioral stories, cultural fit in Germany) and evolving hiring + expectations, plus supply chain AI use cases. Finally, she discusses mentoring — + how she started, structuring mentorship, paid vs. free options, and platforms like + MentorCruise.

Listen to learn practical steps for translating physics expertise + into data science careers, applying RSE workflows, preparing interview narratives, + and finding mentorship to accelerate your next move. dateadded: 2024-12-17 - duration: PT01H01M22S - quotableClips: - name: Episode Opening & Guest Introduction startOffset: 0 @@ -132,7 +144,6 @@ quotableClips: startOffset: 3652 url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3652 endOffset: 3682 - transcript: - header: Episode Opening & Guest Introduction - line: This week, we’ll talk about your career. Specifically, your transition from @@ -1014,8 +1025,16 @@ transcript: sec: 3682 time: '1:01:22' who: Alexey ---- +context: 'Context: A physicist’s journey from building and analyzing massive collider + experiments to applying those technical, collaborative, and software-engineering + skills in industry—culminating in a deliberate turn toward mentoring others through + career transitions and leadership challenges. + Core theme: Experimental physics training—rooted in tackling large-scale data, complex + systems, rigorous software and teamwork practices—is a powerful, transferable foundation, + and mentorship is the essential bridge that translates that expertise into effective + industry roles, career progression, and leadership.' +--- Links: * [LinkedIn](https://www.linkedin.com/in/dr-anastasia-karavdina/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/from-marketing-to-product-owner-in-search.md b/_podcast/from-marketing-to-product-owner-in-search.md new file mode 100644 index 00000000..90c6d7ef --- /dev/null +++ b/_podcast/from-marketing-to-product-owner-in-search.md @@ -0,0 +1,7 @@ +--- +description: Learn actionable growth strategies, marketing tips, and productivity + hacks to scale faster—get frameworks, examples, and clear next steps today. 
+--- +Links: + +* [Post](https://www.linkedin.com/posts/leracaiman_elasticsearch-ecommerce-activity-7106615081588674560-5WQO){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s21e05-from-astronomy-to-applied-ml.md b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md similarity index 96% rename from _podcast/to-update/s21e05-from-astronomy-to-applied-ml.md rename to _podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md index f95e9852..c1e009d8 100644 --- a/_podcast/to-update/s21e05-from-astronomy-to-applied-ml.md +++ b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md @@ -1,5 +1,6 @@ --- -title: "Modern astrophysical discovery—illustrated by the challenge of finding rare radio-emitting stars—depends on the seamless integration of domain knowledge, careful physics-informed data curation, and production-grade, scalable data/ML workflows: from telescope instrumentation and multi-wavelength cross-matching through positional-uncertainty analysis, to cloud-native pipelines, reproducible tooling, and deployment. The episode’s through-line is that building curated, interpretable datasets and end-to-end infrastructure (not just models) is the essential bridge that turns complex observational data into reliable science, practical education, and transferable career skills." 
+title: 'From Radio Astronomy to Applied ML: MeerKAT Data Pipelines, Multi-Wavelength + Cross-Matching & Production-Grade ML Systems' short: From Astronomy to Applied ML season: 21 episode: 5 @@ -14,13 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/from-astronomy-to-applied-ml-daniel-egbo/id1541710331?i=1000728601772 spotify: https://open.spotify.com/episode/0hV7d1zSKO7ykGDZxjXyJ8 youtube: https://www.youtube.com/watch?v=b92gwrsVQtg - -description: Discover MEERKAT radio-emitting stars using ML & cloud pipelines — learn Astropy tools, catalog cross-matching, and production deployment at scale -intro: 'How do you find rare radio-emitting stars in massive MEERKAT datasets—and turn that search into reliable machine learning and cloud data pipelines? In this episode Daniel Egbo, an astrophysicist turned ML engineer and PhD candidate at the University of Cape Town, walks through the practical intersection of astronomy, ML, and cloud engineering. We cover MEERKAT and SKA context, the electromagnetic spectrum, and the core research goal: detecting point sources in radio images and confirming them via multi-wavelength cross-matching and physics-based verification. Daniel explains positional uncertainty, foreground/background confusion, and why curated datasets are essential for future ML. He also shares tooling and infrastructure practices—Astropy, NumPy/SciPy, JupyterHub, cloud compute, orchestration with Airflow/Kestra, MinIO and Spark—and outlines an end-to-end pipeline pattern (MySQL → MinIO → Spark → warehouse). Listeners will come away with concrete methods for building reproducible astronomical data workflows, practical machine learning readiness steps, and resources for learning and deployment (edge testing, LLMs, and community courses) to apply to radio telescope and astronomical data projects.' 
+description: 'Discover MeerKAT radio astronomy pipelines and machine learning: build + production ML, master multi-wavelength cross-match, accelerate discovery.' +intro: How do you transform raw radio astronomy observations into reliable, production-grade + machine learning systems that enable multi-wavelength science? In this episode we + talk with Daniel Egbo — an astrophysicist turned machine learning engineer and AI + ambassador (Arize, Tavily) and PhD candidate at the University of Cape Town — about + bridging radio astronomy and applied ML. Daniel explains the challenges of working + with MeerKAT data pipelines, strategies for multi-wavelength cross-matching, and + the engineering practices needed to take models from research to production.

+ You’ll hear about end-to-end ML and LLM applications with an emphasis on reliability, + practical evaluation, and knowledge-retrieval assistants, plus how data science + techniques apply to astronomy workflows. Whether you’re building pipelines for radio + telescopes, tackling cross-matching across optical and radio catalogs, or aiming + to deploy robust production-grade ML systems, this episode offers concrete perspectives + on data handling, evaluation, and operationalizing models in scientific contexts. + Listen to gain actionable insights for integrating astrophysical datasets with modern + ML tooling and improving model reliability in real-world deployments. dateadded: 2025-09-30 - duration: PT01H04M35S - quotableClips: - name: Podcast Introduction & Lunar Eclipse Anecdote startOffset: 0 @@ -142,7 +155,6 @@ quotableClips: startOffset: 3742 url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3742 endOffset: 3875 - transcript: - header: Podcast Introduction & Lunar Eclipse Anecdote - line: Hi everyone, welcome to our event. This event is brought to you by Data Talks @@ -1282,8 +1294,15 @@ transcript: sec: 3875 time: '1:04:35' who: Alexey +context: 'Modern astrophysical discovery—illustrated by the challenge of finding rare + radio-emitting stars—depends on the seamless integration of domain knowledge, careful + physics-informed data curation, and production-grade, scalable data/ML workflows: + from telescope instrumentation and multi-wavelength cross-matching through positional-uncertainty + analysis, to cloud-native pipelines, reproducible tooling, and deployment. The episode’s + through-line is that building curated, interpretable datasets and end-to-end infrastructure + (not just models) is the essential bridge that turns complex observational data + into reliable science, practical education, and transferable career skills.' 
--- - Links: * [Linkedin](https://www.linkedin.com/in/egbodaniel/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md b/_podcast/from-semiconductor-data-to-applied-machine-learning.md similarity index 95% rename from _podcast/to-update/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md rename to _podcast/from-semiconductor-data-to-applied-machine-learning.md index c3dd2044..b5e2d919 100644 --- a/_podcast/to-update/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md +++ b/_podcast/from-semiconductor-data-to-applied-machine-learning.md @@ -1,5 +1,6 @@ --- -title: "A single through-line: the episode is about a hands‑on, end‑to‑end journey into applied machine learning — a multidisciplinary career pivot powered by self‑education and cohort/community support that takes messy, high‑frequency industrial data through pragmatic tool‑building, model development, explainability tradeoffs, and MLOps (APIs, containers, Terraform, ONNX) into real production impact, with a commitment to teaching and scaling that practice to others." +title: 'From Classical Guitar to Production ML: Nonlinear Career Path Through Semiconductors, + Yield Analytics & Community-Driven Learning' short: 'From Semiconductors to Machine Learning: A Career in Data and Teaching' season: 21 episode: 8 @@ -14,15 +15,29 @@ links: apple: https://podcasts.apple.com/us/podcast/from-semiconductors-to-machine-learning-a-career-in/id1541710331?i=1000731197034 spotify: https://open.spotify.com/episode/1znRtNRf5IUYcBblJYH53r youtube: https://www.youtube.com/watch?v=B2tzuUg5uZs - -description: 'Learn predictive maintenance & yield analytics for semiconductors: deploy ML with Flask, Docker & MLOps to boost yield, enable explainability, and ship APIs.' 
-intro: 'How do you move machine learning for predictive maintenance and yield analytics out of a notebook and into production on the fab floor? In this episode, Dashel Ruiz Perez—data analyst, ML engineer, and educator who spent nearly a decade at Microchip Technology—walks through practical steps for deploying ML to improve semiconductor yield. Drawing on millisecond tool logs, process telemetry, and a “Wafers at Risk” predictive model, Dashel explains how to build explainable yield analytics, iterate with Kaggle-style EDA and feature engineering, and ensure model portability with ONNX.

Listen for concrete implementation details: turning models into Flask REST APIs, containerizing with Docker, using Google Cloud and Terraform for infrastructure automation, and MLOps best practices for production monitoring. Dashel also covers hands-on learning paths from ML Zoomcamp—course deliverables beyond Jupyter notebooks, common roadblocks (Mac M1 issues, wide categorical data), and examples like a COVID comorbidity API demo and a TensorFlow computer vision project. If you’re responsible for semiconductor predictive maintenance, yield analytics, or ML deployment, this episode gives actionable guidance on tools, workflows, and learning strategies to get models reliably running in production.' +description: Discover a nonlinear path from classical guitar to production ML, semiconductors + & yield analytics. Learn actionable career tactics and community-driven learning. +intro: How do you move from playing classical guitar to applying machine learning + in semiconductor yield analytics? In this episode Dashel Ruiz Perez — a data analyst, + ML engineer, and educator — walks us through a nonlinear career path that spans + nearly a decade at Microchip Technology and now teaching programming and data skills + through ThriveDX. With roles across production, process, yield, and software engineering, + Dashel explains how hands-on production experience informs production analytics + and ML engineering work in semiconductor manufacturing.

We cover practical + topics including translating manufacturing problems into data science projects, + building models for yield optimization, and the role of software engineering in + production analytics. Dashel also discusses learning pathways — from degrees in + computer science and data analytics at Western Governors University to graduating + from ML Zoomcamp — and how community-driven learning accelerates skill acquisition. +

Listeners will gain actionable guidance on career transition strategies, + concrete examples of applying machine learning and data analytics in semiconductor + contexts, and resources for growing technical skills through community and formal + training. This episode is useful for engineers, data analysts, and anyone considering + a switch into ML, AI, or semiconductor yield analytics. dateadded: 2025-10-21 - duration: PT01H13M08S - quotableClips: -- name: Podcast Introduction & Data Docs Club +- name: Podcast Introduction & DataTalksClub startOffset: 0 url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=0 endOffset: 111 @@ -134,9 +149,8 @@ quotableClips: startOffset: 4369 url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4369 endOffset: 4388 - transcript: -- header: Podcast Introduction & Data Docs Club +- header: Podcast Introduction & DataTalksClub - line: Hi everyone, welcome to our event. This event is brought to you by the Data Docs Club, a community of people who love data. We have weekly events and today is one of them. If you want to find out more about our events, there is a link @@ -1146,8 +1160,13 @@ transcript: sec: 4388 time: '1:13:08' who: Dashel +context: 'A single through-line: the episode is about a hands‑on, end‑to‑end journey + into applied machine learning — a multidisciplinary career pivot powered by self‑education + and cohort/community support that takes messy, high‑frequency industrial data through + pragmatic tool‑building, model development, explainability tradeoffs, and MLOps + (APIs, containers, Terraform, ONNX) into real production impact, with a commitment + to teaching and scaling that practice to others.' 
--- - Links: * [Linkedin](https://www.linkedin.com/in/dashel-ruiz-perez-2b036172/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s16e06-unwritten-rules-for-success-in-machine-learning.md b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md similarity index 96% rename from _podcast/to-update/s16e06-unwritten-rules-for-success-in-machine-learning.md rename to _podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md index abfab51d..49e7a789 100644 --- a/_podcast/to-update/s16e06-unwritten-rules-for-success-in-machine-learning.md +++ b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md @@ -1,7 +1,6 @@ --- -title: "Context — A career arc from software engineer to VP of ML frames concrete stories about promotion, informal leadership, stakeholder selling, demo-driven buy‑in, rapid prototyping, baseline-first experiments, domain immersion, and building full‑stack production capabilities. - -Core narrative — Success in applied machine learning is not primarily about squeezing marginal accuracy from models but about bridging technical craft and business impact: become a product‑focused, full‑stack practitioner and leader who rapidly validates hypotheses with simple baselines and demos, speaks the language of stakeholders, builds trust and reputation, communicates trade‑offs clearly, and embeds ML into real user workflows so technical work directly drives measurable outcomes." 
+title: 'From Software Engineer to VP of Machine Learning: Stakeholder Buy-In, Rapid + POCs and Full-Stack Skills' short: The Unwritten Rules for Success in Machine Learning season: 16 episode: 6 @@ -16,13 +15,24 @@ links: apple: https://podcasts.apple.com/us/podcast/the-unwritten-rules-for-success-in-machine-learning/id1541710331?i=1000635206953 spotify: https://open.spotify.com/episode/2c8E0hZ02osih7ljEB6I6f?si=lSPp07r4TgmpGQey0cUjsA youtube: https://www.youtube.com/watch?v=su2M058m3Lw - -description: 'Discover how to lead and ship actionable ML products: master stakeholder communication, rapid POCs, demo design, and full‑stack ML to deliver business impact.' -intro: 'How do you move from software engineer to VP of Machine Learning while learning to lead, sell, and ship ML products that actually change outcomes? In this episode Jack Blandin—now VP of Data Science & Machine Learning at Fi, who transitioned from full‑stack engineering to data science and has managed teams of 2–15—walks through that exact journey.

We cover Jack’s career pivot and early leadership lessons, practical approaches to problem framing and reputation management, and how to speak the language of stakeholders (CAC, KPIs) to win buy‑in for ML projects. You’ll hear concrete tactics for selling ML: fast POCs and user‑centric demos (Gradio, Streamlit), starting with baseline heuristics and manual processes, running quick hypothesis validation experiments, and communicating model trade‑offs without obsessing over raw accuracy. Jack also explains the importance of domain immersion, full‑stack engineering for production ML, and prioritizing actionability over accuracy—illustrated by a churn model lesson.

If you lead or ship ML products, this episode delivers actionable guidance on machine learning leadership, rapid prototyping, demo design, and stakeholder communication to move models from prototype to product.' +description: Discover how to win stakeholder buy-in, build rapid POCs and scale machine + learning with full-stack skills—accelerate to VP-level impact and leadership. +intro: How do you move from a hands-on software engineer to a VP of Machine Learning + while getting stakeholders to say “yes,” delivering rapid POCs, and building the + full-stack skills teams need? In this episode Jack Blandin walks through that transition. + Jack began as a Software Engineer in 2015, shifted into Data Science and Machine + Learning in 2017, and has held ML and leadership roles at Fi, Wayfair, Trunk Club, + and GoHealth—managing teams of 2 to 15. He’s now VP of Data Science & Machine Learning + at Fi, finishing a PhD focused on ML, reinforcement learning, and algorithmic fairness, + and launching a hiring marketplace for data and ML professionals.

We dig + into practical strategies for stakeholder buy-in, how to scope and run rapid proofs + of concept that prove value, and which full-stack skills accelerate career growth + in ML and data science leadership. Listeners will come away with actionable approaches + to design fast, business-focused POCs, communicate technical tradeoffs to non-technical + stakeholders, and level up skill sets that bridge engineering and product — useful + for anyone aiming to scale into ML management or improve ML engineering outcomes. dateadded: 2023-11-20 - duration: PT00H53M23S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -120,7 +130,6 @@ quotableClips: startOffset: 3182 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3182 endOffset: 3203 - transcript: - header: Podcast Introduction - header: 'Guest Overview: Jack’s career arc from software engineer to VP of ML' @@ -1241,8 +1250,18 @@ transcript: sec: 3216 time: '53:36' who: Jack ---- +context: 'Context — A career arc from software engineer to VP of ML frames concrete + stories about promotion, informal leadership, stakeholder selling, demo-driven buy‑in, + rapid prototyping, baseline-first experiments, domain immersion, and building full‑stack + production capabilities. + Core narrative — Success in applied machine learning is not primarily about squeezing + marginal accuracy from models but about bridging technical craft and business impact: + become a product‑focused, full‑stack practitioner and leader who rapidly validates + hypotheses with simple baselines and demos, speaks the language of stakeholders, + builds trust and reputation, communicates trade‑offs clearly, and embeds ML into + real user workflows so technical work directly drives measurable outcomes.' 
+--- Links: * [Jack's LinkedIn profile](https://www.linkedin.com/in/jackblandin/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s19e06-ai-in-industry-trust-return-on-investment-and-future.md b/_podcast/generative-ai-chatbots-in-production-security.md similarity index 94% rename from _podcast/to-update/s19e06-ai-in-industry-trust-return-on-investment-and-future.md rename to _podcast/generative-ai-chatbots-in-production-security.md index 9bbb9b89..0908fdb8 100644 --- a/_podcast/to-update/s19e06-ai-in-industry-trust-return-on-investment-and-future.md +++ b/_podcast/generative-ai-chatbots-in-production-security.md @@ -1,7 +1,6 @@ --- -title: "Context: Across a career-spanning conversation about linguistics, industry AI roles, chatbot hacks, safety failures, mitigations, human-in-the-loop workflows, translation and ancient-language challenges, and industry trade-offs, the episode maps how generative AI is rapidly democratized yet brittle, risky, and dependent on data and linguistic nuance. - -Core: The central imperative is that realizing the real-world promise of generative AI requires marrying deep linguistic and domain expertise with layered technical defenses, human oversight, and pragmatic product trade-offs—so systems can be safe, trustworthy, and useful despite hallucinations, manipulation, data-quality limits, and operational constraints." 
+title: 'Hardening Generative AI Chatbots: Prevent Prompt Injection, Data Exfiltration + & Hallucinations' short: 'AI in Industry: Trust, Return on Investment and Future' season: 19 episode: 6 @@ -16,13 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/ai-in-industry-trust-return-on-investment-and-future/id1541710331?i=1000679505962 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=a99463e34ffb48f1 youtube: https://www.youtube.com/watch?v=bT7-HRNCltk - -description: Discover generative AI, chatbot safety, and prompting strategies to prevent hallucinations and data exfiltration, boost translation quality and ROI -intro: How do we balance the rapid democratization of generative AI with real-world chatbot safety, trust, and operational value? In this episode, we speak with a linguist-turned-computational-linguist who now serves as a principal key expert in AI advising on technology and risk. We trace their career path into industry and then dig into the practical security and reliability challenges of large-scale chatbots.

Topics include the rise of prompt engineering and new “AI experts,” a large-scale chatbot hacking exercise and its findings on hallucinations, legal exposure, and financial incidents, and data exfiltration methods like overloaded prompts and knowledge-base retrieval. The conversation moves to concrete mitigations—output validation, query analysis, layered defenses, and non-LLM classifiers—plus usability and ROI issues that slow adoption. We also cover human-in-the-loop review, AI-assisted translation workflows, prompt customization for controlled machine translation, and broader multilingual and historical-linguistics challenges (from orthography to low-resource languages).

Listen to learn actionable strategies for chatbot safety, practical prompt and translation techniques, and how to evaluate trade-offs between research innovation and operational risk +description: Learn to harden generative AI chatbots against prompt injection and data + exfiltration—defenses, detection, and techniques to reduce hallucinations. +intro: How do you harden generative AI chatbots against prompt injection, data exfiltration, + and dangerous hallucinations? In this episode Maria Sukhareva — a principal key + expert in AI at Siemens with 15+ years working at the intersection of linguistics + and computational AI — walks through real-world risks, attack findings, and practical + defenses for chatbot security.

We trace Maria’s path from linguist to industry + expert and her role advising on technology and risk, then dive into a large-scale + chatbot hacking exercise and the common failures that lead to legal exposure and + financial incidents. Key topics include prompt injection and knowledge-base exfiltration + techniques, hallucination causes and their impact on trust, and mitigations such + as output validation, query analysis, layered defenses, and the use of non-LLM classifiers. + We also cover human-in-the-loop workflows, AI-as-assistant moderation tools, and + prompt customization for controlled machine translation.

If you’re building + or deploying generative AI systems, this episode offers practical, production-focused + guidance on chatbot security, AI safety, and improving accuracy and trust in deployed + models. dateadded: 2024-12-17 - duration: PT00H59M53S - quotableClips: - name: Episode Introduction & Guest Overview startOffset: 0 @@ -113,7 +124,6 @@ quotableClips: startOffset: 3554 url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3554 endOffset: 3593 - transcript: - header: Episode Introduction & Guest Overview - line: This week, we’re discussing the practical application of generative AI in @@ -892,4 +902,15 @@ transcript: sec: 3593 time: '59:53' who: Alexey +context: 'Context: Across a career-spanning conversation about linguistics, industry + AI roles, chatbot hacks, safety failures, mitigations, human-in-the-loop workflows, + translation and ancient-language challenges, and industry trade-offs, the episode + maps how generative AI is rapidly democratized yet brittle, risky, and dependent + on data and linguistic nuance. + + Core: The central imperative is that realizing the real-world promise of generative + AI requires marrying deep linguistic and domain expertise with layered technical + defenses, human oversight, and pragmatic product trade-offs—so systems can be safe, + trustworthy, and useful despite hallucinations, manipulation, data-quality limits, + and operational constraints.' 
--- diff --git a/_podcast/to-update/s19e02-human-centered-ai-for-disordered-speech-recognition.md b/_podcast/human-centered-ai-automatic-speech-recognition.md similarity index 93% rename from _podcast/to-update/s19e02-human-centered-ai-for-disordered-speech-recognition.md rename to _podcast/human-centered-ai-automatic-speech-recognition.md index 24e6a2e1..3d147743 100644 --- a/_podcast/to-update/s19e02-human-centered-ai-for-disordered-speech-recognition.md +++ b/_podcast/human-centered-ai-automatic-speech-recognition.md @@ -1,7 +1,5 @@ --- -title: "Context: The episode surveys how linguistics and computational methods intersect to address limitations of mainstream ASR for people with disordered, accented, or atypical speech — covering phonetics and morpho‑syntax foundations, distinctions between accent and disorder, modern ASR advances and failure modes, data collection and GDPR constraints, targeted datasets and augmentation, multimodal and transfer approaches, personalization and on‑device deployment, and the ethical/assistive implications. - -Core: Build ASR systems that are human‑centered and linguistically informed—prioritizing inclusive data practices, phonetics‑aware modeling, adaptive techniques (augmentation, transfer learning, multimodal cues, personalization), and ethical deployment—so speech technology recognizes and respects the communicative diversity and needs of people with disordered or atypical speech." 
+title: 'Human-Centered Speech Recognition: ASR for Disordered Speech and Accents' short: Human-Centered AI for Disordered Speech Recognition season: 19 episode: 2 @@ -16,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/human-centered-ai-for-disordered-speech-recognition/id1541710331?i=1000671805368 spotify: https://open.spotify.com/show/0pck8zuiXdI0OrCg86DAPy?si=ac857db69d484277 youtube: https://www.youtube.com/watch?v=yTZ4cddD7DU - -description: 'Learn ASR strategies for disordered speech: data, multimodal cues and personalization to build robust assistive voice systems and on-device speech tools.' -intro: 'How can automatic speech recognition (ASR) systems reliably understand disordered and atypical speech without compromising user identity or privacy? In this episode Katarzyna Foremniak, a computational linguist with 10+ years in NLP who developed language models for Audi and Porsche and teaches at the University of Warsaw, tackles that question through a human‑centered lens.

We explore core phonetics and morpho‑syntax concepts that matter for disordered speech, distinctions between accents and disorders, and practical limits of modern models (e.g., Whisper) when faced with atypical articulation, stammering, and voice quality variation. Katarzyna walks through data‑driven strategies: specialized datasets, data augmentation, transfer learning and fine‑tuning with limited data, plus multimodal ASR approaches that integrate lip‑reading and visual cues. The conversation also covers data collection challenges (GDPR, clinical data, language and dialect coverage), personalization and on‑device adaptation, and assistive and automotive use cases with deployment constraints.

If you work on speech recognition, accessibility, or multilingual NLP, this episode offers concrete technical strategies and ethical considerations for building personalized, multimodal ASR systems that better serve people with speech disorders.' +description: Discover ASR solutions for disordered speech and accents—boost recognition + accuracy, reduce bias, and design accessible human-centered models now. +intro: How can automatic speech recognition (ASR) better serve people with disordered + speech and diverse accents? In this episode Katarzyna Foremniak, a computational + linguist with over 10 years in NLP who has built language models for Audi and Porsche + and teaches at the University of Warsaw, examines human‑centered ASR for atypical + and accented speech. We trace her move from linguistics to computational approaches + and cover core phonetics and morpho‑syntax concepts that matter for speech recognition. +

Key topics include distinctions between accents and speech disorders, limitations + of standard ASR datasets, strategies for disordered speech recognition such as specialized + datasets, data augmentation and synthetic variations, multimodal ASR with lip‑reading, + and transfer learning for fine‑tuning with limited data. We also discuss data collection + challenges (GDPR, clinical data), bilingualism effects, stammering and fluency, + pronunciation issues like Polish consonant clusters, and practical workflows including + Amazon Transcribe plus LLM post‑processing. Deployment tradeoffs—model size, on‑device + setups, automotive voice use cases—and assistive applications round out the conversation. +

Listeners interested in speech recognition, disordered speech, accents, + and ethical data practices will gain practical technical strategies and a clearer + view of research and deployment priorities. dateadded: 2024-10-10 - duration: PT00H57M19S - quotableClips: - name: 'Episode Introduction: Human‑Centered AI for Disordered Speech' startOffset: 0 @@ -136,7 +148,6 @@ quotableClips: startOffset: 3925 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3925 endOffset: 3439 - transcript: - header: 'Episode Introduction: Human‑Centered AI for Disordered Speech' - header: Guest Introduction & Career Highlights (Katarzyna Foremniak) @@ -913,8 +924,20 @@ transcript: sec: 3925 time: '1:05:25' who: Katarzyna ---- +context: 'Context: The episode surveys how linguistics and computational methods intersect + to address limitations of mainstream ASR for people with disordered, accented, or + atypical speech — covering phonetics and morpho‑syntax foundations, distinctions + between accent and disorder, modern ASR advances and failure modes, data collection + and GDPR constraints, targeted datasets and augmentation, multimodal and transfer + approaches, personalization and on‑device deployment, and the ethical/assistive + implications. + Core: Build ASR systems that are human‑centered and linguistically informed—prioritizing + inclusive data practices, phonetics‑aware modeling, adaptive techniques (augmentation, + transfer learning, multimodal cues, personalization), and ethical deployment—so + speech technology recognizes and respects the communicative diversity and needs + of people with disordered or atypical speech.' 
+--- Links: * [Eleven elevator](https://www.youtube.com/live/NMS2VnDveP8){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s16e07-cracking-code-machine-learning-made-understandable.md b/_podcast/interpretable-machine-learning.md similarity index 96% rename from _podcast/to-update/s16e07-cracking-code-machine-learning-made-understandable.md rename to _podcast/interpretable-machine-learning.md index 62e85035..06577acf 100644 --- a/_podcast/to-update/s16e07-cracking-code-machine-learning-made-understandable.md +++ b/_podcast/interpretable-machine-learning.md @@ -1,8 +1,5 @@ --- -title: "Context: Christoph Molnar’s journey from statistician and Kaggle competitor to full‑time technical author frames a consistent practice: hands‑on modeling, careful documentation, and public, iterative teaching about interpretable machine learning techniques (SHAP, conformal prediction, etc.), plus the practical mechanics of publishing and staying current. - -Core narrative: At the episode’s center is the idea that trustworthy, useful machine learning emerges not from opaque accuracy chasing but from a disciplined loop of hands‑on experimentation, clear interpretation, and open communication — using interpretable methods and calibrated uncertainty to debug and understand models, keeping meticulous logs and competitions to stay sharp, and publishing incrementally (with feedback and transparency) to teach others while refining your own understanding. This unified through‑line ties together the technical tools, the writing and publishing choices, and the everyday workflows that make complex ML accessible, reproducible, and actionable." 
-short: 'Cracking the Code: Machine Learning Made Understandable' +title: 'Interpretable Machine Learning: SHAP, Conformal Prediction and Model Trust' season: 16 episode: 7 guests: @@ -16,13 +13,26 @@ links: apple: https://podcasts.apple.com/us/podcast/cracking-the-code-machine-learning-made/id1541710331?i=1000636448000 spotify: https://open.spotify.com/episode/3SjDB0E2of9IS9TXn2Fof3?si=FwWH99FGTgmL1OGI3-sLAg youtube: https://www.youtube.com/watch?v=LBuGzyOkx7c - -description: Discover Interpretable ML, SHAP and Conformal Prediction with Python examples and self-publishing tips, debug models, calibrate uncertainty, and publish -intro: How can we make machine learning interpretable in practice — and how do you turn that expertise into clear, usable technical writing? In this episode, Christoph Molnar, statistician, machine learner, and author of Interpretable ML, walks through the tools and workflows he uses to answer that question.

Christoph traces his path from statistics and Kaggle competitions to becoming a full‑time technical writer, and drills into core topics like SHAP for debugging models, conformal prediction for calibrated uncertainty and prediction sets, and practical Python examples. We also cover interpretability vs. accuracy, terminology around explainable AI, and keeping skills sharp through competitions and an Obsidian logbook.

On the writing side, Christoph explains his chapter‑by‑chapter “publishing in public” workflow, self‑publishing choices (Leanpub, Amazon KDP, print‑on‑demand), feedback strategies with beta readers, and advice for aspiring technical writers. Listen for actionable guidance on applying interpretable machine learning techniques and concrete steps for turning technical work into publishable, useful content +description: 'Discover interpretable machine learning: learn SHAP, Conformal Prediction, + calibrated uncertainty and model trust to debug models and boost reliability.' +intro: How can you reliably trust a machine learning model’s predictions in real-world + settings? In this episode Christoph Molnar — statistician, machine learner, and + author of Interpretable Machine Learning — walks through practical approaches for + building model trust. Drawing on his experience from Kaggle competitions to authoring + a technical book, Christoph explains the trade-offs between interpretability and + accuracy and shows how interpretability techniques help debug models.

Key + topics include a SHAP deep dive with practical Python examples for attributing predictions, + conformal prediction for calibrated uncertainty and creating prediction sets, and + the difference between explainable AI and interpretable machine learning. He also + discusses using interpretability to debug models, maintain hands‑on skills through + competitions, and document experiments for reproducible insights.

If you + want concrete tools to evaluate model trust—how to quantify uncertainty, interpret + feature effects with SHAP, and produce reliable prediction sets with conformal methods—this + episode offers clear, actionable guidance and directions for further reading. Ideal + for data scientists and ML practitioners focused on interpretable machine learning, + model debugging, and trustworthy AI. dateadded: 2023-11-27 - duration: PT00H56M20S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -125,7 +135,6 @@ quotableClips: startOffset: 3413 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3413 endOffset: 3380 - transcript: - header: Podcast Introduction - header: 'Guest Intro: Christoph Molnar, Interpretable ML Author' @@ -1317,8 +1326,21 @@ transcript: sec: 3422 time: '57:02' who: Alexey ---- +context: 'Context: Christoph Molnar’s journey from statistician and Kaggle competitor + to full‑time technical author frames a consistent practice: hands‑on modeling, careful + documentation, and public, iterative teaching about interpretable machine learning + techniques (SHAP, conformal prediction, etc.), plus the practical mechanics of publishing + and staying current. + Core narrative: At the episode’s center is the idea that trustworthy, useful machine + learning emerges not from opaque accuracy chasing but from a disciplined loop of + hands‑on experimentation, clear interpretation, and open communication — using interpretable + methods and calibrated uncertainty to debug and understand models, keeping meticulous + logs and competitions to stay sharp, and publishing incrementally (with feedback + and transparency) to teach others while refining your own understanding. This unified + through‑line ties together the technical tools, the writing and publishing choices, + and the everyday workflows that make complex ML accessible, reproducible, and actionable.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/christoph-molnar/){:target="_blank"} diff --git a/_podcast/to-update/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md similarity index 96% rename from _podcast/to-update/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md rename to _podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md index ceb56df2..7037e5de 100644 --- a/_podcast/to-update/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md +++ b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md @@ -1,7 +1,5 @@ --- -title: "Context: A coach-led roadmap for technical career changers (often returning parents) that covers defining an ideal role, choosing a specialization, validating skills through projects, targeting companies, crafting resumes/stories, and running consistent, relationship-driven outreach. - -Core theme: Intentionally design a focused, market-aligned career identity and then convert it into tangible evidence and relationships—using targeted projects, tailored applications, informational interviews, and a weekly, measurable outreach plan—to turn validated skills and clear storytelling into job offers." +title: 'Tech Job Search Strategy: Portfolio Projects, Resume Tips and Networking' short: Accelerating The Job Hunt for The Perfect Job in Tech season: 17 episode: 6 @@ -16,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/accelerating-the-job-hunt-for-the-perfect-job-in/id1541710331?i=1000643971899 spotify: https://open.spotify.com/episode/7giHGC86pjtIYrLOvwP7g4?si=NB9w6S6QTfCBHB_n93LkBQ youtube: https://www.youtube.com/watch?v=PchwbIs0tOg - -description: 'Master data science career change with a 4-pillar job search: informational interviews, resume strategy & specialization tips to land your role.' 
-intro: 'Facing a career change into data science but unsure how to structure your job search, networking, and informational interviews? In this episode, Sarah Mestiri — data scientist and certified career & interview coach with 6+ years in tech (international companies, FIS, startups) — breaks down a practical Four‑Pillar Job Search Framework: goals, networking, CV, and strategy. Sarah draws on her transition from full‑stack engineering to data science and her work supporting women returning to work to show how to define your ideal role, choose a specialization (ML engineering, data engineering, MLOps), and validate skills through projects versus courses.

You’ll hear step‑by‑step guidance on job research and informational interviews: outreach messaging, key questions to ask, and how to build mutual value and referrals. The episode also covers resume strategy, weekly networking action plans, target company selection, part‑time transition tactics, and assessment tools to align strengths and interests. Listen to gain an actionable job search framework, templates for outreach and interviews, and resources to accelerate a successful data science career change.' +description: 'Learn a four-pillar tech job search: build portfolio projects, sharpen + your resume and network strategically to land ML/data roles faster with outreach + tactics.' +intro: 'How do you turn portfolio projects, a sharper resume, and targeted networking + into a successful tech job search? In this episode Sarah Mestiri — Data Scientist + and Certified Career & Interview Coach with 6+ years in tech across startups, international + firms and financial services (FIS) — walks through a practical job search strategy + for career changers and return-to-work professionals. Sarah outlines a four-pillar + framework (goals, networking, CV, strategy) and shows how to define your ideal role, + choose a specialization (ML engineering, data engineering, MLOps), and validate + skills through projects versus courses. You’ll hear step-by-step advice on building + a top-5 target company list, crafting personalized outreach and informational interview + questions, and creating a weekly networking action plan that leverages weak ties + and referrals. The episode also covers resume tactics — prioritizing projects, skills, + and storytelling — self-research methods, assessments, part-time strategies, and + age or career-change considerations. 
Listen for actionable takeaways: how to build + portfolio projects that prove impact, write concise outreach messages, and structure + a job search you can maintain — plus recommended resources and follow-up support + (links and Slack) to help you execute.' dateadded: 2024-02-03 - duration: PT01H26S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -132,7 +144,6 @@ quotableClips: startOffset: 3722 url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3722 endOffset: 3626 - transcript: - header: Podcast Introduction - header: Guest Introduction & Coaching Mission @@ -1155,8 +1166,16 @@ transcript: sec: 3797 time: '1:03:17' who: Sarah ---- +context: 'Context: A coach-led roadmap for technical career changers (often returning + parents) that covers defining an ideal role, choosing a specialization, validating + skills through projects, targeting companies, crafting resumes/stories, and running + consistent, relationship-driven outreach. + Core theme: Intentionally design a focused, market-aligned career identity and then + convert it into tangible evidence and relationships—using targeted projects, tailored + applications, informational interviews, and a weekly, measurable outreach plan—to + turn validated skills and clear storytelling into job offers.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/sarahmestiri/){:target="_blank"} diff --git a/_podcast/to-update/s20e02-competitive-machine-learning-and-teaching.md b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md similarity index 88% rename from _podcast/to-update/s20e02-competitive-machine-learning-and-teaching.md rename to _podcast/kaggle-grandmaster-to-production-ml-and-education.md index 6d511d38..43f52f89 100644 --- a/_podcast/to-update/s20e02-competitive-machine-learning-and-teaching.md +++ b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md @@ -1,15 +1,6 @@ --- -title: "Context: A Kaggle Grandmaster recounts a career arc from competitive modeling and open-source tooling (MLEM) through industry roles, curriculum design, large-scale online teaching, and mentoring. Episodes segments cover how competitions teach iterative problem-solving, validation, infrastructure and teamwork; how those skills map (and sometimes must be adapted) to production ML and MLOps; how to design practical coursework and assessments; and how to show business value and respond to new tools like AutoML and generative AI. - -Core through-line (single high-level theme): Hands-on, competition-driven practice—grounded in iteration, rigorous validation, tooling, and community—is the crucible that converts data-science craft into production-ready systems, scalable education, and demonstrable career and business impact. - -Key themes that support this through-line: -- Competitions as accelerated, low-risk labs for learning baselines, feature engineering, and workflows. -- The necessity of infrastructure, repeatable pipelines, and MLOps to make contest solutions production-ready. -- Teaching and curriculum design that mirror real-world system projects to transfer practical skills at scale. -- Community, mentorship, documentation, and open-source tooling as force multipliers for learning and adoption. 
-- Communicating business value and adapting competitive techniques to regional and organizational contexts. -- New productivity tools (AutoML, generative AI) change how work is done but reinforce the need for sound validation and system design." +title: 'From Kaggle Grandmaster to Production ML: Competition Rigor, System Design + & Large-Scale Education' short: Competitive Machine Learning and Teaching season: 20 episode: 2 @@ -24,13 +15,26 @@ links: apple: https://podcasts.apple.com/us/podcast/competitive-machine-leaning-and-teaching-alexander/id1541710331?i=1000692309866 spotify: https://open.spotify.com/episode/6xsov9a1US8D8w5xKcjkNm youtube: https://www.youtube.com/watch?v=NfAJAr7FvyY&t - -description: Master Kaggle strategies, MLOps and curriculum design to convert competition skills into production ML, scalable courses, teamwork and career boosts -intro: How do you turn Kaggle competition wins into production-ready machine learning and effective teaching? In this episode, Alexander Guschin — a machine learning engineer with 10+ years’ experience, a Kaggle Grandmaster ranked 5th globally, leader of DS and SE teams, open-source contributor, and instructor to 100K+ students — walks through that transition. We cover MLOps and tooling anecdotes (including the MLEM story), practical competition strategies like baselines, iteration and infrastructure, and how those practices map to production ML. Alexander also discusses preparing for competitions while studying, regional career differences, solo vs. team collaboration, and demoing Kaggle’s business value to managers. For educators and program leads, he outlines curriculum design grounded in machine learning system design projects, problem-centered assignments (a bot-detection case study), dual leaderboards for ML and engineering, and scaling online courses—drawing on his Coursera work and student-built software. 
Listeners will gain actionable guidance on competition strategy, MLOps best practices, designing real-world assignments, and how to use competitive experience to deliver production-grade ML and teach it effectively +description: Discover Production ML, system design, and competition rigor from a Kaggle + Grandmaster—practical deployment tactics, model scaling tips, and education strategies +intro: How do you take the rigor and creativity that wins Kaggle competitions and + turn it into reliable, maintainable production ML? In this episode we explore that + question with Alexander Guschin — a Machine Learning Engineer with 10+ years of + experience, a Kaggle Grandmaster ranked 5th globally, a leader of DS and SE teams, + contributor to open-source ML tools, and instructor to 100K+ students.

+ Alexander breaks down the differences between competition modeling and production + constraints, practical approaches to system design for machine learning, and lessons + for scaling education and teams around technical content. Key topics include competition + rigor versus maintainability, production ML and model deployment considerations, + designing ML systems at scale, leveraging open-source tooling, and approaches to + teaching complex ML concepts to large audiences.

Listeners will come away + with actionable perspective on translating research and contest solutions into production-ready + pipelines, questions to ask when designing ML systems, and guidance on building + reproducible workflows and scalable learning programs. Ideal for machine learning + engineers, technical leaders, and educators focused on production ML, MLOps, and + large-scale education. dateadded: 2025-02-26 - duration: PT01H05M09S - quotableClips: - name: Episode Start startOffset: 0 @@ -136,7 +140,6 @@ quotableClips: startOffset: 4134 url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=4134 endOffset: 3909 - transcript: - header: Episode Start - header: Guest Introduction & Kaggle Grandmaster Credentials @@ -731,8 +734,29 @@ transcript: sec: 4139 time: '1:08:59' who: Alexey ---- +context: 'Context: A Kaggle Grandmaster recounts a career arc from competitive modeling + and open-source tooling (MLEM) through industry roles, curriculum design, large-scale + online teaching, and mentoring. Episode segments cover how competitions teach iterative + problem-solving, validation, infrastructure and teamwork; how those skills map (and + sometimes must be adapted) to production ML and MLOps; how to design practical coursework + and assessments; and how to show business value and respond to new tools like AutoML + and generative AI. + Core through-line (single high-level theme): Hands-on, competition-driven practice—grounded + in iteration, rigorous validation, tooling, and community—is the crucible that converts + data-science craft into production-ready systems, scalable education, and demonstrable + career and business impact. + + Key themes that support this through-line: - Competitions as accelerated, low-risk + labs for learning baselines, feature engineering, and workflows. - The necessity + of infrastructure, repeatable pipelines, and MLOps to make contest solutions production-ready. 
+ - Teaching and curriculum design that mirror real-world system projects to transfer + practical skills at scale. - Community, mentorship, documentation, and open-source + tooling as force multipliers for learning and adoption. - Communicating business + value and adapting competitive techniques to regional and organizational contexts. + - New productivity tools (AutoML, generative AI) change how work is done but reinforce + the need for sound validation and system design.' +--- Links: * [Linkedin](https://www.linkedin.com/in/1aguschin/){:target="_blank"} diff --git a/_podcast/to-update/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md similarity index 96% rename from _podcast/to-update/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md rename to _podcast/knowledge-graphs-and-llms-for-automotive-rnd.md index 97fbba1e..1228f5c1 100644 --- a/_podcast/to-update/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md +++ b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md @@ -1,7 +1,5 @@ --- -title: "Context: The episode follows a mechanical-engineer-turned-applied-AI practitioner exploring how finite element analysis, crash-simulation optimization, and automotive R&D can be augmented by graph-based representations and modern language models—covering knowledge graphs, computational/graph analytics, embeddings/RAG, trust and hallucination, and practical deployment lessons from a project that parses papers and links domain artifacts. - -Core unifying theme: Knowledge graphs serve as the essential bridge between physics-based engineering models and data-driven AI (graph ML and LLMs), providing a structured, explainable substrate that grounds retrieval and reasoning, enables graph-native analytics and optimization workflows, and thereby accelerates trustworthy, automatable engineering discovery and decision-making." 
+title: 'Using Knowledge Graphs & LLMs for Automotive R&D: RAG, Graph ML & Crash Simulation' short: Knowledge Graphs and LLMs Across Academia and Industry season: 18 episode: 2 @@ -16,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/knowledge-graphs-and-llms-across-academia-and/id1541710331?i=1000651561079 spotify: https://open.spotify.com/episode/1yDgx6uNaSQxKTjGU1qtIj?si=g0xQjWmDTRinzxhoYV3sdA youtube: https://www.youtube.com/watch?v=YncdlUscUOo - -description: Discover how knowledge graphs, LLMs and RAG boost automotive R&D—improve crash simulation insights, grounded retrieval, graph ML and faster paper parsing -intro: How can knowledge graphs and large language models (LLMs) be combined to improve automotive R&D workflows like crash simulation and paper reading? In this episode Anahita Pakiman—Senior Knowledge Graph-Data Scientist Consultant at brox IT-Solutions—walks us from her mechanical engineering roots into applied AI, explaining how finite element analysis (FEA) and optimization intersect with data-driven approaches.

We cover practical topics including FEA vs. machine learning, topology optimization, semantic reporting for crash simulations, and the motivation for adopting knowledge graphs (Neo4j) in automotive R&D. Anahita compares graph and tabular representations, shows how NetworkX and graph analytics bridge knowledge graphs to computational graphs, and dives into graph data science techniques like similarity measures and SimRank. She also explains grounding LLMs with retrieval-augmented generation (RAG), the trade-offs between embeddings/vector databases and KG semantics, Cypher-driven retrieval, prompt templates, and limits around trust and hallucination.

Listeners will get concrete guidance on building KG+LLM systems (including the ADPT-LRN-PHYS project), parsing papers into graphs, deployment and frontend considerations, and recommended graph ML learning resources—valuable for engineers and data scientists working on crash simulation, knowledge graphs, and RAG workflows +description: 'Learn Knowledge Graphs, LLMs & RAG for automotive R&D: optimize crash + simulation, apply Graph ML to FEA, reduce hallucination and speed prototyping' +intro: How can knowledge graphs and large language models (LLMs) be combined to accelerate + automotive R&D — from crash simulation insights to reproducible reports? In this + episode Anahita Pakiman, a data scientist-engineer who moved from mechanical engineering + and finite element analysis (FEA) into applied AI and now works as Senior Knowledge + Graph-Data Scientist Consultant at brox IT‑Solutions, walks through practical strategies + and tradeoffs.

We cover FEA vs machine learning, optimization and topology + in crash simulations, and why teams adopt Neo4j for semantic reporting and load-path + detection. Anahita explains graph vs tabular representations, moving from knowledge + graphs to computational graphs with NetworkX, and applying Graph Data Science and + Graph ML techniques like SimRank. She demonstrates grounding LLMs with retrieval-augmented + generation (RAG), contrasts embeddings and vector databases with KG semantics, and + shows Cypher-driven prompt templates. The episode also addresses trust, hallucination + and verification limits of LLM-extracted knowledge, plus the ADPT-LRN-PHYS project + for adaptive paper reading and graph visualization.

Listen to learn concrete + approaches for combining knowledge graphs, RAG, graph ML and LLMs to improve crash + simulation analysis, semantic reporting, and deployable pipelines for automotive + R&D. dateadded: 2024-04-07 - duration: PT00H59M24S - quotableClips: - name: Episode Introduction startOffset: 0 @@ -113,7 +125,6 @@ quotableClips: startOffset: 3641 url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3641 endOffset: 3564 - transcript: - header: Episode Introduction - header: 'Guest Bio: career path from mechanical engineering to applied AI' @@ -1137,8 +1148,19 @@ transcript: sec: 3664 time: '1:01:04' who: Alexey ---- +context: 'Context: The episode follows a mechanical-engineer-turned-applied-AI practitioner + exploring how finite element analysis, crash-simulation optimization, and automotive + R&D can be augmented by graph-based representations and modern language models—covering + knowledge graphs, computational/graph analytics, embeddings/RAG, trust and hallucination, + and practical deployment lessons from a project that parses papers and links domain + artifacts. + Core unifying theme: Knowledge graphs serve as the essential bridge between physics-based + engineering models and data-driven AI (graph ML and LLMs), providing a structured, + explainable substrate that grounds retrieval and reasoning, enables graph-native + analytics and optimization workflows, and thereby accelerates trustworthy, automatable + engineering discovery and decision-making.' 
+--- Links: * [Github repo](https://github.com/antahiap/ADPT-LRN-PHYS/tree/main){:target="_blank"} diff --git a/_podcast/to-update/s20e04-mlops-in-corporations-and-startups.md b/_podcast/lean-mlops-for-startups.md similarity index 95% rename from _podcast/to-update/s20e04-mlops-in-corporations-and-startups.md rename to _podcast/lean-mlops-for-startups.md index 3ab82409..c07d37fc 100644 --- a/_podcast/to-update/s20e04-mlops-in-corporations-and-startups.md +++ b/_podcast/lean-mlops-for-startups.md @@ -1,5 +1,6 @@ --- -title: "The episode’s single unifying idea is pragmatic trade‑offs: how to move fast and deliver value in ML-driven products and careers while deliberately managing the risks that speed introduces—technical debt, vendor lock‑in, operational overhead, and team burnout. Every segment circles back to the same decision framework: choose lean, observable, portable primitives and SaaS or managed services pragmatically to ship quickly; invest in minimal, automatable MLOps and instrumentation so you can iterate safely; and prioritize foundational skills, mentorship, and ownership to sustain learning and long‑term flexibility. In short, be intentional about early architectural, tooling, and career choices—opt for simplicity and visibility to accelerate outcomes today while preserving the ability to evolve, scale, and de‑risk tomorrow." +title: 'Lean MLOps for Startups: SaaS-First MVP Stack, Avoid Vendor Lock-In & Manage + Tech Debt' short: MLOps in Corporations and Startups season: 20 episode: 4 @@ -14,13 +15,24 @@ links: apple: https://podcasts.apple.com/us/podcast/mlops-in-corporations-and-startups-nemanja-radojkovic/id1541710331?i=1000699195928 spotify: https://open.spotify.com/episode/6V8gkTSz7LuPjQYC4rO019 youtube: https://www.youtube.com/watch?v=DX9c__a4jzg - -description: 'Learn Lean MLOps strategies for startups: build a SaaS-first MVP stack, avoid vendor lock-in, and manage technical debt for faster, portable ML launches.' 
-intro: How can an early-stage startup ship ML features fast without getting locked into cloud vendors or drowning in technical debt? In this episode, Nemanja Radojkovic—an electrical engineer turned data scientist and MLOps engineer, DataCamp instructor, and long-time practitioner—walks through pragmatic, lean MLOps strategies for startups.

We cover shoestring tactics for rapid prototyping, a SaaS‑first MVP stack and its trade‑offs, cloud credits versus migration friction, and how to avoid vendor lock‑in with managed services like Vertex AI or SageMaker. Nemanja unpacks priorities for an MVP stack, low‑code speed versus future flexibility, minimal stacks (Python, CI/CD orchestration, Dagster), and observability options (Logfire, Prometheus/Grafana, Streamlit). The conversation also addresses technical debt management, data engineering reliability, on‑premise vs cloud decisions, and distributed compute choices (Dask, Spark).

Listen to learn concrete frameworks for choosing tools, balancing portability and managed services, and practical steps to manage tech debt while moving quickly. This episode is for startup engineers and founders who need actionable guidance on lean MLOps, SaaS‑first approaches, vendor lock‑in avoidance, and building a resilient MVP stack +description: 'Learn lean MLOps for startups: build a SaaS-first MVP stack, avoid vendor + lock-in, and manage tech debt to ship faster, cut costs, and scale safely.' +intro: How can a startup implement Lean MLOps that gets models into production quickly + without incurring vendor lock-in or crushing tech debt? In this episode Nemanja + Radojkovic — an Electrical Engineer turned Data Scientist and MLOps Engineer, former + consultant at Big4 and boutique firms, DataCamp course author, and teacher of Python + and machine learning — walks through practical strategies for building a SaaS-first + MVP stack while preserving future flexibility.

We dig into the core trade-offs + of a SaaS-first approach for an MVP, patterns to avoid vendor lock-in, and pragmatic + ways to manage accumulating tech debt in machine learning systems. Nemanja draws + on hands-on experience across data science, MLOps, and product environments to explain + how startups can choose tooling, limit integration risk, and plan safe migration + paths as needs change.

Listeners will come away with concrete considerations + for designing a lean MLOps stack, assessing SaaS versus self-hosted options, and + thinking ahead about maintainability and portability — essential guidance for founders, + ML engineers, and product teams building production-ready ML on a startup timeline. dateadded: 2025-03-15 - duration: PT01H01M06S - quotableClips: - name: Episode Introduction & Topic Overview startOffset: 0 @@ -134,7 +146,6 @@ quotableClips: startOffset: 3701 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3701 endOffset: 3666 - transcript: - header: Episode Introduction & Topic Overview - line: This week, we’ll talk about MLOps in corporations versus startups. Our special @@ -1349,8 +1360,18 @@ transcript: sec: 3726 time: '1:02:06' who: Nemanja +context: 'The episode’s single unifying idea is pragmatic trade‑offs: how to move + fast and deliver value in ML-driven products and careers while deliberately managing + the risks that speed introduces—technical debt, vendor lock‑in, operational overhead, + and team burnout. Every segment circles back to the same decision framework: choose + lean, observable, portable primitives and SaaS or managed services pragmatically + to ship quickly; invest in minimal, automatable MLOps and instrumentation so you + can iterate safely; and prioritize foundational skills, mentorship, and ownership + to sustain learning and long‑term flexibility. In short, be intentional about early + architectural, tooling, and career choices—opt for simplicity and visibility to + accelerate outcomes today while preserving the ability to evolve, scale, and de‑risk + tomorrow.' 
--- - Links: * [LinkedIn](https://www.linkedin.com/in/radojkovic/){:target="_blank"} diff --git a/_podcast/to-update/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md b/_podcast/mindful-data-strategy-for-business-impact.md similarity index 95% rename from _podcast/to-update/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md rename to _podcast/mindful-data-strategy-for-business-impact.md index 9073d00b..e002680a 100644 --- a/_podcast/to-update/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md +++ b/_podcast/mindful-data-strategy-for-business-impact.md @@ -1,7 +1,6 @@ --- -title: "Context: The episode examines a practical approach to data work—moving from engineering to product thinking, accepting imperfect data (wabi‑sabi), diagnosing trust failures, prioritizing maintenance/rollout/innovation, using simple reliability signals and feedback loops, and aligning team time and processes to measurable business impact—especially as generative AI raises readiness demands and legacy systems require pragmatic replacement. - -Core: Adopt a mindful, impact‑first data strategy that accepts and communicates inevitable imperfection, prioritizes process and measurable business outcomes over perfect tooling, and restores trust through clear signals, closed feedback loops, and disciplined allocation of maintenance, rollout, and innovation effort so data products remain reliable, scalable, and ready to deliver real ROI." 
+title: 'Mindful Data Strategy for Business Impact: Wabi-Sabi Approach, Data Trust + & Maintenance-Innovation Balance' short: 'Mindful Data Strategy: From Pipelines to Business Impact' season: 21 episode: 2 @@ -16,13 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: https://open.spotify.com/episode/54B0xvUI1eQjXW0s1eqgbI youtube: https://www.youtube.com/watch?v=B76J4QkZPWs - -description: Discover how to restore data trust with data quality fixes, prioritization and generative AI readiness—KPI diagnosis, incident-driven roadmaps and rollout tips -intro: How do you restore data trust and make your organization ready for generative AI without drowning in tools or endless cleanup? In this episode, Lior Barak — author of Data is Like a Plate of Hummus, co‑host of WHAT the Data?!, and founder of Tale About Data — walks through practical approaches to data quality, prioritization, and generative AI readiness from his 12+ years building data teams.

We cover a mindful data strategy that accepts imperfection (Wabi‑sabi), the shift from engineering to product thinking, and automating data infrastructure. Lior explains common data trust failures and hallucination risks with generative models, offers diagnostic tactics for core KPI and dashboard inaccuracies, and pinpoints pipeline failure modes (ingestion, SQL logic, lineage). You’ll hear a trust‑restoration framework focused on maintenance, rollouts, and innovation, a traffic‑light dashboard for reliability, incident analysis to find recurring causes, and practical work allocation and team stress benchmarks (≈45% maintenance).

Listen to learn concrete steps to prioritize data work by business impact, measure readiness for AI by ROI and product signals, and manage legacy systems and executive ad‑hoc requests with intent and impact in mind +description: 'Discover a mindful data strategy to build data trust and balance maintenance-innovation + with a Wabi-Sabi approach: practical tactics to boost business impact.' +intro: How do you build a data strategy that drives business impact without chasing + perfection? In this episode Lior Barak — author of Data Is Like a Plate of Hummus, + co-host of the WHAT the Data?! podcast, and founder of Tale About Data — explores + a mindful data strategy that accepts imperfection, prioritizes data trust, and balances + maintenance with innovation.

Lior draws on 12+ years building data teams + and helping organizations use data for growth, with a particular focus on practical + strategies for non-business functions. Key topics include the Wabi-Sabi approach + to data (valuing usable, imperfect datasets), establishing data trust and governance, + and how to allocate resources between ongoing data maintenance and forward-looking + innovation. The conversation also touches on setting realistic expectations, reducing + technical debt, and aligning data work to measurable business outcomes.

+ If you’re responsible for data strategy, analytics, or data product decisions, this + episode provides concrete perspectives on building resilient, impact-driven data + practices—helping you prioritize work that increases trust, lowers risk, and creates + sustained business value. dateadded: 2025-08-18 - duration: PT01H06M05S - quotableClips: - name: Podcast Introduction and Episode Overview (mindful data strategy) startOffset: 0 @@ -136,7 +147,6 @@ quotableClips: startOffset: 3965 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3965 endOffset: 3965 - transcript: - header: Podcast Introduction and Episode Overview (mindful data strategy) - line: This week we'll talk about mindful data strategy and how teams can shift from @@ -1391,8 +1401,19 @@ transcript: sec: 3965 time: '1:06:05' who: Alexey ---- +context: 'Context: The episode examines a practical approach to data work—moving from + engineering to product thinking, accepting imperfect data (wabi‑sabi), diagnosing + trust failures, prioritizing maintenance/rollout/innovation, using simple reliability + signals and feedback loops, and aligning team time and processes to measurable business + impact—especially as generative AI raises readiness demands and legacy systems require + pragmatic replacement. + Core: Adopt a mindful, impact‑first data strategy that accepts and communicates + inevitable imperfection, prioritizes process and measurable business outcomes over + perfect tooling, and restores trust through clear signals, closed feedback loops, + and disciplined allocation of maintenance, rollout, and innovation effort so data + products remain reliable, scalable, and ready to deliver real ROI.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/liorbarak/){:target="_blank"} diff --git a/_podcast/to-update/s17e05-machine-learning-engineering-in-finance.md b/_podcast/mlops-and-ml-engineering-in-finance.md similarity index 96% rename from _podcast/to-update/s17e05-machine-learning-engineering-in-finance.md rename to _podcast/mlops-and-ml-engineering-in-finance.md index 917277db..87a3471b 100644 --- a/_podcast/to-update/s17e05-machine-learning-engineering-in-finance.md +++ b/_podcast/mlops-and-ml-engineering-in-finance.md @@ -1,7 +1,5 @@ --- -title: "Context: Nemanja’s story and the episode’s segments trace practical ML work in regulated finance—moving from research to ML engineering in legacy, governance-heavy environments—covering real constraints (on‑prem infra, approvals), concrete ML Ops responsibilities (CI/CD, deployment, monitoring, model/data versioning), tactical shortcuts, team and platform patterns, and the skills and career moves that enable this work. - -Core: The unifying idea is that bringing ML into production in conservative, regulated organizations succeeds not through ideal tools or big rewrites but through a pragmatic, engineering‑first, incremental approach—building minimal viable ML Ops (reproducible pipelines, environments, monitoring, simple registries), integrating with existing DevOps/governance, reusing platform patterns, and focusing on practical skills and iterative delivery to earn trust and scale ML responsibly." 
+title: 'MLOps in Finance: Regulated Deployment, CI/CD and Model Governance' short: Machine Learning Engineering in Finance season: 17 episode: 5 @@ -16,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/machine-learning-engineering-in-finance-nemanja-radojkovic/id1541710331?i=1000643322929 spotify: https://open.spotify.com/episode/3yQtA8EAndau1yhCFPfwtj?si=ZutO4mLlRfOz_Zgw4GujiQ youtube: https://www.youtube.com/watch?v=Nl4aibeFwiI - -description: 'Learn MLOps for finance: CI/CD & on-prem deployment with minimal viable ML - build reproducible pipelines, model registry and monitoring to ensure compliance' -intro: 'How do you deliver machine learning in highly regulated, legacy finance environments where CI/CD, on‑prem deployment, and governance constrain every decision? In this episode Nemanja Radojkovic — an electrical engineer turned data scientist and MLOps practitioner who now teaches Data Science and contributes courses to DataCamp — walks through pragmatic MLOps for finance.

We cover concrete finance use cases (AML, fraud, compliance, automated document and email processing) and the ML engineering responsibilities that matter most: CI/CD, deployment choices, and integrating ML workflows with existing DevOps and release governance. Nemanja explains working with on‑prem platforms like Hadoop and OpenShift, how to prioritize a minimal viable ML Ops stack on a shoestring (dev/test/prod environments, monitoring, model registry, data versioning, reproducible pipelines), and tactical interim solutions such as using S3 for registry/versioning.

Listeners will get actionable guidance on prototyping under regulatory constraints, team structures and reusable platform patterns, and the practical skills and beginner tech stack (Python, SQL, Pandas/Polars, cloud basics) to move models from experiment to production in finance.' +description: 'Learn MLOps for finance: model governance, compliant deployments, monitoring, + and MVP ML Ops tactics to build production-ready, auditable models.' +intro: 'How do you deploy machine learning in heavily regulated finance environments + while keeping CI/CD pipelines, model governance, and operational risk under control? + In this episode Nemanja Radojkovic—an electrical engineer turned data scientist + and MLOps practitioner who moved from Belgrade to Leuven—walks through real-world + constraints and pragmatic solutions for MLOps in finance.

Drawing on his + PhD background, consulting experience, and teaching, Nemanja covers finance use + cases such as compliance, AML, fraud detection, and document/email automation, then + drills into ML engineering responsibilities: deployment choices, CI/CD, release + management, and building trust with governance and approvals. We examine legacy + and regulatory constraints, on-premises platforms (Hadoop, OpenShift), and low-cost + MLOps strategies: minimal viable setups (dev/test/prod, monitoring, model registry, + data versioning, reproducible pipelines) and tactical workarounds like S3-based + registries.

Listeners will gain actionable guidance on adapting ML workflows + to corporate DevOps, prioritizing MLOps on a shoestring, standardizing deployment + patterns and platform reuse (FastAPI, internal libraries), and the core skills needed + for ML engineering and production readiness. Ideal for ML engineers and data teams + tackling regulated deployment, CI/CD, and model governance in finance.' dateadded: 2024-01-29 - duration: PT00H58M04S - quotableClips: - name: Episode Introduction startOffset: 0 @@ -121,7 +133,6 @@ quotableClips: startOffset: 3544 url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3544 endOffset: 3484 - transcript: - header: Episode Introduction - header: 'Guest Introduction: Nemanja’s journey from Belgrade to ML Ops in Europe' @@ -1275,8 +1286,20 @@ transcript: sec: 3579 time: '59:39' who: Nemanja ---- +context: 'Context: Nemanja’s story and the episode’s segments trace practical ML work + in regulated finance—moving from research to ML engineering in legacy, governance-heavy + environments—covering real constraints (on‑prem infra, approvals), concrete ML Ops + responsibilities (CI/CD, deployment, monitoring, model/data versioning), tactical + shortcuts, team and platform patterns, and the skills and career moves that enable + this work. + Core: The unifying idea is that bringing ML into production in conservative, regulated + organizations succeeds not through ideal tools or big rewrites but through a pragmatic, + engineering‑first, incremental approach—building minimal viable ML Ops (reproducible + pipelines, environments, monitoring, simple registries), integrating with existing + DevOps/governance, reusing platform patterns, and focusing on practical skills and + iterative delivery to earn trust and scale ML responsibly.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/radojkovic/){:target="_blank"} diff --git a/_podcast/to-update/s19e04-mlops-as-team.md b/_podcast/mlops-at-scale-reproducibility-adoption.md similarity index 94% rename from _podcast/to-update/s19e04-mlops-as-team.md rename to _podcast/mlops-at-scale-reproducibility-adoption.md index 8b392555..2e1f67f4 100644 --- a/_podcast/to-update/s19e04-mlops-as-team.md +++ b/_podcast/mlops-at-scale-reproducibility-adoption.md @@ -1,7 +1,5 @@ --- -title: "Context: Raphaël Hoogvliets (Eneco) walks through his journey from agriculture to data science and MLOps, illustrating real-world tradeoffs in design, team structure, tooling, and delivery while sharing concrete practices, stories, and metrics for operationalizing ML. - -Core narrative: MLOps is fundamentally about operationalizing machine learning as sustainable product engineering—building an enabling, platform-led way of working that brings cross-functional teams, pragmatic engineering practices (CI/CD, reproducibility, testing, dependency management), and iterative adoption together so organizations can balance speed versus robustness, build trust with quick wins and measured KPIs, and keep models reliably deployed and delivering business impact." +title: 'MLOps at Scale: CI/CD, Reproducibility, Model Monitoring & Adoption Strategies' short: MLOps as a Team season: 19 episode: 4 @@ -16,13 +14,26 @@ links: apple: https://podcasts.apple.com/us/podcast/mlops-as-a-team-rapha%C3%ABl-hoogvliets/id1541710331?i=1000676238840 spotify: https://open.spotify.com/episode/0Dl372MFGvN0zDa1YQx7oe?si=eCy-a4fkRtOaEe21-KDHXQ youtube: https://youtube.com/watch?v=rMq63r3zi4c - -description: 'Master MLOps: CI/CD, reproducibility, and delivery strategies to accelerate ML delivery, boost reliability, improve team efficiency, and measure business impact.' -intro: How do you keep machine learning models deployed, monitored, and maintained in production? 
In this episode, Raphaël Hoogvliets from Eneco — whose career journey spans agriculture to data science and MLOps — tackles that core MLOps challenge. We trace practical design choices and long‑term trade‑offs between speed and robustness, and why team coordination, evangelists, tech translators and technical leads matter when scaling ML.

Key topics include centralized MLOps as an enabling platform team, support models for product teams and ML engineers, adoption strategies centered on iteration and developer experience, and tactics for building trust through quick wins and pain‑point collection. We also cover measurable KPIs like deployment frequency and impact tracking, core practices such as CI/CD, repo structure, parameterization, testing, and reproducibility with data versioning and experiment capture. The episode reviews essential tools—experiment tracking, model registry, serving and monitoring—plus dependency and container strategies (Docker, Kubernetes, Databricks) and signals for when to introduce governance.

If you’re responsible for operationalizing machine learning, this discussion offers concrete guidance on prioritizing CI/CD, staffing the right skill mix, and choosing tools and processes to keep models reliable in production +description: Learn MLOps CI/CD and model monitoring to scale reliable deployments, + accelerate delivery, ensure reproducibility, and drive model adoption in production. +intro: 'How do you run MLOps at scale so models stay deployed, reproducible, and actually + adopted? In this episode Raphaël Hoogvliets—who leads a 12‑engineer team at Eneco + and brings a career arc from agriculture into data science and MLOps—walks through + practical approaches for CI/CD for ML, reproducibility, model monitoring, and adoption + strategy.

We cover the core trade‑offs between speed and robustness, design + choices for long‑term maintainability, and the team coordination needed to scale + ML: evangelists, tech translators, and technical leads. Raphaël explains why a centralized + MLOps platform team often works as an enabling layer, how MLOps should support product + teams, and how to drive adoption through iteration, feedback loops, and developer + experience. You’ll hear concrete practices—CI, repo structure, parameterization, + testing—plus reproducibility tactics like data versioning, traceability, and experiment + capture. We also discuss KPIs (deployment frequency and impact tracking), skill + mix, dependency management, container strategies, and real success and failure stories. +

Listen to learn actionable priorities for getting started (start with CI/CD + and solve tangible pain points), and how to measure and sustain model value through + monitoring and operational processes.' dateadded: 2024-11-16 - duration: PT01H04M07S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -124,7 +135,6 @@ quotableClips: startOffset: 3762 url: https://youtube.com/watch?v=rMq63r3zi4c&t=3762 endOffset: 3847 - transcript: - header: Podcast Introduction - line: Hi, everyone! Welcome to our event. This is brought to you by DataTalks.Club, @@ -924,4 +934,15 @@ transcript: sec: 3847 time: '1:04:07' who: Raphaël +context: 'Context: Raphaël Hoogvliets (Eneco) walks through his journey from agriculture + to data science and MLOps, illustrating real-world tradeoffs in design, team structure, + tooling, and delivery while sharing concrete practices, stories, and metrics for + operationalizing ML. + + Core narrative: MLOps is fundamentally about operationalizing machine learning as + sustainable product engineering—building an enabling, platform-led way of working + that brings cross-functional teams, pragmatic engineering practices (CI/CD, reproducibility, + testing, dependency management), and iterative adoption together so organizations + can balance speed versus robustness, build trust with quick wins and measured KPIs, + and keep models reliably deployed and delivering business impact.' 
--- diff --git a/_podcast/to-update/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md similarity index 96% rename from _podcast/to-update/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md rename to _podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md index c70f17bc..ba4320c7 100644 --- a/_podcast/to-update/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md +++ b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md @@ -1,6 +1,7 @@ --- -title: "Search today is less about keywords and more about constructing a reliable retrieval‑plus‑generation system: the core through‑line is that effective modern search combines classical IR principles (indexing, ranking, evaluation) with semantic vector representations, embedding stores or vector databases, and LLMs—stitched together by careful ingestion, orchestration, prompt design, and human‑in‑the‑loop evaluation—to deliver accurate, contextualized, and personalized answers." -short: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search Technologies' +title: 'Modern Search Systems: Vector Databases, LLMs and Semantic Retrieval' +short: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search + Technologies' season: 17 episode: 2 guests: @@ -14,13 +15,27 @@ links: apple: https://podcasts.apple.com/us/podcast/navigating-challenges-and-innovations-in-search/id1541710331?i=1000639476594 spotify: https://open.spotify.com/episode/7mUMvxP4Efyeh0lhF5CvT6?si=7qqKrsMfQxaZy435s3XIEA youtube: https://www.youtube.com/watch?v=_fbe1QyJ1PY - -description: 'Discover RAG and vector DBs strategies for search: build podcast chatbots, optimize embeddings, reduce LLM hallucinations and boost personalization.' 
-intro: 'How do you modernize search systems with vector search and retrieval‑augmented generation (RAG) without trading away relevance or inviting hallucinations? In this episode we talk with a search practitioner rooted in information retrieval who has worked with Solr, Lucene and the Semantic Web era and later in search consulting and teaching at Lucidworks and OpenSource Connections.

We cover the arc from classic keyword search to NLP, embeddings and vector databases (including Qdrant and plug‑and‑play vector search), and practical migration decisions: when to add vectors to an existing stack versus adopting a standalone vector DB. You’ll hear concrete guidance on RAG concepts to reduce LLM hallucinations, building a chatbot from podcast transcripts using Whisper, ingest strategies (chunking, overlap, embedding models), and orchestration with tools like LangChain. The episode also digs into prompt design, citation strategies, multi‑level RAG evaluation with human‑in‑the‑loop testing, and personalization approaches such as session‑based recommendations and re‑ranking.

Listen to gain actionable techniques for vector search, embeddings, RAG pipelines, evaluation metrics, and resources to deepen your knowledge.' +description: 'Learn vector databases, LLMs & semantic retrieval: RAG, embeddings and + vector search tactics to build accurate chatbots, personalized search and better + ranking.' +intro: How do modern search systems combine vector databases, LLMs, and semantic retrieval + to deliver relevant, reliable results—and when should you adopt each component? + In this episode Atita Arora walks through that question from both historical and + practical angles. A long‑time contributor to information retrieval projects (including + Apache OpenNLP and Quepid) and author of posts on vectors in e‑commerce and the + open‑source Chorus implementation, Atita brings hands‑on experience plus ongoing + research into evaluating RAG systems and a commitment to user‑centric metrics and + inclusivity.

We cover the evolution from Solr/Lucene and the Semantic Web + era to NLP for query‑content matching; practical vector topics such as Qdrant, plug‑and‑play + vector search, and migration tradeoffs; and end‑to‑end RAG pipelines—Whisper transcripts, + chunking and embedding strategies, LangChain orchestration, prompt design, citations, + and multi‑level evaluation with human‑in‑the‑loop testing. You’ll also hear about + session‑based recommendations, personalization approaches, and curated learning + resources like Intro to Information Retrieval and Vector Hub. Listen to gain actionable + guidance on building and evaluating vector search and retrieval‑augmented generation + systems while avoiding common pitfalls like LLM hallucinations. dateadded: 2024-01-07 - duration: PT00H59M13S - quotableClips: - name: 'Episode Introduction: search focus and guest overview' startOffset: 115 @@ -99,7 +114,6 @@ quotableClips: startOffset: 3624 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3624 endOffset: 3553 - transcript: - header: 'Episode Introduction: search focus and guest overview' - line: This week, we'll talk about search. We have a very special guest today, Atita. @@ -1222,8 +1236,13 @@ transcript: sec: 3668 time: '1:01:08' who: Atita +context: 'Search today is less about keywords and more about constructing a reliable + retrieval‑plus‑generation system: the core through‑line is that effective modern + search combines classical IR principles (indexing, ranking, evaluation) with semantic + vector representations, embedding stores or vector databases, and LLMs—stitched + together by careful ingestion, orchestration, prompt design, and human‑in‑the‑loop + evaluation—to deliver accurate, contextualized, and personalized answers.' 
--- - Links: * [LinkedIn](https://www.linkedin.com/in/atitaarora/){:target="_blank"} diff --git a/_podcast/to-update/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md similarity index 95% rename from _podcast/to-update/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md rename to _podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md index 7174dc12..bee42e58 100644 --- a/_podcast/to-update/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md +++ b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md @@ -1,5 +1,5 @@ --- -title: "A deliberate, project-first career pivot: leveraging medical and statistical domain knowledge while learning by doing—through freelance projects, structured cohorts, public-facing content, and portfolio/dev-ops work—to build practical ML skills, visibility, and job readiness while balancing time and commitments." +title: 'From Medicine to Machine Learning: Skill Stacking, Public Learning & Freelance-Driven Career Building' short: 'From Medicine to Machine Learning: How Public Learning Turned into a Career' season: 21 episode: 3 @@ -14,13 +14,25 @@ links: apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: https://open.spotify.com/episode/22Gc1bDecKA33KHAaSF9fx youtube: https://www.youtube.com/watch?v=5km62e4nDaw - -description: Learn how to build a healthcare ML portfolio, land Upwork freelance gigs and deploy Dockerized models to AWS—practical tips, capstones, and career strategies -intro: How do you go from medical school to shipping production-ready healthcare ML—and get paid for it on platforms like Upwork? 
In this episode, Pastor Soto, a machine learning engineer and mentor who transitioned from medicine and criminology into production ML, walks through the practical steps he used to build a healthcare ML portfolio and freelance career.

We cover his career trajectory (statistician → data analyst → data engineer), the skill progression from SPSS and R to Python, and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp and public learning—publishing exercises, leaderboards, and focused content—attracted interviews and opportunities. He also breaks down portfolio tactics (Notion notes, capstone projects using healthcare datasets), production topics (Dockerized models, AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and soft skills like English communication and handling critique.

Listeners will come away with concrete, repeatable strategies for building a healthcare machine learning portfolio, landing freelance work, and deploying models to the cloud—plus time-management and mentoring practices that make it sustainable +description: Learn how to build a healthcare ML portfolio, land Upwork freelance gigs + and deploy Dockerized models to AWS—practical tips, capstones, and career strategies +intro: How do you go from medical school to shipping production-ready healthcare ML—and + get paid for it on platforms like Upwork? In this episode, Pastor Soto, a machine + learning engineer and mentor who transitioned from medicine and criminology into + production ML, walks through the practical steps he used to build a healthcare ML + portfolio and freelance career.

We cover his career trajectory (statistician + → data analyst → data engineer), the skill progression from SPSS and R to Python, + and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp + and public learning—publishing exercises, leaderboards, and focused content—attracted + interviews and opportunities. He also breaks down portfolio tactics (Notion notes, + capstone projects using healthcare datasets), production topics (Dockerized models, + AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and + soft skills like English communication and handling critique.

Listeners + will come away with concrete, repeatable strategies for building a healthcare machine + learning portfolio, landing freelance work, and deploying models to the cloud—plus + time-management and mentoring practices that make it sustainable dateadded: 2025-08-22 - duration: PT01H01M07S - quotableClips: - name: Podcast Introduction & Event Announcements startOffset: 0 @@ -100,7 +112,6 @@ quotableClips: startOffset: 3600 url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3600 endOffset: 3667 - transcript: - header: Podcast Introduction & Event Announcements - line: Hi everyone, welcome to our event. This event is presented by Redox Club, @@ -945,8 +956,11 @@ transcript: sec: 3667 time: '1:01:07' who: Alexey +context: 'A deliberate, project-first career pivot: leveraging medical and statistical + domain knowledge while learning by doing—through freelance projects, structured + cohorts, public-facing content, and portfolio/dev-ops work—to build practical ML + skills, visibility, and job readiness while balancing time and commitments.' 
--- - Links: * [Twitter](https://x.com/PastorSotoB1){:target="_blank"} diff --git a/_podcast/to-update/s17e07-make-impact-through-volunteering-open-source-work.md b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md similarity index 96% rename from _podcast/to-update/s17e07-make-impact-through-volunteering-open-source-work.md rename to _podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md index 64beb161..8a594eee 100644 --- a/_podcast/to-update/s17e07-make-impact-through-volunteering-open-source-work.md +++ b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md @@ -1,5 +1,5 @@ --- -title: "Volunteering and community-driven open-source collaboration are the pivot that turns academic curiosity and nascent AI skills into real-world impact and career momentum — by embracing an MVP mindset, creative data sourcing, strategic positioning (hackathons, platforms, and pitches), and intentional networking you can build practical projects, gain mentors and referrals, and overcome resource constraints to bridge research and production." +title: 'Open Source and Volunteering: Building AI Projects and Career Momentum' short: Make an Impact Through Volunteering Open Source Work season: 17 episode: 7 @@ -14,13 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/make-an-impact-through-volunteering-open-source-work/id1541710331?i=1000646627892 spotify: https://open.spotify.com/episode/7tZSSgv1yAlnoMyB4ggQmb?si=AqDaME2QS26usoZjOEWNtQ youtube: https://www.youtube.com/watch?v=aHdaIwOEI8Q - -description: Discover volunteer open-source projects, hackathon strategy and data sourcing tips to build an AI portfolio, land referrals, and win medical imaging challenges -intro: Struggling to break into impactful AI work—what volunteer projects, hackathon tactics, and data sourcing methods actually move your career forward? 
In this episode Sara El‑Ateif, Google Developer Expert in Machine Learning, Google PhD Fellow and co‑founder of AI Wonder Girls, walks through her path from big data and computer vision studies to multimodal COVID‑19 research and practical volunteer projects.

Sara breaks down real examples—PTSD chatbot, trash detection, and cervical spine segmentation—showing how to source data (Open Images, creative collection, generative approaches), pitch for volunteer roles, and contribute on platforms like Omdena and Fruit Punch AI. She explains hackathon strategy—understanding judges, defining an MVP despite limited data/compute, and building deliverables with mentors—and outlines opportunity hunting via LinkedIn, social feeds, mailing lists, WIML and conference channels.

Listeners will get actionable guidance on applying to projects, roles for data engineers (data prep, pipelines, dashboards), productivity tips, and how to build a research network. Tune in to learn concrete steps to boost your AI career through open‑source volunteering, smarter hackathon participation, and better data sourcing +description: Learn open source volunteering tactics for AI projects - data sourcing, + hackathon MVP strategy, mentorship and portfolio-building to accelerate career momentum. +intro: How can volunteering in open source AI projects accelerate your career while + delivering tangible community impact? In this episode Sara El‑Ateif — Google Developer + Expert in Machine Learning, Google PhD Fellow, co‑founder of AI Wonder Girls and + Evercoach‑certified business coach — walks through practical ways to build skills + and momentum through volunteering and open source work.

We cover Sara’s + path from early AI interest to PhD research in multimodal learning and medical imaging, + plus lessons from winning a Google PhD Fellowship. Hear concrete volunteer project + case studies — a PTSD chatbot, trash detection, and cervical spine segmentation + — and learn data sourcing tactics using Open Images and creative collection. Sara + explains how to find opportunities (LinkedIn, social media, mailing lists, WIML), + differences between collaboration platforms like Omdena and Fruit Punch AI, and + how women‑led groups structure projects.

Listeners will get actionable + advice on hackathon strategy, MVP mindset under data/compute constraints, pitching + for volunteer roles, building a research network, and the data engineering tasks + that matter (pipelines, dashboards, prep). Tune in to discover how open source and + volunteering translate into practical experience, referrals, and career traction + in machine learning. dateadded: 2024-02-29 - duration: PT00H59M34S - quotableClips: - name: Podcast Introduction startOffset: 0 @@ -118,7 +132,6 @@ quotableClips: startOffset: 3624 url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3624 endOffset: 3574 - transcript: - header: Podcast Introduction - header: 'Episode Overview: Volunteering, Open Source & Community Impact' @@ -1323,8 +1336,13 @@ transcript: sec: 3677 time: '1:01:17' who: Sara +context: Volunteering and community-driven open-source collaboration are the pivot + that turns academic curiosity and nascent AI skills into real-world impact and career + momentum — by embracing an MVP mindset, creative data sourcing, strategic positioning + (hackathons, platforms, and pitches), and intentional networking you can build practical + projects, gain mentors and referrals, and overcome resource constraints to bridge + research and production. 
--- - Links: * [Dev and AI hackathons](https://devpost.com/){:target="_blank"} diff --git a/_podcast/to-update/s18e04-working-in-open-source-probabl-ai-and-sklearn.md b/_podcast/open-source-ml-tools-strategy-and-business-models.md similarity index 93% rename from _podcast/to-update/s18e04-working-in-open-source-probabl-ai-and-sklearn.md rename to _podcast/open-source-ml-tools-strategy-and-business-models.md index 699e1786..f8b26b17 100644 --- a/_podcast/to-update/s18e04-working-in-open-source-probabl-ai-and-sklearn.md +++ b/_podcast/open-source-ml-tools-strategy-and-business-models.md @@ -1,7 +1,5 @@ --- -title: "Context: This episode surveys the Scikit-Learn ecosystem, related projects (Scikit Lego, Skrub), and initiatives like Calm Code and :probabl., weaving together career stories, governance, tooling choices, content production, maintainer handoffs, CI/cost concerns, and early business models. - -Core unifying idea: Long-term health and impact of open-source machine‑learning projects depends not just on great code but on a deliberate integration of engineering excellence, community stewardship, accessible education, and sustainable operational/business practices — i.e., building pragmatic tools and clear learning paths while creating incentives (training, consulting, platform models, cost‑efficient infrastructure, and low‑pressure contributor experiences) that enable maintainers and contributors to keep projects useful, adoptable, and durable." 
+title: 'Open Source ML Tools: Scikit-Learn Governance, Sustainability and Business Models' short: Working in Open Source - Probabl.ai and sklearn season: 18 episode: 4 @@ -16,13 +14,26 @@ links: apple: https://podcasts.apple.com/us/podcast/working-in-open-source-probabl-ai-and-sklearn-vincent/id1541710331?i=1000654481795 spotify: https://open.spotify.com/episode/0HT3IQOaTXTMH0OdEBnw9s?si=HrLtx7QKT_amZyUbZuqRzQ youtube: https://www.youtube.com/watch?v=UPlIETGwTg8 - -description: 'Discover scalable scikit-learn ecosystems with scikit-lego and Skrub: learn GAP Encoder, contributor growth, CI optimization and DevRel sustainability.' -intro: How do you build a sustainable scikit-learn ecosystem that serves both users and contributors? In this episode, Vincent Warmerdam — Research Advocate at Rasa, open source contributor and creator of Calm Code and the Koaning blog — walks through practical decisions that keep ML tooling healthy over time. We cover scikit-lego’s origins and adoption, governance and NumFOCUS roles, and the trade-offs between adding features to core scikit-learn versus plugins.

Key topics include maintaining contributor growth and steward transitions, motivating volunteer maintainers, DevRel combined with core engineering, and demonstrable open source quality as a hiring signal. Vincent also explains Skrub’s table vectorizer and the GAP Encoder approach for clustering dirty categorical values to avoid one-hot explosion, plus examples of CI and cost optimization (custom runners, GitHub Actions) and sustainable compute choices. You’ll get actionable guidance on teaching fundamentals (Docker, pip, Git), producing interactive content, and potential business models around training and consulting. Tune in to learn concrete strategies for building, funding, and scaling scikit-learn-compatible tools and communities without sacrificing long-term sustainability +description: Discover Scikit-Learn open source business models—learn maintainer strategies, + CI cost optimization and training monetization to build sustainable projects. +intro: How can open source ML tools stay healthy, useful, and financially sustainable + while serving both researchers and industry? In this episode Vincent Warmerdam — + Research Advocate at Rasa, author of the Koaning blog, creator of the Algorithm + Whiteboard playlist, and cofounder of Calm Code — walks through the real-world tradeoffs + of scikit-learn governance, sustainability, and business models for ML tooling. +

We dig into scikit-learn’s history, NumFOCUS relationships, and the plugin‑versus‑core + strategy; practical maintainer issues like transitions, motivating volunteers, and + using open source contributions as hiring signals; and the intersection of developer + relations and core engineering. Vincent also explores Calm Code’s low‑pressure teaching + philosophy, content and monetization choices, and platform decisions (Django, contributor + hiring). Technical operations topics include CI cost optimization with custom runners + and sustainable compute examples (Leaf.cloud), plus hands‑on projects like Skrub’s + table vectorizer and GAP encoder for pragmatic tabular defaults.

Listeners + will gain actionable insights on governance models, maintaining project health, + and realistic business options — training, consulting, and partnerships — for anyone + building or stewarding open source ML tools. dateadded: 2024-05-06 - duration: PT01H15S - quotableClips: - name: Episode Overview — Open Source Focus startOffset: 0 @@ -148,7 +159,6 @@ quotableClips: startOffset: 3675 url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3675 endOffset: 3615 - transcript: - header: Episode Overview — Open Source Focus - header: Guest Reintroduction & Vincent’s Open Source Profile @@ -991,8 +1001,19 @@ transcript: sec: 3715 time: '1:01:55' who: Alexey ---- +context: 'Context: This episode surveys the Scikit-Learn ecosystem, related projects + (Scikit Lego, Skrub), and initiatives like Calm Code and :probabl., weaving together + career stories, governance, tooling choices, content production, maintainer handoffs, + CI/cost concerns, and early business models. + Core unifying idea: Long-term health and impact of open-source machine‑learning + projects depends not just on great code but on a deliberate integration of engineering + excellence, community stewardship, accessible education, and sustainable operational/business + practices — i.e., building pragmatic tools and clear learning paths while creating + incentives (training, consulting, platform models, cost‑efficient infrastructure, + and low‑pressure contributor experiences) that enable maintainers and contributors + to keep projects useful, adoptable, and durable.' +--- Links: * [probabl. 
YouTube channel](https://www.youtube.com/@UCIat2Cdg661wF5DQDWTQAmg){:target="_blank"} diff --git a/_podcast/to-update/s20e08-from-hackathons-to-developer-advocacy.md b/_podcast/practical-devrel-demofirst-education-and-open-source.md similarity index 96% rename from _podcast/to-update/s20e08-from-hackathons-to-developer-advocacy.md rename to _podcast/practical-devrel-demofirst-education-and-open-source.md index 5710505b..8f510039 100644 --- a/_podcast/to-update/s20e08-from-hackathons-to-developer-advocacy.md +++ b/_podcast/practical-devrel-demofirst-education-and-open-source.md @@ -1,7 +1,6 @@ --- -title: "Context: This episode weaves together practical production tips (video, audio, lighting), hands-on developer programs (hackathons, MLH fellowship), contribution and onboarding best practices, content/demo strategies, and leadership for scaling community initiatives. - -Core narrative: Empowering developer growth by building repeatable, hands-on learning and contribution pathways—well-run hackathons, mentorship-driven fellowships, clear onboarding and demo workflows, and scalable program design—so more people can learn by doing, successfully contribute to real projects, and transition into lasting technical roles." +title: 'Developer Advocacy Through Community Impact: Technical Leadership, Open Source + Mentorship & Demo-Driven Communication' short: From Hackathons To Developer Advocacy season: 20 episode: 8 @@ -16,13 +15,25 @@ links: apple: https://podcasts.apple.com/us/podcast/from-hackathons-to-developer-advocacy-will-russel/id1541710331?i=1000709634418 spotify: https://open.spotify.com/episode/4Lt785S38GuK0W2m7naRKt youtube: https://www.youtube.com/watch?v=vXbMUfHE1OE - -description: 'Master hackathons, MLH Fellowship & onboarding: organize events, judge with scoring matrices, scale open-source mentorships to onboard hireable contributors.' 
-intro: How do you run hackathons and scale MLH‑style open source fellowships while actually getting contributors onboarded, mentored, and judged fairly? In this episode Will Russell — Developer Advocate at Kestra who previously built open source education programs — walks through practical approaches to organizing hackathons and fellowship programs that move people from first contribution to sustained involvement.

We cover formats and online tools for running events, leadership and soft skills for coordination, judging strategies (scoring matrices, categories, tie‑breakers), and sponsor‑driven challenges. Will shares the MLH Fellowship mentorship model, contribution best practices (PR quality, Git skills), and concrete onboarding tactics for complex repos — including environment setup, maintainer collaboration, and cloud workarounds like Colab and VMs. A Willmojis case study highlights image recognition and demo workflow ideas. The conversation also addresses program scalability, budgets, accessibility for students and career changers, and how developer advocacy, documentation, and video demos support adoption.

Listen to learn actionable frameworks for organizing hackathons, onboarding contributors, and scaling MLH‑style open source fellowships so your program produces real contributions and sustainable community growth +description: Master developer advocacy, open source mentorship & demo-driven communication + to elevate technical leadership, amplify community impact & accelerate adoption. +intro: How do developer advocates create measurable community impact while balancing + technical leadership, mentorship, and clear communication? In this episode Will + Russell, Developer Advocate at Kestra, explores that question through the lens of + workflow orchestration and developer education. Will is known for his technical + video content on workflow orchestration and for building open source education programs + that help new contributors make their first pull requests.

We cover core + topics including technical leadership in community settings, practical approaches + to open source mentorship, and the power of demo-driven communication and documentation + to make complex tools approachable. Will discusses how creating targeted videos + and clear docs lowers barriers for developers and nurtures sustainable contributor + pipelines.

Listeners will come away with concrete ideas for designing open + source education programs, using demos to explain concepts, and applying developer + advocacy techniques to grow healthier communities. This episode is useful for developer + advocates, engineering managers, open source maintainers, and anyone interested + in workflow orchestration, developer education, and community-driven technical leadership. dateadded: 2025-05-26 - duration: PT01H01M29S - quotableClips: - name: Episode Opening & Guest Overview startOffset: 0 @@ -136,7 +147,6 @@ quotableClips: startOffset: 3689 url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3689 endOffset: 3689 - transcript: - header: Episode Opening & Guest Overview - line: This week we’ll discuss many topics—developer advocacy, organizing hackathons, @@ -1573,8 +1583,17 @@ transcript: sec: 3689 time: '1:01:29' who: Alexey ---- +context: 'Context: This episode weaves together practical production tips (video, + audio, lighting), hands-on developer programs (hackathons, MLH fellowship), contribution + and onboarding best practices, content/demo strategies, and leadership for scaling + community initiatives. + Core narrative: Empowering developer growth by building repeatable, hands-on learning + and contribution pathways—well-run hackathons, mentorship-driven fellowships, clear + onboarding and demo workflows, and scalable program design—so more people can learn + by doing, successfully contribute to real projects, and transition into lasting + technical roles.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/wrussell1999/){:target="_blank"} diff --git a/_podcast/to-update/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md similarity index 96% rename from _podcast/to-update/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md rename to _podcast/practical-generative-ai-consulting-from-expertise-to-impact.md index 64d5f520..d6d68e15 100644 --- a/_podcast/to-update/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md +++ b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md @@ -1,7 +1,5 @@ --- -title: "Context: The episode follows a journey from academic foundations in economics, Chinese, and statistics through industry research and platform data roles to independent generative-AI consultancy, touching on technical approaches (SOTA models, model‑in‑the‑loop annotation, evaluation), product and business priorities, client acquisition and pitching, entrepreneurship realities, and community support initiatives. - -Core: The unifying idea is translating deep technical expertise into pragmatic, production‑oriented generative-AI solutions that deliver measurable business impact—anchored in rigorous evaluation, stakeholder ownership, evidence-based communication (workshops, decks, case studies), continual learning, and a commitment to accessibility and mentorship while managing the practicalities of running a sustainable freelance practice." 
+title: 'Launching a Freelance Generative AI Business: NLP Services and Client Acquisition' short: From a Research Scientist at Amazon to a Machine learning/AI Consultant season: 16 episode: 5 @@ -16,14 +14,25 @@ links: apple: https://podcasts.apple.com/us/podcast/from-a-research-scientist-at-amazon-to-a/id1541710331?i=1000634411188 spotify: https://open.spotify.com/episode/7gJI3ds3k1vXd3m3W9iRj9?si=oG6A7BuTSjaEoH6FhvEVug youtube: https://www.youtube.com/watch?v=4RargY8iOaE - -description: 'Discover how to launch a generative AI freelance business: NLP services, model-in-the-loop annotation, pitch-deck strategies, client leads & scalable workshops.' -intro: 'How do you turn NLP research experience into a viable generative AI freelance business — and how do you actually win clients? In this episode, Verena Weber, a former Research Scientist at Alexa AI with 7+ years in machine learning and a background in statistics, walks through that transition and the practical work that sells. We cover launching a freelance generative AI business, designing and running model-in-the-loop annotation studies (why they save time and improve consistency), model evaluation strategies for stabilizing high‑traffic utterances, and how to package offerings like generative AI workshops and use‑case discovery.

Verena also breaks down the nuts-and-bolts of client acquisition: crafting pitch decks (long and short formats), positioning, evidence and rates, LinkedIn visibility, network referrals, events and mentorship. She doesn’t skip the realities of self-employment — taxes, health insurance and admin — or content strategies to showcase expertise (technical posts, papers, side projects). Listen to learn concrete steps for becoming an NLP consultant, running annotation experiments that scale, and pitching value to SMEs and product teams.' +description: 'Learn to launch a freelance generative AI business: package NLP services, + master client acquisition and pricing to win projects and scale revenue.' +intro: How do you move from research scientist to running a freelance generative AI + business focused on NLP—and actually win clients? In this episode Verena Weber, + a former Research Scientist at Alexa AI with 7+ years in machine learning and a + background in statistics, walks through that transition and what it takes to offer + NLP services as a freelancer. Verena’s mission is to help companies prepare for + the GenAI shift, and she draws on deep NLP expertise to explain which service offerings + make sense, how to position technical skills for business clients, and practical + approaches to client acquisition in the generative AI space. Listeners will get + a clear view of launching a freelance generative AI business, including how to translate + research experience into marketable NLP services, approaches to finding and engaging + clients, and what to expect when stepping out on your own. If you’re a machine learning + professional or aspiring NLP freelancer trying to build a sustainable freelance + practice in generative AI, this episode provides grounded, experience-based guidance + to help you get started. 
dateadded: 2023-11-12 date: 2025-11-07 - duration: PT00H59M53S - quotableClips: - name: Episode Introduction startOffset: 0 @@ -137,7 +146,6 @@ quotableClips: startOffset: 3639 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3639 endOffset: 3593 - transcript: - header: Episode Introduction - line: This week, we'll talk about being a research scientist at Amazon, and transitioning @@ -1354,6 +1362,18 @@ transcript: sec: 3656 time: '1:00:56' who: Alexey +context: 'Context: The episode follows a journey from academic foundations in economics, + Chinese, and statistics through industry research and platform data roles to independent + generative-AI consultancy, touching on technical approaches (SOTA models, model‑in‑the‑loop + annotation, evaluation), product and business priorities, client acquisition and + pitching, entrepreneurship realities, and community support initiatives. + + Core: The unifying idea is translating deep technical expertise into pragmatic, + production‑oriented generative-AI solutions that deliver measurable business impact—anchored + in rigorous evaluation, stakeholder ownership, evidence-based communication (workshops, + decks, case studies), continual learning, and a commitment to accessibility and + mentorship while managing the practicalities of running a sustainable freelance + practice.' 
--- Links: diff --git a/_podcast/to-update/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md b/_podcast/practical-llm-engineering-and-rag.md similarity index 95% rename from _podcast/to-update/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md rename to _podcast/practical-llm-engineering-and-rag.md index badf0a67..0b2901e3 100644 --- a/_podcast/to-update/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md +++ b/_podcast/practical-llm-engineering-and-rag.md @@ -1,7 +1,5 @@ --- -title: "Context: This episode surveys practical, hands‑on patterns—RAG, chunking, prompting, generator–evaluator workflows, transcript pipelines, evaluation sets, monitoring, agents, memory design, and developer tooling—drawn from moving models from prototypes into real products across consulting, DevRel, and engineering roles. - -Core: The unifying idea is pragmatic, iterative engineering of LLM‑powered systems: prioritize retrieval‑first solutions that deliver immediate business value, instrument rigorous evaluation and monitoring (gold tests, failure analysis, generator–evaluator), automate pipelines and reproducible workflows, and only escalate to agentic tooling or persistent memory once data, metrics, and clear ROI justify the added complexity—treating AI as an integrated augmentation that must be built, tested, and scaled with standard software engineering practices." 
+title: 'Practical LLM Engineering and RAG: Prompting, Evaluation and Real-World Workflows' short: How to Build and Evaluate AI systems in the Age of LLMs season: 22 episode: 4 @@ -16,14 +14,27 @@ links: apple: https://podcasts.apple.com/us/podcast/how-to-build-and-evaluate-ai-systems-in-the-age-of/id1541710331?i=1000733350691 spotify: https://open.spotify.com/episode/2RD2qXaYa2ZjKjuIE7Aj6O youtube: https://www.youtube.com/watch?v=eC3RNuI6ow0 - -description: Build LLM agents and RAG pipelines using prompting, transcript automation, and evaluation to scale systems - learn chunking, monitoring, and practical build steps -intro: How do you move from prototypes to reliable, scalable LLM systems that actually deliver business value?

In this episode, Hugo Bowne‑Anderson—tracing a path from biology research into Python, PyData, DataCamp curriculum and product work, then into consulting, teaching, and developer relations—walks through practical engineering and evaluation patterns for building LLM-driven workflows.

We cover prompt engineering (role prompts, structured output, timestamps), everyday LLM use cases (summaries, translation, CSV workflows), transcript pipelines (Gemini, Descript, Loom) and automation with GitHub Actions. Hugo explains the generator–evaluator pattern for automated quality control, how to design evaluation sets and failure analysis, and techniques for logging, traces, and debuggable MVPs.

You’ll hear when to prioritize RAG (retrieval-augmented generation) and chunking strategies, when to add tool calls or agents, plus a concrete email assistant build using Gmail API + RAG. The episode closes with a four‑step framework for agents and guidance on retrieval‑based vs multi‑turn memory.

If you’re building LLM systems, this conversation gives actionable tactics for prompt engineering, evaluation, scaling transcript pipelines, and deciding when to adopt agents, embeddings, and automation +description: 'Discover LLM engineering and RAG best practices: practical prompting, + evaluation methods and deployment workflows to boost accuracy and retrieval.' +intro: How do you move from experimentation to reliable, production-ready LLM engineering + and retrieval-augmented generation (RAG)? In this episode Hugo Bowne‑Anderson — + Head of Developer Relations at Outerbounds, longtime data scientist, educator, and + host of Vanishing Gradients — walks through practical patterns for building, evaluating, + and scaling real-world LLM workflows.

We cover everyday LLM use cases (summaries, + translation, CSV work), prompting best practices (role prompts, structured output, + timestamps), and transcript pipelines using Gemini, Descript, Loom and automation + with GitHub Actions. Hugo explains the generator–evaluator pattern for automated + quality control, how to design evaluation sets and failure analysis, and concrete + chunking strategies (fixed length, sliding windows, context rotation) that unlock + RAG performance. He also discusses when to add tooling or agentic capabilities, + a four‑step framework for agents, memory design tradeoffs, and a practical email + assistant example using the Gmail API plus RAG.

Listen to learn actionable + guidance on prioritizing RAG for quick business wins, building debuggable MVPs with + logging and traces, and setting up evaluation and monitoring so your LLMs deliver + dependable results in production. dateadded: 2025-10-27 date: 2025-11-07 - duration: PT01H01M30S - quotableClips: - name: Podcast Kickoff & Hugo Bowne‑Anderson Background startOffset: 0 @@ -125,7 +136,6 @@ quotableClips: startOffset: 3655 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3655 endOffset: 3690 - transcript: - header: Episode Introduction & Guest Bio - line: This week we will talk about LLMs and AI like everyone else, I guess. @@ -1225,6 +1235,18 @@ transcript: sec: 3690 time: '1:01:30' who: Alexey +context: 'Context: This episode surveys practical, hands‑on patterns—RAG, chunking, + prompting, generator–evaluator workflows, transcript pipelines, evaluation sets, + monitoring, agents, memory design, and developer tooling—drawn from moving models + from prototypes into real products across consulting, DevRel, and engineering roles. + + Core: The unifying idea is pragmatic, iterative engineering of LLM‑powered systems: + prioritize retrieval‑first solutions that deliver immediate business value, instrument + rigorous evaluation and monitoring (gold tests, failure analysis, generator–evaluator), + automate pipelines and reproducible workflows, and only escalate to agentic tooling + or persistent memory once data, metrics, and clear ROI justify the added complexity—treating + AI as an integrated augmentation that must be built, tested, and scaled with standard + software engineering practices.' 
--- Links: diff --git a/_podcast/to-update/s17e08-building-machine-learning-products.md b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md similarity index 96% rename from _podcast/to-update/s17e08-building-machine-learning-products.md rename to _podcast/production-ml-search-vector-search-embeddings-hybrid search.md index 5190a95b..1df35494 100644 --- a/_podcast/to-update/s17e08-building-machine-learning-products.md +++ b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md @@ -1,5 +1,5 @@ --- -title: "Modern search is best understood as a decision-making system: moving beyond brittle keyword matching to learned, shared representations (embeddings) that, when combined with traditional IR constraints (filters, recency, business rules), multimodal signals, and time-aware encodings, enable scalable, reliable retrieval and ranking. The real unifying challenge is not just model choice (LLMs vs specialized encoders) but engineering—indexing, compute/storage trade-offs, hybrid architectures, query-time weighting, operational tooling, vendor selection, and metrics-driven iteration—so that representation learning translates into measurable product and business outcomes." 
+title: 'Production ML Search: Embeddings, Hybrid Architectures and Scalable Indexing' short: Building Machine Learning Products season: 17 episode: 8 @@ -14,13 +14,26 @@ links: apple: https://podcasts.apple.com/us/podcast/building-machine-learning-products-reem-mahmoud/id1541710331?i=1000649393833 spotify: https://open.spotify.com/episode/4jNredXndQ2b2evgfSmD2G?si=gU2kT-zXSX27hDPgLtwMgQ youtube: https://www.youtube.com/watch?v=m45tNY-8gY8 - -description: Master vector search, embeddings & hybrid search—learn indexing, multimodal fusion, vector DB trade-offs & ops to boost relevance, latency & personalization -intro: 'How do you build vector search and hybrid retrieval that actually works in production—balancing embeddings, indexing, multimodal fusion, latency, and business constraints? In this episode, Reem Mahmoud, Director of Data Science at intervu.ai, breaks down practical approaches to vector search, hybrid retrieval, and embedding pipelines for real-world systems.

Reem guides listeners through fundamentals—text search and inverted indexes (Lucene), candidate generation and ML ranking—then dives into vector search: embedding generation, compute vs. storage trade-offs, and when to use LLMs versus specialized encoders. You’ll hear concrete advice on multimodal embeddings (text, images, CLIP), hybrid search that combines vector similarity with filters and recency, and techniques for feature fusion, time encoding, and query-time weighting. The conversation also covers vector DB selection, operationalization best practices, search metrics and A/B testing, and prototyping e-commerce personalization with embeddings.

If you’re building or evaluating search/retrieval systems, this episode offers actionable guidance on embeddings, indexing strategies, multimodal fusion, and how to translate business rules into performant hybrid retrieval—so you can iterate faster and measure impact.' +description: 'Master vector search, embeddings and hybrid search: scalable indexing, + multimodal retrieval and ranking tactics to boost relevance and reduce latency.' +intro: How do you move from prototypes to production ML search that scales and stays + relevant? In this episode Reem Mahmoud, Director of Data Science at intervu.ai, + breaks down practical approaches to building production ML search systems—focusing + on embeddings, hybrid architectures, and scalable indexing.

We cover core + concepts like inverted indexes and Lucene basics, candidate generation versus ML + ranking, and why you should avoid hand-rolling indexes. Dive into vector search + fundamentals—embeddings as shared representations, embedding pipelines, and the + trade-offs between vector compute and storage. Learn how multimodal embeddings (text, + images, CLIP) and feature fusion enable richer relevance, and how hybrid search + combines vector similarity with filters, recency, and business constraints. The + episode also explores time encoding in embeddings, query-time weighting, LLMs versus + specialized encoders, vector DB selection, and operationalization—offline tests, + A/B metrics, and enabling engineers for fast iteration.

Listen for actionable + guidance on scalable indexing strategies, choosing a vector DB, and measuring search + impact so you can design reliable production search that balances latency, relevance, + and business KPIs. dateadded: 2024-03-17 - duration: PT01H05M23S - quotableClips: - name: 'Guest Introduction: Daniel, Superlinked, and VectorHub' startOffset: 107 @@ -127,7 +140,6 @@ quotableClips: startOffset: 4008 url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=4008 endOffset: 3923 - transcript: - header: 'Guest Introduction: Daniel, Superlinked, and VectorHub' - line: This week, we'll talk about building production search systems. We have a @@ -1151,8 +1163,15 @@ transcript: sec: 4030 time: '1:07:10' who: Daniel +context: 'Modern search is best understood as a decision-making system: moving beyond + brittle keyword matching to learned, shared representations (embeddings) that, when + combined with traditional IR constraints (filters, recency, business rules), multimodal + signals, and time-aware encodings, enable scalable, reliable retrieval and ranking. + The real unifying challenge is not just model choice (LLMs vs specialized encoders) + but engineering—indexing, compute/storage trade-offs, hybrid architectures, query-time + weighting, operational tooling, vendor selection, and metrics-driven iteration—so + that representation learning translates into measurable product and business outcomes.' 
--- - Links: * [LinkedIn](https://www.linkedin.com/in/reemmahmoud/recent-activity/all/){:target="_blank"} diff --git a/_podcast/to-update/s20e05-data-intensive-ai.md b/_podcast/production-ready-ai-engineering.md similarity index 94% rename from _podcast/to-update/s20e05-data-intensive-ai.md rename to _podcast/production-ready-ai-engineering.md index e4b50df0..5b104f83 100644 --- a/_podcast/to-update/s20e05-data-intensive-ai.md +++ b/_podcast/production-ready-ai-engineering.md @@ -1,7 +1,5 @@ --- -title: "Context: a practitioner’s tour through the end-to-end work of turning data and models into reliable, efficient products—from Java and data engineering foundations to AI fine-tuning, prompt craft, tooling choices, and developer workflows. - -Core: the episode’s through-line is a data‑centric engineering mindset for trustworthy, production-ready AI: rigorous testing and pipeline design to ensure data trust, deliberate choices about models and tools for cost and performance, prompt and token-efficiency techniques to make inference practical, and pragmatic engineering patterns (architecture, caching, assistants) that let teams ship AI features and sustain them—while using content and teaching as a way to refine thinking and capture business value." +title: 'Production AI Engineering: Data Pipelines, Prompt Optimization and Caching' short: Data Intensive AI season: 20 episode: 5 @@ -16,13 +14,24 @@ links: apple: https://podcasts.apple.com/us/podcast/data-intensive-ai-bartosz-mikulski/id1541710331?i=1000700288876 spotify: https://open.spotify.com/episode/0nFSU92IQDbM4C9FLvdn4z youtube: https://www.youtube.com/watch?v=BP6w_vKySN0 - -description: Master data pipeline testing and prompt engineering—learn snapshot tests, prompt compression & caching to ensure data trust and cut model costs -intro: How do you turn prototype AI into reliable production systems that stakeholders can trust? 
In this episode, Bartosz Mikulski — an AI and data engineer who helps move projects from demo to production, builds testing infrastructure, and teaches practitioners — walks through practical approaches to building trustworthy AI through data pipeline testing and prompt engineering.

We dig into testing strategies for data pipelines (snapshot and integration testing), tools like Great Expectations, Soda, SQL vs Spark tests, and guidance on when to use Apache Spark. Bartosz explains the data engineering role in preprocessing and fine-tuning, plus “invisible” AI use cases like augmented generation and review analysis. On the prompt side, he covers in-context learning, prompt evaluation and formatting tradeoffs, token optimization with prompt compression, and prompt caching and model efficiency (attention caching, Claude). He also discusses open-source tools (DeepSeek, Perplexity), AI-driven product patterns (lead scoring, Chrome extension architectures), and coding assistants like Cursor versus GitHub Copilot.

Listen for concrete testing practices, prompt optimization techniques (caching and compression), and tool recommendations you can apply to increase model reliability and reduce production risk +description: 'Master production AI engineering: build scalable data pipelines, optimize + prompts, and implement caching to cut latency and costs for production-ready models' +intro: How do you move AI projects from proof-of-concept to reliable production systems + while keeping prompts, pipelines, and response times under control? In this episode + Bartosz Mikulski, an AI and data engineer who specializes in productionizing AI, + breaks down the engineering work required to make models dependable beyond demos. + Bartosz explains how to design robust data pipelines, apply prompt optimization + practices, and introduce caching strategies that reduce load and improve responsiveness. + He also covers building testing infrastructure and using tests to surface issues + that block production readiness—then how to fix those issues. Listeners will get + concrete, engineering-focused insights into production AI, including practical approaches + to pipeline orchestration, prompt tuning for stability, and where caching fits in + an operational stack. Whether you’re responsible for deploying models, improving + inference reliability, or creating reproducible pipelines, this conversation offers + actionable techniques and perspectives for turning experiments into maintainable + production systems. dateadded: 2025-03-26 - duration: PT01H01M37S - quotableClips: - name: Episode Opening & Guest Overview (Data Intensive AI) startOffset: 0 @@ -120,7 +129,6 @@ quotableClips: startOffset: 3621 url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3621 endOffset: 3697 - transcript: - header: Episode Opening & Guest Overview (Data Intensive AI) - line: This week, we’ll talk about Data Intensive AI. 
Our special guest today is @@ -1134,8 +1142,17 @@ transcript: sec: 3697 time: '1:01:37' who: Alexey ---- +context: 'Context: a practitioner’s tour through the end-to-end work of turning data + and models into reliable, efficient products—from Java and data engineering foundations + to AI fine-tuning, prompt craft, tooling choices, and developer workflows. + Core: the episode’s through-line is a data‑centric engineering mindset for trustworthy, + production-ready AI: rigorous testing and pipeline design to ensure data trust, + deliberate choices about models and tools for cost and performance, prompt and token-efficiency + techniques to make inference practical, and pragmatic engineering patterns (architecture, + caching, assistants) that let teams ship AI features and sustain them—while using + content and teaching as a way to refine thinking and capture business value.' +--- Links: * [LinkedIn](https://www.linkedin.com/in/mikulskibartosz/){:target="_blank"} diff --git a/_podcast/to-update/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md similarity index 96% rename from _podcast/to-update/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md rename to _podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md index 15b3df23..79b026e4 100644 --- a/_podcast/to-update/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md +++ b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md @@ -1,5 +1,5 @@ --- -title: "The episode’s central idea is the pragmatic, end-to-end translation of data science and ML research into real-world product impact: owning the full stack from instrumentation and data collection through real-time streaming inference, experimentation, and rollout, while making practical engineering trade-offs (hardware, platforms, team roles) and modeling human behavior and incentives to drive measurable 
engagement and business outcomes." +title: 'From Theme Parks to Tesla: Building Data Products Through Applied ML and Full-Stack Engineering' short: 'From Theme Parks to Tesla: Building Data Products That Work' season: 21 episode: 9 @@ -14,13 +14,26 @@ links: apple: https://podcasts.apple.com/us/podcast/from-theme-parks-to-tesla-building-data-products-that-work/id1541710331?i=1000731198436 spotify: https://open.spotify.com/episode/5dpBs4xr3zMkBDw6cTYHQE?si=pivilqeDTHOiNCBb1bFHdA youtube: https://www.youtube.com/watch?v=gXvVMvhfrIY - -description: Discover crowd modeling, queue prediction and real-time recommendations to optimize visitor flow, reduce wait times and boost engagement with smart routing -intro: 'How can theme parks use data to cut wait times and guide visitors in real time? In this episode, Abouzar Abbaspour — an EngD-trained machine learning and data engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) and automotive (Tesla) — walks through building systems that optimize visitor flow using crowd modeling, queue prediction and real-time recommendations.

We cover the core problems of modeling crowd dynamics and ride capacity, designing a next-best-action visitor routing engine, and using behavioral route modeling and probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: incentivizing app adoption to collect data, validating recommendations with employee swiping experiments and A/B tests, and running streaming pipelines for live experiments and rollout (engagement metrics and accuracy measurement). He also touches on deployment concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines — and how these engineering choices affect measurement and user experience.

Listen to learn concrete approaches for queue prediction, visitor routing, real-time processing, and experimentation so you can design and validate systems that improve throughput and guest satisfaction.' +description: Discover crowd modeling, queue prediction and real-time recommendations + to optimize visitor flow, reduce wait times and boost engagement with smart routing +intro: 'How can theme parks use data to cut wait times and guide visitors in real + time? In this episode, Abouzar Abbaspour — an EngD-trained machine learning and + data engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) + and automotive (Tesla) — walks through building systems that optimize visitor flow + using crowd modeling, queue prediction and real-time recommendations.

We + cover the core problems of modeling crowd dynamics and ride capacity, designing + a next-best-action visitor routing engine, and using behavioral route modeling and + probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: + incentivizing app adoption to collect data, validating recommendations with employee + swiping experiments and A/B tests, and running streaming pipelines for live experiments + and rollout (engagement metrics and accuracy measurement). He also touches on deployment + concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines + — and how these engineering choices affect measurement and user experience.

+ Listen to learn concrete approaches for queue prediction, visitor routing, real-time + processing, and experimentation so you can design and validate systems that improve + throughput and guest satisfaction.' dateadded: 2025-10-21 - duration: PT01H35S - quotableClips: - name: Podcast Introduction & Event Info startOffset: 0 @@ -107,7 +120,6 @@ quotableClips: startOffset: 3610 url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3610 endOffset: 3635 - transcript: - header: Podcast Introduction & Event Info - line: Hi everyone, welcome to our event. This event is brought to you by Data Docs @@ -1228,8 +1240,13 @@ transcript: sec: 3635 time: '1:00:35' who: Abouzar +context: 'The episode’s central idea is the pragmatic, end-to-end translation of data + science and ML research into real-world product impact: owning the full stack from + instrumentation and data collection through real-time streaming inference, experimentation, + and rollout, while making practical engineering trade-offs (hardware, platforms, + team roles) and modeling human behavior and incentives to drive measurable engagement + and business outcomes.' --- - Links: * [Linkedin](https://www.linkedin.com/in/abouzar-abbaspour/){:target="_blank"} diff --git a/_podcast/to-update/s16e03-collaborative-data-science-in-business.md b/_podcast/to-update/s16e03-collaborative-data-science-in-business.md deleted file mode 100644 index 21a0e063..00000000 --- a/_podcast/to-update/s16e03-collaborative-data-science-in-business.md +++ /dev/null @@ -1,1404 +0,0 @@ ---- -title: "Context: A conversation with EasyJet lead data scientist Ioannis Mesionis that covers career roots, team structure, project intake and prioritization, the end‑to‑end data‑product lifecycle (EDA, modeling, pilots, A/B tests, rollout), stakeholder engagement, estimation and cadence, communication and soft skills, and pragmatic MLOps/monitoring choices. 
- -Core theme: Deliver measurable business impact by treating data science as a product — combine domain knowledge and stakeholder partnership with clear intake/DoD processes, iterative MVP experimentation, and lightweight engineering and monitoring practices so models move quickly, safely, and transparently from idea to production." -short: Collaborative Data Science in Business -season: 16 -episode: 3 -guests: -- ioannismesionis -image: images/podcast/s16e03-collaborative-data-science-in-business.jpg -ids: - anchor: atatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c - youtube: 1pExOVuCF8Q -links: - anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c - apple: https://podcasts.apple.com/us/podcast/collaborative-data-science-in-business-ioannis-mesionis/id1541710331?i=1000632860980 - spotify: https://open.spotify.com/episode/46DN6rAlufvvXaqdOomoTe?si=OMPDN8m5QZWsc5kJY8IcAA - youtube: https://www.youtube.com/watch?v=1pExOVuCF8Q - -description: Discover MLOps tactics to prioritize data products, run A/B testing and enable model monitoring for faster validation, reliable rollouts and stakeholder buy-in -intro: How do you prioritize data product work, validate models in production, and keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a practical data product operating model for tackling those challenges.

Drawing on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains a four‑phase funnel with a "single front door" intake, a Definition of Done template with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR feasibility. He breaks down when to treat work as analytics vs. research, how R&D sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies for production rollout as MLOps capabilities evolve. Technical tooling and monitoring get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection — plus pragmatic dashboarding and alerting patterns. Listeners will come away with actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder engagement, and the estimation and cadence practices that make ML teams productive -dateadded: 2023-10-29 - -duration: PT01H14S - -quotableClips: -- name: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) - startOffset: 100 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=100 - endOffset: 154 -- name: Career origin & early projects (mathematics degree, master's, internship model) - startOffset: 154 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=154 - endOffset: 443 -- name: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' - startOffset: 443 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=443 - endOffset: 512 -- name: 'Stakeholder collaboration: weekly embedded meetings and observation' - startOffset: 512 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=512 - endOffset: 675 -- name: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' - startOffset: 675 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=675 - endOffset: 840 -- name: 'Operating model for data products: four-phase funnel and accountability' - startOffset: 840 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=840 - endOffset: 923 -- name: 
'Project intake & prioritization: "single front door" and cross-functional - kickoff' - startOffset: 923 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=923 - endOffset: 1057 -- name: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' - startOffset: 1057 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1057 - endOffset: 1254 -- name: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' - startOffset: 1254 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1254 - endOffset: 1272 -- name: 'Data science vs analytics: choosing technical approach and leads' - startOffset: 1272 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1272 - endOffset: 1368 -- name: 'Research & development: modeling work, sprint planning and Kanban usage' - startOffset: 1368 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1368 - endOffset: 1517 -- name: 'Pilot & A/B testing: validating models against baseline KPIs and feedback - loops' - startOffset: 1517 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1517 - endOffset: 1645 -- name: 'Production rollout: spectrum of production and evolving MLOps capabilities' - startOffset: 1645 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1645 - endOffset: 1698 -- name: 'Organizational structure: domain-focused lead data scientists (scheduling, - ops, pricing)' - startOffset: 1698 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1698 - endOffset: 1821 -- name: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' - startOffset: 1821 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1821 - endOffset: 2138 -- name: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' - startOffset: 2138 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2138 - endOffset: 2297 -- name: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' - startOffset: 2297 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2297 - 
endOffset: 2449 -- name: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' - startOffset: 2449 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2449 - endOffset: 2493 -- name: 'Communicating technical results: simplifying concepts for non‑technical audiences' - startOffset: 2493 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2493 - endOffset: 2710 -- name: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a helper' - startOffset: 2710 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2710 - endOffset: 2918 -- name: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' - startOffset: 2918 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2918 - endOffset: 2950 -- name: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' - startOffset: 2950 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2950 - endOffset: 3213 -- name: 'Model monitoring with Evidently: drift detection and integration plans' - startOffset: 3213 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3213 - endOffset: 3311 -- name: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' - startOffset: 3311 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3311 - endOffset: 3429 -- name: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' - startOffset: 3429 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3429 - endOffset: 3660 -- name: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' - startOffset: 3660 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3660 - endOffset: 3614 - -transcript: -- header: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) -- line: This week, we'll talk about collaborative data science in business. We have - a special guest today, Ioannis. Ioannis is a lead data scientist at EasyJet, if - you’ve heard about this airline – I certainly have because I used it a couple - of times. 
In his role, he works on creating data products and solving business - problems. He also leads the EasyJet MLOps team. Ioannis is also one of the graduates - of our MLOps Zoomcamp. I was quite surprised that he actually took it – with his - experience, he should have been one of the instructors. [Ioannis chuckles] But - I'm pretty happy that you, Ioannis, did take the course because otherwise, we - wouldn't be talking now otherwise. Welcome! - sec: 100 - time: '1:40' - who: Alexey -- line: Yeah. Thanks for having me and for the introduction. It's been a pleasure. - sec: 150 - time: '2:30' - who: Ioannis -- header: Career origin & early projects (mathematics degree, master's, internship - model) -- line: Before we go into our main topic of business and data science, let's start - with your background. Can you tell us about your career journey so far? - sec: 154 - time: '2:34' - who: Alexey -- line: Yeah, absolutely. Education-wise, I have a bachelor’s in mathematics and a - postgraduate in data science from Essex University. It's been fun because I wasn't - always planning to become a data scientist. Essentially, I'm Greek and this is - important, because in Greece, usually when you have a bachelor’s in mathematics, - there are not many things that you can do with this degree. You either become - a teacher – which is, although exciting, wasn't something that I wanted to pursue - – or you find a way to mix it with some other things. After I finished my Bachelor’s, - I was thinking about financial mathematics or actuarial mathematics. I didn't - know what to do. - sec: 165 - time: '2:45' - who: Ioannis -- line: Luckily, I got introduced to the notion of data science by watching Netflix - – actually, the famous Sherlock Series. There was a moment when Sherlock and John - Watson were on-screen, and John Watson was impressed by Sherlock’s decision-making - skills. 
I remember he asked him, “How do you make decisions that fast and so accurately?” - And Sherlock replied, “You see, but you do not observe.” So that was John's problem. - That really sat well with me, and I was thinking, “I want to improve my decision-making - skills.” And this is how I started Googling around “decision-making, inference” - and all this kind of stuff. I came across data science as a profession. That was - back in 2016, I think. So yeah, I did a master’s in data science from Essex University, - followed by a three-month internship, where I was able to develop a machine learning - model to predict children who are being abused in their current environment. That - was great because it showed me the power that lies behind data science and machine - learning in general. I knew that this was what I wanted to do. - sec: 165 - time: '2:45' - who: Ioannis -- line: After the internship, I had a four-month experience working as a data scientist - consultant at a company named AKKA Technologies in Geneva, Switzerland. After - four months, I decided to move back to the UK, where I started working as a data - scientist for EasyJet, where I'm still working. I started as a graduate data scientist, - got promoted to senior data scientist, and right now, I'm still a lead data scientist, - working with business stakeholders and trying to transform Easy to become the - world's most data-driven headline. Yeah, that's pretty much me. - sec: 165 - time: '2:45' - who: Ioannis -- line: Do you get a discount at EasyJet if you want to go somewhere? - sec: 321 - time: '5:21' - who: Alexey -- line: '[chuckles] I think that''s one of the best perks that we have. [chuckles] - Yeah, the truth is that we do and it''s an excellent discount. I use it all the - time to travel to different European cities. It''s been great.' 
- sec: 325 - time: '5:25' - who: Ioannis -- line: Because EasyJet is… when it comes to Berlin, I don't know about the other - cities and I'm based in Berlin – it's one of the airlines I usually use when I - want to go somewhere. - sec: 340 - time: '5:40' - who: Alexey -- line: I'm happy to hear that we're doing something good, then. [chuckles] - sec: 353 - time: '5:53' - who: Ioannis -- line: Well, in terms of coverage, it's probably one of the best ones – at least - going to Italy or some other countries. Funny that you… [cross-talk] It’s funny - that you mentioned the Sherlock TV show. Have you seen…? There is another different - TV show (an American one) called Numbers. Have you seen that one? - sec: 356 - time: '5:56' - who: Alexey -- line: Oh, that's interesting. Not really. But noted. - sec: 381 - time: '6:21' - who: Ioannis -- line: It's about a mathematician who uses his skills to solve crimes. They use statistics - and data science. Well, I wouldn't call it “data science” in the sense that you - and I mean it. But still, it's quite close. - sec: 385 - time: '6:25' - who: Alexey -- line: I'm always excited to hear about these use cases where data science is being - used for good, like the project that you just mentioned – to solve crimes or the - internship that I did. I think it's great to show how data science can serve the - people or not be present to replace people’s jobs are some of the things that - you hear from time to time. - sec: 408 - time: '6:48' - who: Ioannis -- line: Yeah, so it's called Numbers. And I think the E is spelled with a 3. So, it's - like Numb3rs. - sec: 431 - time: '7:11' - who: Alexey -- line: I think it rings a bell. - sec: 440 - time: '7:20' - who: Ioannis -- header: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' -- line: Yeah. Anyways, what do you do as a lead data scientist? 
- sec: 443 - time: '7:23' - who: Alexey -- line: Currently, my role as a lead data scientist is a partnership with the business - stakeholders from Digital Customer and Marketing. These are the departments that - I oversee from the data science and analytics perspective. I try to understand - their pain points and translate them into data products and data solutions that - go into production and solve whatever problem we encounter at the time. You can - think of my role as having accountability for the projects to ensure that they - reach production and, of course, we meet the financial benefits that have been - agreed upon at the beginning of every financial year. - sec: 448 - time: '7:28' - who: Ioannis -- line: In practice, what do you mean when you say that you “partner with business - stakeholders from Digital Marketing”? What does it look like in practice? Is it - you proactively reaching out to them saying, “Hey, can we talk?” Or do they reach - out to you? Or is it a combination of both? What does this collaboration look - like in your case? - sec: 489 - time: '8:09' - who: Alexey -- header: 'Stakeholder collaboration: weekly embedded meetings and observation' -- line: It's a great question. Usually, one of the things that I love about EasyJet - is that it's a really friendly environment. You can think of it as me having a - close collaboration in terms of meetings, sitting with them during the business - days, and trying to understand what decisions they have to make on a daily basis - and then trying to understand, from their perspective, what their strategies are - and what their vision is for their department, and understand how data science - can support reaching their vision. This is how it looks on a day-to-day basis - – meetings and meetups, etc. - sec: 512 - time: '8:32' - who: Ioannis -- line: So they have their usual day-to-day meetings, and you’re like, “Hey, can I - join you? 
I just want to observe what you do.” - sec: 552 - time: '9:12' - who: Alexey -- line: Kind of, yes. We have a recurring meeting where we discuss what they're doing, - brainstorm together to have – let's call it a framework, where we discuss their - day-to-day job and what they're trying to improve and see how I can support them - with data science. - sec: 561 - time: '9:21' - who: Ioannis -- line: So you have a monthly meeting or something like that? - sec: 582 - time: '9:42' - who: Alexey -- line: Even more frequent – weekly, actually. - sec: 585 - time: '9:45' - who: Ioannis -- line: Weekly, okay. [Ioannis chuckles] There are some leaders from these departments, - and you talk to them saying, “Hey, what’s up? What are the current problems you - have? How's it going with the previous projects we implemented for you?” And things - like that. Right? - sec: 589 - time: '9:49' - who: Alexey -- line: Absolutely. The way I frame it is – I think of the heads of the different - departments, from Digital Customer and Marketing as being my best friends in the - working environment and try to understand how I can be supportive and how I can - help them. - sec: 609 - time: '10:09' - who: Ioannis -- line: So how can you be supportive? - sec: 624 - time: '10:24' - who: Alexey -- line: '[chuckles] Exactly!' - sec: 626 - time: '10:26' - who: Ioannis -- line: What does it look like? - sec: 629 - time: '10:29' - who: Alexey -- line: Usually, it involves me getting enough business knowledge. If we talk about - the Digital [department], it involves me understanding how, let's say, how the - PPC advertisements work or how the SEO organic results work, and trying to understand - what their aim is – which metrics they're interested in and what they do on a - day-to-day basis. Then I see, “You know what? 
If we had a predictive model that - could do X, Y, and Z, would that benefit you?” And then we have this kind of discussion - that would essentially create some clarity on the business problem that we will - then try to tackle. - sec: 633 - time: '10:33' - who: Ioannis -- header: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' -- line: I’ve heard the term “digital department” [from you] many times but to be honest, - I have no idea what it actually means. It probably means different things at different - companies, right? [Ioannis agrees] because different companies need to do different - things. In your case, you mentioned PPC advertisement – I don't know what PPC - is – Pay Per Click, right? - sec: 675 - time: '11:15' - who: Alexey -- line: Exactly. Pay Per Click. - sec: 697 - time: '11:37' - who: Ioannis -- line: So the digital department is also some marketing stuff, right? - sec: 699 - time: '11:39' - who: Alexey -- line: Exactly. Pay per click, if you think about it, these are the sponsor ads that - you see on Google. If you go on Google, and you type “flights from London Gatwick - to Berlin,” let's say, and you press “enter,” you see the 10 results that appear - on the first page of Google. What you can see there first are usually the sponsored - ads. These are the pay-per-click ads, as they’re known. The reason they're called - “pay per click” is because there is an incurred cost every time a person clicks - on that specific ad. We're trying to, in a way, optimize sponsored ads that appear - on top. And we do the same thing for SEO results – we tag the organic URLs that - appear which are usually below the sponsor ads. In a way, it’s an optimization - that we're trying to do, so that the flights that we want to promote always appear - on top and then, hence we can improve the conversion rate. 
- sec: 703 - time: '11:43' - who: Ioannis -- line: The other day, I was checking the cost per click in Google for keywords like - “MLOps,” or “MLOps courses”. [Ioannis chuckles] Sometimes, for more niche words, - it's like three euros per click, and then for more broad ones, it's like four - or five, which was like, “Wow, is it that expensive?” - sec: 767 - time: '12:47' - who: Alexey -- line: Yeah, yeah. [chuckles] You have to bid on the right keywords, and then become - relevant and all this kind of stuff that is happening in Google behind the curtains. - sec: 794 - time: '13:14' - who: Ioannis -- line: For you, as a lead data scientist, you need to figure out what these people - talk about, like, “What does PPC mean?” “What do people care about?” “What is - optimization?” And then, with this knowledge that you can extract from them (learn - from them) you then go and share this knowledge with the data science team and - you say, “Okay, these are the problems that these departments are struggling with. - Let's think about how we can help them.” Right? [Ioannis agrees] And then you - translate the problems into the language of data science and then, together with - the team, you work on solving this. Right? - sec: 802 - time: '13:22' - who: Alexey -- line: Exactly. Yeah, absolutely. - sec: 838 - time: '13:58' - who: Ioannis -- header: 'Operating model for data products: four-phase funnel and accountability' -- line: In addition to communicating with stakeholders, I think you mentioned other - things – you make sure that projects reach production. What does that mean for - you? Okay, you first talked with the stakeholders, you understood that these are - the pain points they have – what happens next? What do you do next as the lead - data scientist? 
- sec: 840 - time: '14:00' - who: Alexey -- line: As soon as I have the problem statement defined, we have an operating model - within EasyJet that really helps us to understand, first of all, what the different - steps are that we have to take to ensure that this resolution of the problem will - reach production, and then we make sure that we adhere to all these different - steps. There's a sequence that we follow. As a lead data scientist, I am accountable - for ensuring that all of these processes are being followed. We make sure that - when the data product reaches production, it will have the impact that was expected. - And yeah, that's pretty much it in terms of my role. I can talk a little bit more - about the framework if you want me to. - sec: 863 - time: '14:23' - who: Ioannis -- line: That’s quite interesting. What are these steps and what is this operating - model? - sec: 917 - time: '15:17' - who: Alexey -- header: 'Project intake & prioritization: "single front door" and cross-functional - kickoff' -- line: Yes, the operating model that we have, I think is one of the best things that - we have created in EasyJet. I had a speech about that at the MLOps Summit. The - operating model consists of different stages – I think it's four phases, if you - will, that highlight all the different steps that we need to take to ensure that - the model will reach production. The first thing is to get clarity on the problem - statement, and this is pretty much my role. We like to call this a “single front - door,” where we take a business problem or an idea into the funnel. - sec: 923 - time: '15:23' - who: Ioannis -- line: As soon as we do this, we have a meeting where all the relevant stakeholders - come together and discuss the idea a little bit more. 
In attendance, you would - expect people such as the business analysts and the finance team to understand - the financial benefits that might be involved with the project, a lead data scientist, - data engineers – every single person that needs to be involved in that specific - project. As soon as we do that and we understand, “You know what? There's a real - possibility of something good in this project,” we can take this on. We prioritize - based on different ideas that have been submitted over time. And then we create - something like a priority, “You know what? This problem is the most crucial one, - so let's try to work on that first.” - sec: 923 - time: '15:23' - who: Ioannis -- line: As soon as we pick up a project, we will create the so-called “Definition - of ‘Done,’” which is at the business understanding phase, where we try to understand - a little bit more about the requirements that we need to pick to make this project - a success, which business KPIs we need to influence, improve, or increase or decrease, - and how we can measure the benefits. For the latter, it means, let's say, I give - you random numbers as an outcome, how do you know whether these random numbers - are good or not? So we make sure that we create a document (the Definition of - Done document) that highlights, “This is the data product. This is what production - looks like. These are the benefits that are going to come about based on this - calculation methodology.” - sec: 923 - time: '15:23' - who: Ioannis -- header: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' -- line: A large document? - sec: 1057 - time: '17:37' - who: Alexey -- line: Not that large. Usually it's a single document – we have a template. You can - think about two to three pages, tops. - sec: 1060 - time: '17:40' - who: Ioannis -- line: Two or three, okay. - sec: 1069 - time: '17:49' - who: Alexey -- line: Yeah. It's not that bad, I think.
It outlines on a high level what things - we need to make sure to deliver at the end of the day so that we don't have really - much of a moving target, if you will. - sec: 1071 - time: '17:51' - who: Ioannis -- line: I assume you have some sort of a template, right? A Google Document or maybe - a Confluence page, and then you just copy this page and fill in all the things. - sec: 1085 - time: '18:05' - who: Alexey -- line: Fill in the information. Absolutely. - sec: 1096 - time: '18:16' - who: Ioannis -- line: And you do this? - sec: 1098 - time: '18:18' - who: Alexey -- line: Not me, at this stage. I oversee the entire procedure, but usually, we would - have a business analyst having workshops with the business stakeholders who are - going to be the business accountable for the project. We try to capture every - single requirement in this Definition of Done document. - sec: 1100 - time: '18:20' - who: Ioannis -- line: Here, you don't talk about machine learning yet? It’s more about, “Okay, this - is the project and this is the impact that we expect this project to achieve. - This is how we measure this impact.” Things like that, right? You don't talk about - machine learning at all at this stage. Right? - sec: 1119 - time: '18:39' - who: Alexey -- line: Nothing at all. It just captures the definition of “done”. It captures just - the “what” of the product, not the “how”. - sec: 1142 - time: '19:02' - who: Ioannis -- line: There’s no discussion of the solution at all, right? - sec: 1151 - time: '19:11' - who: Alexey -- line: Nothing whatsoever. - sec: 1159 - time: '19:19' - who: Ioannis -- line: Okay. - sec: 1163 - time: '19:23' - who: Alexey -- line: Because at the end of the day, we may have a document and we may realize down - the line that it's not something feasible. 
We may know what we need to do, but - after we have established all the requirements, we may realize, “You know what, - the data is not actually there, which means that this is a no-go.” When that happens, - although it doesn't happen frequently, this is a “fail fast” scenario. Then we - say, “You know what, we cannot proceed with that. Let's take the second in line.” - sec: 1162 - time: '19:22' - who: Ioannis -- line: But this happens later, right? [Ioannis agrees] At the business understanding - step you come up with this Definition of Done document for a project, which is - like two or three pages long, and then I guess you proceed to the next step, which - is, as you mentioned, checking data and things like that. - sec: 1188 - time: '19:48' - who: Alexey -- line: Exactly. As soon as everybody has signed off on this document – the business - stakeholders, data scientist (which is me, in this case) , the data engineer, - and every single person involved – then we proceed to the next phase. This is - where the data science-y involvement starts to kick in – inception. You can think - of it as the EDA (exploratory data analysis) where we try to ensure that we have - everything that we need. That includes access to the data, if the data is already - present, any GDPR concerns that we might encounter, exploring the data sources - as in different distributions and these kinds of constraints that we might have. - Yeah, that's pretty much it. - sec: 1203 - time: '20:03' - who: Ioannis -- header: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' -- line: At which stage do you actually…? You said that this is when data science kicks - in. Is this the stage when you think, “Do I even need machine learning here or - is it more like an analytical project?” - sec: 1254 - time: '20:54' - who: Alexey -- line: Absolutely. - sec: 1269 - time: '21:09' - who: Ioannis -- line: Okay. 
- sec: 1271 - time: '21:11' - who: Alexey -- header: 'Data science vs analytics: choosing technical approach and leads' -- line: As soon as we kick off the inception phase, this is where the data scientists - and analysts come together, and we brainstorm about the solution – we discuss - the “how”. At this point, we understand whether this is a data science project - that would involve machine learning or data analytics, or whether it's a hybrid - between the two different sub-teams (data science and analytics). - sec: 1272 - time: '21:12' - who: Ioannis -- line: To be honest, we do have some idea, when the business stakeholders discuss - the problem, and we may have already decided at this point that this is a data - science project or a data analytics one. But at the inception phase, we’re absolutely - certain that, “You know what? This is 100% a data science project,” for instance. - It’s just the confirmation that we have of when we started. - sec: 1272 - time: '21:12' - who: Ioannis -- line: And depending on whether it is a data science project or not, I guess the - next step would be different, right? - sec: 1329 - time: '22:09' - who: Alexey -- line: Absolutely, yeah. - sec: 1335 - time: '22:15' - who: Ioannis -- line: Then if it’s not a data science project, you say, “Okay, I'm a data scientist, - I cannot help you,” and then somebody else takes this over, right? - sec: 1338 - time: '22:18' - who: Alexey -- line: Not really. I’m accountable for both the data science and analytics projects. - The only difference is that if it's an analytics project, the technical lead who - will work on the project is going to be a data analyst instead of a data scientist. - I still hold the accountability for making sure that the product is delivered - end-to-end. - sec: 1347 - time: '22:27' - who: Ioannis -- header: 'Research & development: modeling work, sprint planning and Kanban usage' -- line: So what's the next step? Or is it different for different projects? 
- sec: 1368 - time: '22:48' - who: Alexey -- line: Not really. As soon as you have an idea and you have defined the “how” of - solving the problem statement, this is where we move into the research and development - phase. These are the hardcore modeling steps in data science, where we follow - all the different design methodologies – sprint planning, stand-ups, retrospective - – all the usual suspects are usually there, where we discuss all the different - stories that we have defined in a Kanban board, for instance. We define sprints, - “This is the goal for sprint one, sprint two.” This is where we start building - whatever that solution might look like. We also make sure that the stakeholders - are closely working with us because you have to make sure that… It's a common - problem that we're trying to tackle so you want to make sure that the business - stakeholders are part of the team and they're not just sitting around waiting - for a delivery in three to six months’ time, depending on the complexity. So we - make sure that we tackle that as a single team. - sec: 1375 - time: '22:55' - who: Ioannis -- line: So that's why you have regular (at least weekly) meetings with them, right? - You want to keep them updated on, “What is the progress? What is being solved - right now? What stage are each of the projects?” Things like that? - sec: 1448 - time: '24:08' - who: Alexey -- line: Absolutely. Also, at the end of every sprint, which is usually bi-weekly, - we have a demo where we show, “These are the things that we have delivered.” And, - if possible, we have an actual demo where they can get a sense of what we're building - and influence some of the steps that we might take on the future sprint. They - oversee the project from the beginning all the way to the end so they make sure - that what gets delivered at the end of the day is something that they will end - up using. 
- sec: 1462 - time: '24:22' - who: Ioannis -- line: So I guess you also give them some sort of demo – a Streamlit App or something - like this – that they can play around with so they see, “Okay, this is not what - I meant.” Or “Yeah, this is what I need.” - sec: 1499 - time: '24:59' - who: Alexey -- line: Absolutely, yeah. - sec: 1513 - time: '25:13' - who: Ioannis -- header: 'Pilot & A/B testing: validating models against baseline KPIs and feedback - loops' -- line: After the R&D phase, is there anything else? - sec: 1517 - time: '25:17' - who: Alexey -- line: Yes. Then we have the pilot phase. In the Definition of Done, we have already - defined the KPIs and the baseline that we're trying to beat. Usually, there's - an existing “as-is” process that we're trying to beat with a new solution. Then - we move into the pilot phase, which usually looks like A/B testing, where we test - the “as-is” process compared to the “to be” process and ensure that the product - that we have built improves the KPI of interest. - sec: 1522 - time: '25:22' - who: Ioannis -- line: During that time, we also collect feedback from the business stakeholders - because that can influence a second iteration of the product if needed. After - the creation of the model, usually, it's the pilot phase, to ensure that we get - the benefits that we were expecting. If that succeeds, then, I guess, it's deployment. - sec: 1522 - time: '25:22' - who: Ioannis -- line: I’m just trying to come up with a joke about the “pilot phase”. [Ioannis and - Alexey laugh] I’m not creative enough. [chuckles] - sec: 1575 - time: '26:15' - who: Alexey -- line: '[laughs] I know what you mean.' - sec: 1584 - time: '26:24' - who: Ioannis -- line: 'So okay – the steps are (the phases are): first, it''s the business understanding - phase, when we come up with this Definition of Done for a project.
Then it’s the - inception phase, where people actually… In the first step, you talk about the - “what” and not the “how” but in the second step, you discuss their actual solution - and you also decide if it''s a data science project or more like an analytical - project. Then, during the R&D phase, you work on the development – the research - and development of the project. Then you also talked about how exactly you do - this – all these agile techniques. At the end, there is the pilot phase, where - you take what you developed and you see if the KPIs you defined in the Definition - of Done are actually met. Right?' - sec: 1586 - time: '26:26' - who: Alexey -- line: Absolutely. Yeah, that's correct. - sec: 1642 - time: '27:22' - who: Ioannis -- header: 'Production rollout: spectrum of production and evolving MLOps capabilities' -- line: So those are the four steps that you mentioned. Is there a fifth one after - the pilot? Like, the production part? - sec: 1645 - time: '27:25' - who: Alexey -- line: It's usually the production. As you probably already know, “production” is - a spectrum. Production might mean surfacing some insights into a Tableau dashboard, - for instance. It can be some predictions being surfaced into an external tool. - That can be all sorts of different things. Depending on what this means, we have - the appropriate, let's say, production framework, which is still being developed - at the moment. Of course, MLOps is certainly still at the beginning. But yeah, - after we see that the benefits are already there and we beat the baseline, we - roll this out to the entire market, depending on the project, of course. - sec: 1652 - time: '27:32' - who: Ioannis -- header: 'Organizational structure: domain-focused lead data scientists (scheduling, - ops, pricing)' -- line: The use cases you deal with are mostly related to marketing and similar cases - – all these campaigns. - sec: 1698 - time: '28:18' - who: Alexey -- line: Yes.
Mostly Digital and Marketing. - sec: 1707 - time: '28:27' - who: Ioannis -- line: So you don't try to work with the actual planes and the schedules? - sec: 1710 - time: '28:30' - who: Alexey -- line: Not myself. But that's an excellent question because, as a data scientist, - I look after Digital Customer and Marketing, but actually we have two or three - more lead data scientists, where every single one looks after a different division - of the business. So we have a lead data scientist who looks after Scheduling and - Network, and another lead data scientist who looks after the Ops when needed, - and, of course, Pricing and Revenue. - sec: 1719 - time: '28:39' - who: Ioannis -- line: I noticed that tickets became more expensive after COVID. [Ioannis laughs] - sec: 1752 - time: '29:12' - who: Alexey -- line: I have no idea about this. [laughs] No comments. - sec: 1757 - time: '29:17' - who: Ioannis -- line: Well, you have a discount, right? [chuckles] - sec: 1761 - time: '29:21' - who: Alexey -- line: Yeah. [chuckles] - sec: 1764 - time: '29:24' - who: Ioannis -- line: I remember that a trip to Italy, before COVID, cost… Sometimes it was actually - more expensive to get the bus that goes from the airport to the city than the - actual ticket. These days are gone. Now it's more expensive to travel. - sec: 1765 - time: '29:25' - who: Alexey -- line: Yeah, I guess inflation as well. Yep. - sec: 1786 - time: '29:46' - who: Ioannis -- line: I was always wondering how companies like RyanAir can keep their costs that - low – when it's like 10 euros for a ticket. But they probably cannot anymore because - now it's different. - sec: 1791 - time: '29:51' - who: Alexey -- line: Exactly. I think it's because of the different business models that different - airlines operate under. There's a specific mindset that allows, let's say, RyanAir - to operate with tickets that have an X price compared to EasyJet or Wizz Air – - different competitors, of course. 
- sec: 1801 - time: '30:01' - who: Ioannis -- header: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' -- line: You already talked a little bit about Agile methodologies that you use during - the R&D phase and I was wondering if maybe you can talk more about this? How do - you structure your day-to-day work when it comes to working on data science projects? - In my experience, I remember… It was some time ago, and we tried Scrum. Maybe - I'll take a step back. My background was originally a Java developer, and Scrum - works well for well-defined developed software engineering projects. - sec: 1821 - time: '30:21' - who: Alexey -- line: But when it comes to data science, it's a little bit more ambiguous, because - you don't know whether what you will have at the end (the thing you build) will - work or not. In software engineering, it's usually less nondeterministic, let's - say. Usually, you know that you will eventually build the thing that solves the - problem, you just don't always know how long it will take. - sec: 1821 - time: '30:21' - who: Alexey -- line: When it comes to data science, you not only don't know how long it will take, - but you also don't know whether it will actually work in the end. [Ioannis agrees] - How do you structure your processes around this problem? You mentioned agile sprint - planning and Kanban – so I'm curious to know in more detail how exactly you structure - the work. - sec: 1821 - time: '30:21' - who: Alexey -- line: Yes, absolutely. Of course, I was working as a technical lead (as a senior - data scientist) which means that, now, as a lead data scientist, I don't schedule - all the agile ceremonies. But as a technical lead, when I was a senior, I did - have that experience. What I was following was all the different agile methodologies - that have been introduced – I was making sure to stick with them. What you said - about being ambiguous is actually true. 
Because in data science, you don't really - know what you're building until you go and actually build it. This is when you - realize whether it works or not. - sec: 1911 - time: '31:51' - who: Ioannis -- line: So what we try to do to make the process a little bit simpler – to ensure - that it's working – is we have the notion of MVPs (minimum viable products) which - means that, in the Definition of Done document, we have the list of all the requirements - that we know we have to build, which means that we kind of already have a sense - of what we're building and which direction that we'll be taking. And because we - know what we're building, it's a bit easier to estimate the time that it might - take for us to deliver a single requirement or a single feature. That doesn't - mean that we're always following Scrum – personally, I'm an advocate of Kanban, - because of the complexities that have to do with data science and machine learning. - But usually, we’re pretty good at estimating whether a specific feature is going - to take, let's say, a week and a half. Even though we may not strictly follow - the Scrum methodology, we actually have a Kanban board, and we try to put some - timelines into our schedule to ensure that, “You know what? We'll have something - built by the end of this two-week sprint.” - sec: 1911 - time: '31:51' - who: Ioannis -- line: Of course, we do this with all the different agile ceremonies that we mentioned - – we have sprint planning, which ensures that we have the different complexities - allocated to the different stories. Of course, there are many ways to do that. - At the end of the day, we do have some sense of how long something is going to - take because of the notion of MVP, and we try to stick to these two-week sprints. 
- sec: 1911 - time: '31:51' - who: Ioannis -- line: So you group all your work into these two-week sprints and at the beginning - of each sprint, you do some sort of planning where you decide, “Okay, for these - two weeks (for this sprint) we take this, this, and this. It will take probably - the entire two weeks to do.” Right? And then during the week… - sec: 2064 - time: '34:24' - who: Alexey -- line: Exactly, depending on the resources. - sec: 2091 - time: '34:51' - who: Ioannis -- line: The resources are the people who work on this, right? - sec: 2094 - time: '34:54' - who: Alexey -- line: Yeah. Something to add here, which also helps us estimate the different stories - and how much they're going to take, also comes at the inception phase. At the - inception phase, we dive into the data and try to understand a little bit about - the quality of the data, how much preprocessing we might have to do, or how much - time a specific implementation might take depending on the complexity of the project. - The inception phase also gives us an understanding of how much time this specific - implementation is going to take. That helps us estimate the timing a bit. - sec: 2100 - time: '35:00' - who: Ioannis -- header: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' -- line: Can you maybe walk us through the entire sprint? So, the sprint starts with - planning and I think it ends with a demo – what happens in between? - sec: 2138 - time: '35:38' - who: Alexey -- line: Yes. In between, we have daily stand-ups. Of course, it can be a written stand-up, - or an actual 15-minute stand-up, usually in the morning, where the entire team - comes together and we say, “I've been working on this story. This is the progress - I’ve made so far. This is the plan that I'm going to work on today (or for the - next couple of days).
These are the blockers (if any) that I'm encountering at - the moment.” Usually, when this happens, you have a senior member jump in to support - – we make sure that all the blockers are removed so we can deliver the project - or the feature on time. - sec: 2147 - time: '35:47' - who: Ioannis -- line: Of course, depending on the complexity of the project, that can be an everyday - stand-up or every other day – it really depends. But I think what works the best, - according to my experience, is having two stand-ups per week so that it gives - time for the people to work on the different stories. And, of course, if something - goes wrong, you can always reach out to a teammate to ask for support. That's - pretty much it in terms of stand-up. And of course… [cross-talk] - sec: 2147 - time: '35:47' - who: Ioannis -- line: It’s not a very heavy process, right? What I understood is that you have this - estimate – the start of the sprint where you estimate. Then you have some stand-up - meetings during the week. Then, at the end, you have the demo. Right? That's basically - the process. So it's not very heavy. [Ioannis agrees] Because I know in Scrum, - there are all sorts of other things like grooming. I don't even remember what - else, but I remember that the backlog grooming can get quite heavy if you follow - the book and try to implement everything. - sec: 2222 - time: '37:02' - who: Alexey -- line: That's true. But I think the notion of Agile is actually being agile and seeing - what works for your team and what doesn't. We have tried with different meetings, - according to what has been proposed over time. But we have identified that this - framework that we have works great for our team and we follow this specific framework. - One of the things that Ben Diaz, who is the Director of the Data Science and Analytics - team, says is, “We have to be agile at being agile.” I think that summarizes everything. 
- [chuckles] - sec: 2262 - time: '37:42' - who: Ioannis -- header: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' -- line: What does estimating look like for you? Do you use something like PlanningPoker - or things like that? - sec: 2297 - time: '38:17' - who: Alexey -- line: It depends. Different teams use different techniques. We have T-shirt sizing, - sometimes we follow the Fibonacci sequence to allocate points. We also have Scrum - masters who support us in that way. We make sure that we don't use days as a way - of estimation. So, whatever has worked for the different team members over time, - it's usually the technical leader of the project who decides which method they - want to use. - sec: 2306 - time: '38:26' - who: Ioannis -- line: Yeah, interesting. So you do some sort of planning poker, right? Or? - sec: 2337 - time: '38:57' - who: Alexey -- line: Yeah, yeah. - sec: 2344 - time: '39:04' - who: Ioannis -- line: And what does it look like? I imagine that there's a meeting, and in this - meeting, you have different people –you, a scrum master, project lead, data scientists - can implement this, and then somebody (for example, you, as the project lead) - says, “Now, let's talk about this task (this story) that we are going to take - in this sprint, which is about changing the color or changing the chart on this - dashboard (or whatever).” Right? - sec: 2346 - time: '39:06' - who: Alexey -- line: Yeah, whatever that may be. - sec: 2377 - time: '39:37' - who: Ioannis -- line: Everyone says, “Okay, I think this is a very easy task.” Right? - sec: 2379 - time: '39:39' - who: Alexey -- line: Exactly, that you put that number on top. Depending on which one you think - is the most complex, you put the corresponding numbers. Yeah, this is pretty much - it. 
Every single team member… Of course, there are always outliers, but usually, - you have all the different stories and you say, “Okay, which one do we think is - the most complex one?” This gets allocated with that specific number, and then - we increase the complexity depending on the methodology that we use. - sec: 2384 - time: '39:44' - who: Ioannis -- line: Yeah, interesting. In your experience, does it work well? - sec: 2410 - time: '40:10' - who: Alexey -- line: I think so. There have been examples where it has worked out perfectly and, - of course, there are always [chuckles] the bad examples where you can see that - you're quite tough when it comes to timelines. But I think the bottom line is - that you have to adjust and be mindful of the fact that not everything is expected - to go well on every single project. As soon as you manage your expectations, I - think you're good. - sec: 2415 - time: '40:15' - who: Ioannis -- header: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' -- line: When it comes to business stakeholders, I assume you don't invite them to - your stand-ups, but you probably invite them to demos, right? - sec: 2449 - time: '40:49' - who: Alexey -- line: Yes, that's correct. I think that's a great way for the business stakeholders - to get a sense of what we're building because they can get an early interaction - with the tool and the direction that we're taking. They also feel like a part - of the team and that makes them more engaged in what we're building and quickly - sense that we're a team and we're trying to tackle this problem together instead - of us acting like consultants, “This is what we're building for you. 
Just use - it.” - sec: 2461 - time: '41:01' - who: Ioannis -- header: 'Communicating technical results: simplifying concepts for non‑technical - audiences' -- line: I also imagine that the business stakeholders – it could be the Head of Marketing - or Head of Digital, or some other Head – don't necessarily know what every ROC curve - means or precision-recall and things like that. [Ioannis agrees] When it comes - to demos that are maybe a little bit more technical, they sit there and are just - like, “Okay, I don't understand this, but I trust that you’re doing your work.” - How do you deal with this – when stakeholders do not necessarily understand what - the team is talking about? Or do you maybe educate the stakeholders, educate the - team, or both? What helps? - sec: 2493 - time: '41:33' - who: Alexey -- line: I think, in cases like that, you really have to be a chameleon and this is - where soft skills come into place. When we have a demo session at the end of every - sprint, we have to make sure that we never use technical language with them, because - you have to adjust your context for a non-technical audience. I don't think there's - been a single project where we have thrown some technical jargon, if you will, - at all. - sec: 2535 - time: '42:15' - who: Ioannis -- line: You educate the team members. You can say, “Look, if you say ‘ROC curve,’ - they will be like, ‘Okay, what is that?’” So you teach them how they can present - findings, the projects, and the demos, in a way that stakeholders will understand. - sec: 2572 - time: '42:52' - who: Alexey -- line: Exactly. We never use any technical language with them. And if there's something - that you need to explain that might require some technical knowledge, we always - make sure that we use examples that can be easily interpretable compared to a - technical implementation that you have seen.
For instance, if you think about - recommender systems and you want to understand how a specific person is closely - related to another, you wouldn’t say, “As a measure of understanding how close - two individuals are, we use the Euclidean distance.” - sec: 2594 - time: '43:14' - who: Ioannis -- line: Instead, you put forward two examples where you say, “You see that these two - people look similar?” And you don't really need to define similar in this context, - because they can see that all the different roles, for instance, look the same, - compared to another individual that is completely on a different cluster. So when - you want to explain these kinds of technical details, you can always use an example - that would make sense for a non-technical audience. - sec: 2594 - time: '43:14' - who: Ioannis -- line: Well, I assume that this is also a skill – presenting your findings in a way - that non-technical people can understand. [Ioannis agrees] It can be even more - difficult to learn this skill, to master this skill – let's say, even more difficult - than learning machine learning, at least for technical people. - sec: 2659 - time: '44:19' - who: Alexey -- line: Potentially, yes. [laughs] - sec: 2679 - time: '44:39' - who: Ioannis -- line: People who are used to terminals and notebooks and all this stuff – going - in and presenting something to business stakeholders might not be something that - they're used to doing. So how do you educate people? How do you help them learn - this skill or master this skill? - sec: 2680 - time: '44:40' - who: Alexey -- header: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a - helper' -- line: I don't think there's an easy way. I think this comes with experience and - just making sure that you always enhance your soft skills.
One of the things that - usually helps is thinking about all the different inner sentences that people - usually say, “Pitch it to me like I'm a five-year-old.” Or I think Einstein had - said, “If you can’t explain something in simple terms, you don't know it that - well.” So, I guess it's just a matter of reminding people that the people that - we have on the other side of the call don't have the technical experience that - you have, so try to speak their language and explain what you're doing like you're - speaking to a five-year-old. I guess there's no easy way to do this, it just comes - with experience and constant feedback, of course. - sec: 2710 - time: '45:10' - who: Ioannis -- line: And I guess having a five-year-old helps. [chuckles] - sec: 2764 - time: '46:04' - who: Alexey -- line: Yeah. [laughs] I can only imagine. - sec: 2775 - time: '46:15' - who: Ioannis -- line: Maybe if you don't have a kid who's five years old, you have no idea how much - knowledge they actually have. [Ioannis agrees, chuckles] I have a son. He's seven - years old. He sometimes asks me things like how GPS works. And I have no idea. - Let's say if I go on the internet and type, “How does GPS work?” then the explanation - would be super technical. Then I think, “Okay, how do you explain this to my son?” - So it's a skill. Well, one hack I found quite useful is just asking ChatGPT. I - guess everyone uses this now. - sec: 2777 - time: '46:17' - who: Alexey -- line: Oh, yeah, of course. Absolutely. I still remember the days when ChatGPT wasn't - out – I remember, I was a graduate data scientist at the time. I got the opportunity - to present something to business stakeholders. I think this is when I found out, - not in a nice way, that my ways of presenting and soft skills are not as good - [as I thought].
I remember there was a really cringe moment where I was trying - to explain why having 99% accuracy as a wider term doesn't mean anything unless - you know about the balance with the labels. Yeah, I think it didn't go well. I - think this pushed me a little bit to try to understand how I can present to someone - who doesn't have technical expertise. I think it comes with experience at the - end of the day. - sec: 2818 - time: '46:58' - who: Ioannis -- line: Actually, we can think of ourselves as five-year-old kids too, when it comes - to learning new things. For example, when I read this article about how GPS works, - I'm clueless. Okay, there are a bunch of us that try to explain it, but I don't - really understand what's happening there. So the explanation that ChatGPT gave - to my son was actually helpful for me to also understand that. I don't know if - I should say that, but maybe we can think of stakeholders as kids. [chuckles] - sec: 2865 - time: '47:45' - who: Alexey -- line: '[laughs] Yeah, I think I know what you mean. I''m really happy that all the - stakeholders that we have at EasyJet are really literate in terms of data science - and mathematics. That makes our work really, really easy. So I''m so thankful - for that.' - sec: 2903 - time: '48:23' - who: Ioannis -- header: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' -- line: Yeah. Great. Also, I actually wanted to spend a bit of time talking about - the MLOps Zoomcamp course, because I was… - sec: 2918 - time: '48:38' - who: Alexey -- line: Yeah, of course! - sec: 2927 - time: '48:47' - who: Ioannis -- line: I was really surprised when I looked at your background – I thought, “Why - would Ioannis even consider it?” Because with your experience – you're already - doing all the things you talked about right now – I'm wondering, what inspired - you to take our course? Why did you decide to take it? 
- sec: 2929 - time: '48:49' - who: Alexey -- header: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' -- line: Yeah, absolutely. The thing is, as a lead data scientist, my role has become - a little bit more managerial compared to the amount of time that I have to spend - doing technical stuff. And if you ask me, having a bachelor of mathematics, I'm - a geek at heart, which means that every opportunity I get to get my hands dirty - with some data and build something myself – I always take it. MLOps specifically - is, from my experience – I'm usually involved in, let's say, building the models - and I didn't get much exposure to the productionization side of things. I was - just intrigued by the course and the content. Of course, I was using MLflow, but - then we had Prefect – the data engineering team – and we have been using airflow. - And I'm like, “Let me get into that engineering side of things a little bit more - and also get the opportunity to get my hands dirty.” I think this is what clicked - for me. And I'm like, “Yeah, let me go for it.” - sec: 2950 - time: '49:10' - who: Ioannis -- line: Well, as somebody who was a lead data scientist in the past, one problem for - me was always time. [Ioannis chuckles] With all this stakeholder management, how - do I actually find time to still be hands-on and experiment with things? [Ioannis - agrees] And then sometimes, I wanted to take a course, but then I didn't have - time, because there’s only 40 hours that you spend at work. How did you solve - this problem? - sec: 3022 - time: '50:22' - who: Alexey -- line: Yeah, that's a great question. I think one of the good things about my decision - to become a data scientist is that I genuinely love the profession. I would be - a data scientist as a hobby if my day job was something different. This means - that even when I finish my work, I don't feel drained from all the information - that I had to go through throughout the day. 
- sec: 3053 - time: '50:53' - who: Ioannis -- line: I genuinely enjoy working as a data scientist, which means that I consider - that as an activity rather than, let's say, something that will consume my time. - So yeah, it was just great. I had my morning cup of coffee, and during the weekends, - I took my laptop, went to a nice coffee place and just watched your courses and - tried to do the assignments. It's been fun. And I got a little experience out - of it, to be honest. So yeah, it was just great. - sec: 3053 - time: '50:53' - who: Ioannis -- line: So instead of watching Netflix, you watched the courses. - sec: 3112 - time: '51:52' - who: Alexey -- line: What was that? - sec: 3117 - time: '51:57' - who: Ioannis -- line: Instead of watching Netflix, you watched the courses. Or… Maybe in addition - to. - sec: 3118 - time: '51:58' - who: Alexey -- line: Yes! [laughs] Absolutely. - sec: 3121 - time: '52:01' - who: Ioannis -- line: Okay. Well, it sounded like the course was useful for you, right? Was it mostly - like… I don't know if I should call it that – entertainment? Or more like self-educating? - Or did you also get something out of this course and apply it at work? - sec: 3124 - time: '52:04' - who: Alexey -- line: It was a little bit of both. It was entertainment in the sense that I got - confirmation that what I'm doing is correct. But also, I got the opportunity to - play with technologies that I otherwise wouldn't have time to. One of the examples - is Prefect, for instance. Because as a lead data scientist, I’m not that involved - in the engineering side of things, so I wouldn’t get the opportunity to play with - Airflow or Prefect. So I think it had a good balance of both – getting the confirmation - that what I'm doing is correct, but also learning something new. This is really - important because as you mentioned in the beginning, I'm leading the MLOps team - within EasyJet. 
Even though I give the guidance and have an influence on where - we're going as a data science and analytics team with our MLOps journey, it was - great for me to understand a little bit about the technical landscape. I feel - that that's the best way to influence a specific direction. So that really worked - well. - sec: 3144 - time: '52:24' - who: Ioannis -- header: 'Model monitoring with Evidently: drift detection and integration plans' -- line: Actually, before our conversation (before our interview) I had a chat with - Elena from Evidently and she said, “Oh, Ioannis is coming to your podcast? Make - sure to ask about Evidently!” [chuckles] - sec: 3213 - time: '53:33' - who: Alexey -- line: Absolutely. Evidently, I think – and I'm not afraid to say this, but I think - Evidently is the best Python library out there for model monitoring. This is something… - the final assignment that I did for the MLOps Zoomcamp also gave me the opportunity - to play with the Evidently library a little bit more. I had the time to play with - Evidently, I think, two years ago, when it was still, in a way, the dev version. - I remember the first time that I reached out to them, because I said, “You know - what? I have implemented that and it doesn't look correct.” There was actually - a bug and this is how the networking kicked in. But yeah, Evidently – absolutely - the best Python library for model monitoring. - sec: 3228 - time: '53:48' - who: Ioannis -- line: Do you use it at EasyJet as well? - sec: 3280 - time: '54:40' - who: Alexey -- line: Absolutely. We will use it to their sense of embedding that within our MLOps - framework. It's still a work in progress but we have made tremendous progress - throughout all these years. I think, especially now that we're trying to define - our MLOps capabilities, Evidently is the best thing that could have happened to - me and to EasyJet to that extent. 
- sec: 3283 - time: '54:43' - who: Ioannis -- header: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' -- line: Just curious – I know Evidently, right now, has its own dashboard, but what - you do is probably based on some sort of other monitoring framework, like Grafana - or something like that, right? - sec: 3311 - time: '55:11' - who: Alexey -- line: Yeah, I mean, right now we're thinking about using the Tableau dashboard and - I have a proof of concept that I'm about to present to the EasyJet MLOps team. - But before that, because I had already implemented a proof of concept, we weren't - using Grafana – we didn't have the UI. To be honest, I had implemented a custom - function that would trigger an email alert to the technical lead of the project - in case there was data drift or model drift detected. It was, I think, two to - three years ago. - sec: 3325 - time: '55:25' - who: Ioannis -- line: You mentioned Tableau, and it's interesting how versatile this tool is. [Ioannis - chuckles and agrees] It's not just a dashboard, you can even build simple, rudimentary - monitoring in Tableau. I remember we had problems with data quality and then our - analyst quickly came up with a dashboard that shows how many records there are - each day in the important tables. Then, what he did next was configure Tableau - to send an alert if the number for one of the days was less than expected. He - did that in like 30 minutes or something. That was amazing. - sec: 3361 - time: '56:01' - who: Alexey -- line: Okay. That's great. It indeed sounds amazing. Goodness. - sec: 3404 - time: '56:44' - who: Ioannis -- line: I mean, at the end, it's just a bunch of SQL queries and then knowing where - to put these queries and which button to click to create an alert, he knew how - to do this. Not everyone knows that. But it was a quick and dirty solution that - worked pretty well. It's amazing. 
- sec: 3407 - time: '56:47' - who: Alexey -- line: Yeah, that's good. It's always exciting when someone delivers something that - fancy in such a short period of time. - sec: 3423 - time: '57:03' - who: Ioannis -- header: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' -- line: Yeah, I think we should be finishing soon. So maybe I'll ask you one thing. - We talked a lot about communicating with business stakeholders, we also talked - about Agile processes. We talked a little bit about MLOps. Are there any good - resources that you can recommend to our listeners who want to learn more about - these topics? - sec: 3429 - time: '57:09' - who: Alexey -- line: About which topic specifically? - sec: 3456 - time: '57:36' - who: Ioannis -- line: Well, about any of those that we discussed – let's say, about processes, about - communicating with business stakeholders? When you were learning how to do your - job well, maybe you came across some books or courses that helped you. - sec: 3459 - time: '57:39' - who: Alexey -- line: There is a single resource that I would recommend to every single aspiring - data scientist/data analyst to watch out for. I'm not sure if you know Cassie - Kozyrkov – she’s the Decision Intelligence Advocate for Google, at least she used - to be – she resigned. But Cassie Kozyrkov and her course on YouTube, Making Friends - with Machine Learning, I think, is the best resource out there, in order to understand - how you can communicate technical details to a non-technical audience. I think - the way she speaks and expresses these kinds of technical details in such a nice - and direct way, is one of the best skills that someone can get. 
And I think, watching - her YouTube videos helped me to really understand “What would be the best way - to explain a technical term to someone that is not familiar with my world and - data science in general?” - sec: 3478 - time: '57:58' - who: Ioannis -- line: I spent, I think, countless hours watching her videos, trying to analyze the - way that she approaches things, terms, or explains how linear regression works. - So if you want, Cassie Kozyrkov from Decision Intelligence from Google – her YouTube - videos, Making Friends with Machine Learning. At least this is how to communicate - to a non-technical audience. When it comes to technical details, I think different - books like, Pattern Recognition from Gibson is one of the best books that you - can go with. It's really heavy, so you have to make sure that you're comfortable - with mathematics. - sec: 3478 - time: '57:58' - who: Ioannis -- line: In many senses – because I remember we used this book for my machine learning - classes and it was heavy for the class too. [chuckles] - sec: 3584 - time: '59:44' - who: Alexey -- line: It was heavy, indeed. But I'm telling you, if you spend time and you actually - focus – let's say you have a two-hour block of time and you go through that, it's - one of the best things that you read to understand the mathematics behind machine - learning and how it really works. Of course, LinkedIn helps a lot with different - posts and resources that are being recommended. I think on a day-to-day basis, - LinkedIn is my go-to resource website. - sec: 3594 - time: '59:54' - who: Ioannis -- line: Cassie… I think this is how I know her – from LinkedIn. I don't know if she's - active anymore, but she used to be quite active on LinkedIn and this is where - I went to see her content. - sec: 3622 - time: '1:00:22' - who: Alexey -- line: She is amazing, yeah – podcast, YouTube, LinkedIn, of course. I think she - was all over the place. I think now she's building something on her own. 
This - is why she left Google. And I'm really interested to see what this is going to - be. I know this is about decision-making and decision intelligence, which is something - she has established on her own. So yeah, I'm really looking forward to seeing - her content. - sec: 3636 - time: '1:00:36' - who: Ioannis -- header: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' -- line: Yeah. Thanks, Ioannis, for joining us today, and for sharing all that you - shared with us today. Yeah, it was amazing. Thanks for finding time. And thanks, - everyone else, too, for joining us and being active here. I think… I actually - forgot – we had only one question that I accidentally forgot to mention. Is it - okay, Ioannis, if Dave reaches out to you on LinkedIn and asks this question? - sec: 3660 - time: '1:01:00' - who: Alexey -- line: Yeah, absolutely. I'm always open. I'm super active on LinkedIn. Any question, - whatever that may be – feel free to reach out on LinkedIn and I’ll make sure to - get back to you. - sec: 3694 - time: '1:01:34' - who: Ioannis -- line: Okay, thanks. And with that, I guess we’re finished. - sec: 3707 - time: '1:01:47' - who: Alexey -- line: Amazing. Thanks for having me! - sec: 3711 - time: '1:01:51' - who: Ioannis -- line: Yeah. Thanks. Bye, everyone. 
- sec: 3714 - time: '1:01:54' - who: Alexey ---- - -Links: - -* [LinkedIn](https://www.linkedin.com/in/ioannis-mesionis/){:target="_blank"} -* [Github](https://github.com/ioannismesionis){:target="_blank"} -* [Website](https://ioannismesionis.github.io/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s16e04-from-marketing-to-product-owner-in-search.md b/_podcast/to-update/s16e04-from-marketing-to-product-owner-in-search.md deleted file mode 100644 index 22669225..00000000 --- a/_podcast/to-update/s16e04-from-marketing-to-product-owner-in-search.md +++ /dev/null @@ -1,1065 +0,0 @@ ---- -title: "Context: A marketer-turned-product owner describes moving from performance marketing into product roles at AUTODOC, relocating to Germany, learning product ownership on the job, forming a dedicated e-commerce search team, choosing processes (Scrum vs Kanban), structuring one-on-ones, recruiting remotely, upskilling in search and NLP, and leveraging marketing strengths for user insight, internal influence, and roadmapping—plus recommended resources and closing advice on breaking barriers and continual learning. - -Core narrative: Adaptive translational leadership—using marketing-honed user empathy, communication, and persuasion as the bridge to technical product impact: learning rapidly on the job, shaping pragmatic team structures and processes, recruiting and upskilling to close technical gaps (search/NLP), and continuously experimenting to deliver user-centered e‑commerce solutions." 
-short: From Marketing to Product Owner in Search -season: 16 -episode: 4 -guests: -- lerakaimashnikova -image: images/podcast/s16e04-from-marketing-to-product-owner-in-search.jpg -ids: - anchor: atatalksclub/episodes/From-Marketing-to-Product-Owner-in-Search---Lera-Kaimashnkova-e2b33qt - youtube: -HbQQ_bVdfE -links: - anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Marketing-to-Product-Owner-in-Search---Lera-Kaimashnkova-e2b33qt - apple: https://podcasts.apple.com/us/podcast/from-marketing-to-product-owner-in-search-lera-kaimashn%D1%96kova/id1541710331?i=1000633617858 - spotify: https://open.spotify.com/episode/540Mzul8eaulfqettzAHJH?si=OJWEa8NqSIaviV3zMyzL6Q - youtube: https://www.youtube.com/watch?v=-HbQQ_bVdfE - -description: Discover e-commerce search with Elasticsearch & NLP, plus product owner tactics, roadmap prioritization, team-building and hiring tips to boost conversions -intro: 'How do you move from performance marketing into a product role and build scalable e-commerce search with Elasticsearch and NLP? In this episode, Lera Kaimashnіkova — an e-commerce Product Owner focused on site search optimization, analytics, and conversion — walks through that exact journey. She explains transitioning from B2B marketing to product ownership, landing a PO role at AUTODOC, and relocating to Germany while learning the craft on the job.

You''ll hear practical approaches to structuring one‑on‑ones, owning monitoring, roadmaps, and prioritization, and choosing Scrum for deliveries vs. Kanban for investigations. Lera covers forming a dedicated e‑commerce search team and the technical side of relevance engineering: Elasticsearch, autocomplete, search filters, vehicle selector and part fitment flows, plus NLP and information retrieval learnings from Haystack and ChatGPT experimentation. She also discusses recruiting remote developers, why marketing backgrounds are valuable for product roles, and resources for communication, strategy, and experimentation.

If you’re responsible for site search, product discovery, or making the leap into product ownership, this episode delivers concrete tactics for improving relevance, boosting conversion rates, and growing your technical and team capabilities.' -dateadded: 2023-11-05 - -duration: PT01H02M19S - -quotableClips: -- name: Podcast Introduction & Guest Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=0 - endOffset: 111 -- name: 'Background: Transition from performance marketing to product roles' - startOffset: 111 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=111 - endOffset: 134 -- name: 'Marketing Experience: B2B e‑commerce, lead acquisition, branding' - startOffset: 134 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=134 - endOffset: 566 -- name: Landing Product Owner Role at AUTODOC Despite Non‑traditional Fit - startOffset: 566 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=566 - endOffset: 623 -- name: 'Relocation: Moving from Ukraine to Germany during 2020' - startOffset: 623 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=623 - endOffset: 704 -- name: 'Transition Challenges: Learning product ownership on the job' - startOffset: 704 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=704 - endOffset: 996 -- name: 'One‑on‑Ones: Structuring meetings to align with engineers and QA' - startOffset: 996 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=996 - endOffset: 1347 -- name: 'Product Owner Scope: Monitoring, roadmap, prioritization, team operations' - startOffset: 1347 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1347 - endOffset: 1727 -- name: 'Process Choices: Scrum for deliveries vs Kanban for investigations' - startOffset: 1727 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1727 - endOffset: 1790 -- name: 'Team Building: Forming a dedicated e‑commerce search team' - startOffset: 1790 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1790 - endOffset: 2072 -- name: 'Search Expertise: 
Relevant Search book, Elasticsearch, and relevance as business - context' - startOffset: 2072 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2072 - endOffset: 2348 -- name: 'User Journey: Vehicle selector, part fitment, and contextual search flows' - startOffset: 2348 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2348 - endOffset: 2571 -- name: 'Technical Upskilling: NLP, information retrieval, Haystack conference, and - ChatGPT' - startOffset: 2571 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2571 - endOffset: 2802 -- name: 'Recruiting: Remote roles, office hubs, and open developer positions' - startOffset: 2802 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2802 - endOffset: 2909 -- name: 'Hiring Criteria: Why marketing backgrounds are valued for product roles' - startOffset: 2909 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2909 - endOffset: 3136 -- name: 'Marketing Strengths: User understanding, internal PR, and pitching roadmaps' - startOffset: 3136 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3136 - endOffset: 3486 -- name: 'Recommended Resources: Communication, Professional Product Owner, strategy, - experimentation' - startOffset: 3486 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3486 - endOffset: 3775 -- name: 'Closing Advice: Breaking mental barriers and committing to continuous learning' - startOffset: 3775 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3775 - endOffset: 3739 - -transcript: -- header: Podcast Introduction & Guest Welcome -- line: This week, we'll talk about transitioning from marketing to being a product - owner in search. And we have a very special guest today Valeria. Valeria is a - product owner with a focus on e-commerce, site search optimization, analytics, - team management, and product development – and I think there are more things that - I omitted because your biography is quite long and extensive. Today, Valeria will - share her experience with us. Welcome! 
- sec: 79 - time: '1:19' - who: Alexey -- line: Thank you so much. Thank you for joining. - sec: 108 - time: '1:48' - who: Lera -- header: 'Background: Transition from performance marketing to product roles' -- line: Before we go into our main topic of transitioning from marketing to being - a product owner, let's start with your background. Can you tell us about your - career journey so far? - sec: 111 - time: '1:51' - who: Alexey -- line: What do you mean by career journey? - sec: 121 - time: '2:01' - who: Lera -- line: Your journey of how you ended up in the place where you are right now. What - did you do before? - sec: 126 - time: '2:06' - who: Alexey -- header: 'Marketing Experience: B2B e‑commerce, lead acquisition, branding' -- line: Oh, okay – fine. Well, as you learned before, I started with marketing. Actually, - I was always focused on IT. Working in Product was my vision of my career, but - to start being a product manager/product owner, you need to start from somewhere. - Basically, there was an opportunity to join Ringostat, which is a SaaS that provides - analytics and call-tracking for marketers. They had an open position of marketer - – it was like performance marketing, so I was in charge of doing promotions and - doing some events. My responsibility was to acquire leads – to generate leads - and to make them our customers. - sec: 134 - time: '2:14' - who: Lera -- line: For example, some very interesting stuff that I did – our customers were B2B - (other companies) and marketing the work for e-commerce as well. I did some interesting - events, for example, there was a rating of PPC agencies. That's how I grabbed - the attention of all our customers and provided something of value for them. For - this rating stuff, we also acquired leads. My first step in Performance Marketing - was acquiring leads and making promotions and all this advertising stuff. So after - Ringostat, I went to another company. It was a logistic holding. 
We have eight - companies inside this holding. Basically, it was about transportation, about oil - (we have gas stations) and they also have e-commerce that sells car parts. That - was a little bit of a journey for my career. - sec: 134 - time: '2:14' - who: Lera -- line: Basically, it was a small marketing department and we also did pretty much - everything. I was also in charge of Performance Marketing and we did some promotions - for each company. For example, we did a promotion for a logistics company, made - events, and so on. Also, it was like internal outsourcing marketing for all these - companies in the holding. The other great task was to make branding for gas stations. - I also loved this task because I was analyzing and making a “job to be done” framework, - creating great messaging in our branding, and we released this branding. The other - task was to launch a website that sells car parts. We were limited in our assortment - – we just sold tires. This e-commerce was also B2B because we sold to logistic - companies. It was stuff for trucks. We sold batteries for huge trucks, tires for - them, and oil, and all this stuff. I worked there for one year. It was based in - Odesa, Ukraine. - sec: 134 - time: '2:14' - who: Lera -- line: As you know, in 2020, the war in Ukraine started. Because I was in marketing, - it's become… I realized I needed to move to Europe, because of the situation there. - That's how I left my job at the previous company. I realized it was marketing - for the internal market in Ukraine, and so I would need to change my whole career - – to change everything I was doing – and to move to another country to focus more - on the international position. This was where the challenge began. Basically, - I didn’t really have experience being a product owner. I didn't have much experience - in e-commerce because my e-commerce experience was for B2B users. It was not about - promotions and stuff. I didn't have much of this experience. 
That's where the - journey was about breaking barriers in your head. - sec: 134 - time: '2:14' - who: Lera -- line: So, I was trying to find a new job because I lost mine. I did several interviews, - and while I was doing interviews, I actually started learning to code. I was learning, - for example, JavaScript, HTML, and CSS, while I was in this transition period - – I wanted to learn to code more. I really had time for this, too. It was mind-blowing - for me at first. It was hard. I really struggled to learn how to code. But basically, - I ended up in a position where I could code a website, for example. When I was - looking for a new job I used this time to learn new stuff – it was hard stuff - for me because it was completely different from what I did before. - sec: 134 - time: '2:14' - who: Lera -- line: How I joined AUTODOC is a great story. I just tried different channels of - how to find a new job and I wrote to my colleague, with whom we used to work at - Ringostat. I just wrote to him, “How are you doing? How are you doing with this - whole situation in Ukraine? Where are you now?” And he said, “I'm working at AUTODOC - right now.” And asked, “Yeah, actually I'm looking for jobs. Maybe you have some - open positions, guys?” And he said “Yeah, why not? I should ask people.” The recruiter - sent me a job description and it had nothing to actually do with my knowledge - – it was so different. - sec: 134 - time: '2:14' - who: Lera -- header: Landing Product Owner Role at AUTODOC Despite Non‑traditional Fit -- line: It was the product owner position, right? - sec: 566 - time: '9:26' - who: Alexey -- line: Yeah. Yeah, it was a product owner position. The requirements were something - like being a product owner in e-commerce for two years, knowing a lot about car - parts, and everything like that. [cross-talk] - sec: 568 - time: '9:28' - who: Lera -- line: But you knew something about car parts already from your previous logistics - experience. 
[Lera agrees] So it was still a match, to some extent, right? - sec: 580 - time: '9:40' - who: Alexey -- line: Yes. I use this experience to sell myself [chuckles] and to be more relevant. - We went to some interviews and I think they liked me. So that's how I joined AUTODOC. - From the beginning, at AUTODOC, I can share my challenges regarding how to transform - a new position from the lack of… [cross-talk] - sec: 587 - time: '9:47' - who: Lera -- line: Yeah, I definitely have questions about that. - sec: 620 - time: '10:20' - who: Alexey -- line: Yeah. Okay. - sec: 622 - time: '10:22' - who: Lera -- header: 'Relocation: Moving from Ukraine to Germany during 2020' -- line: I'm just curious. When the war started and you left Ukraine, did he go immediately - to Berlin? Or did you go to some other place? - sec: 623 - time: '10:23' - who: Alexey -- line: No, no. It was hard to find an apartment, actually. Maybe you know something - about this. We started from a small town in Germany, but then I realized it was - too small to live there, so we went to Breslau. And from Breslau, then I moved - to Berlin. But I moved to Berlin while already having been employed at AUTODOC. - sec: 631 - time: '10:31' - who: Lera -- line: Aha, so you have offices in both Breslau and Berlin. - sec: 658 - time: '10:58' - who: Alexey -- line: We don't have offices in Breslau, it's remote. - sec: 662 - time: '11:02' - who: Lera -- line: Okay. So you lived in Breslau, you contacted your ex-colleague from the previous - company, and he forwarded you a job description. You didn't really fit this job - description, but you still tried to interview and got this job, and then eventually - moved to Berlin. - sec: 666 - time: '11:06' - who: Alexey -- line: Yeah, that's true. - sec: 682 - time: '11:22' - who: Lera -- line: Okay. I think I interrupted you. You wanted to tell us about the challenges, - right? Your prior experience was in performance marketing. 
[Lera agrees] You were - talking about things like PPC campaigns, which is… What does it mean, actually? - PPC? - sec: 684 - time: '11:24' - who: Alexey -- line: Pay per click. - sec: 703 - time: '11:43' - who: Lera -- header: 'Transition Challenges: Learning product ownership on the job' -- line: Pay per click! Exactly. And now you're a product owner, right? Which is… I - don't know how related it is to Performance Marketing. Maybe there is some relation, - maybe not. But I'm curious, what were the challenges? How did you transition? - What did you need to do for this? - sec: 704 - time: '11:44' - who: Alexey -- line: Yeah, that's an interesting part. The first challenge was that I needed to - learn a lot. I spent… I worked a lot. I worked until late-late evening to become - an expert in this field. I learned from very different perspectives. For example, - first, I needed to learn what product ownership is. I took a bunch of courses. - For example, there was a special course for product ownership in Ukraine – I wanted - to take a deep dive into this role and what it actually means and… [cross-talk] - sec: 726 - time: '12:06' - who: Lera -- line: But you were already employed. [Lera agrees] So you already got hired and - you thought, “Okay, what is this product owner thing?” [chuckles] “Let me check.” - sec: 775 - time: '12:55' - who: Alexey -- line: '[chuckles] Yeah, that''s a funny part. But I really did a lot to learn. I - read so many books. For example, there is a book called Professional Product Owner. - It describes what the role of product owner is. Basically, the product owner is - a role in Scrum. But in our company, the product owner is more of a product manager. - It''s just a title, but we do pretty much everything that the product manager - does. Currently, we are in a tech transformation, and our titles would actually - be product managers. 
Really, the first thing I did was take a deep dive into what - the product owner role is and what this person does. I went to courses, and I - was reading books – I’ve done a lot of this. I have some knowledge on how… For - example, in my previous companies [of employment], people were doing the same - thing.' - sec: 784 - time: '13:04' - who: Lera -- line: At Ringostat, we had pretty much the same positions. It was the Visionary - Officer and the Project Manager. I basically grabbed some patterns from those - people and put those patterns into doing this role. Then I took a really deeper - dive into this role. The second challenge… I also wanted to say that, when you - transition from another position, one thing that would be super helpful is to - build connections with your colleagues and your coworkers because those people - will help you. They will teach you. My tip for this is to learn from everyone. - I learned a lot from Quality Assurance Engineers. They taught me about the product. - I also learned a lot from engineers – from them, I learned the logic of the product - and the search. I also learned from business analysts and I learned a lot from - data analysts. - sec: 784 - time: '13:04' - who: Lera -- line: I communicate with a cross-functional team and I spend really, really quality - time in one-on-one meetings. So build connections, learn from them, and deeply - understand the product. If I were to give a tip to myself back when I started, - it would be to communicate more – to do more one-on-ones and communicate with - different people, at different levels. They will all generate new insights – they - will teach you how to be more insightful and more productive. Learn internal stuff - in the company to understand the context. What else do I need to share? Basically, - about how I learned data – for example, data analysis. 
- sec: 784 - time: '13:04' - who: Lera -- header: 'One‑on‑Ones: Structuring meetings to align with engineers and QA' -- line: I'm curious about a few things. You mentioned you had two problems. The first - problem was that you needed to learn a lot. What you did to solve that was take - courses, and worked into late evenings – you tried to immerse yourself in the - job to learn as much as possible. Then the second thing was building connections. - You would speak with pretty much everyone in the company to learn from them – - how they work, what they work on, what kind of problems they have, and learn about - the company. I think this thing (this communication) these one-on-one meetings - are really important for everyone, regardless of the role, be it product manager, - product owner, data scientist, software engineer – anyone. So I'm just curious, - for these one-on-one meetings – let's say you want to speak with a QA engineer - or a business analyst or a data analyst. - sec: 996 - time: '16:36' - who: Alexey -- line: How do you structure this meeting? First, I guess you need to approach a person - saying, “Hey, Martin, (or Hey, Theresa). I want to have a one-on-one meeting with - you.” Right? So you agree to a meeting. But then, what happens next? How do you - structure this meeting? How do you get the most out of this meeting? Is it just - free-form or do you have some structure? - sec: 996 - time: '16:36' - who: Alexey -- line: Yeah, I want to share a specific case. When I joined, they already had development - going on – they had a specific feature to deliver. Basically, I realized that - we kind of had a miscommunication with one developer – we just didn't hear each - other. I understood that we could not find common ground – I said one thing, he - said another, and we were losing time. We did communications in JIRA and the communication - just wasn't aligned. So I decided to set up a one-on-one meeting to fix the situation - and find common ground. 
- sec: 1075 - time: '17:55' - who: Lera -- line: With the developer, right? - sec: 1132 - time: '18:52' - who: Alexey -- line: 'With the developer – right. The structure was like this: I will try to remember - the questions I asked him. For example, I asked him what his vision of the best - product owner was – what did he expect from me, being the product owner? The second - question was, “Who do you consider to be the best product owner in the company?” - He described what he expected from me and he described a great product owner who - was already in our company – whom I needed to learn from.' - sec: 1133 - time: '18:53' - who: Lera -- line: What I learned from him was that a product owner was supposed to be more confident - and pushier – to be a leader – and he said that was lacking for me. This was great - feedback. Developers need a person (a product owner) who will be a great leader. - Also, we were making some small talk – we talked about things outside of the job. - I actually learned that he loved tennis and I also play tennis, so we made some - small talk about tennis. - sec: 1133 - time: '18:53' - who: Lera -- line: You connected on this basis. - sec: 1223 - time: '20:23' - who: Alexey -- line: '[chuckles] Sorry?' - sec: 1226 - time: '20:26' - who: Lera -- line: You connected because you share the same hobby. So you made a connection. - sec: 1227 - time: '20:27' - who: Alexey -- line: Yeah. I asked him to give me some tips on how to improve my job and what I - do [for him]. He shared his opinion. I also asked how we could improve our work - process and what he thought our gaps were. I just wanted to collect his feedback. - This feedback was really important to me, naturally, and I really wanted him to - just share his concerns and everything. After everything, after this meeting, - this person said, “Yeah, okay. I understand you.” He wasn't angry anymore. 
- sec: 1230 - time: '20:30' - who: Lera -- line: He understood that I just came there and I was a newbie. He said, “Okay, you - will learn. Okay, Valerie – I'm fine with you. It's okay.” So in one hour, you - can build the connection and that's how you take it step-by-step and become a - better specialist – more mature, more confident. You learn more. Now, if you compare - me with the person I was in the beginning – it’s two different people. [chuckles] - sec: 1230 - time: '20:30' - who: Lera -- line: So you needed to find common ground with the developer and you couldn't find - it. Maybe this is more like a “step back” question because we did not really discuss - what product owners do. I was wondering, why was it important to have good communication - with this developer? What's the role of a product owner? - sec: 1313 - time: '21:53' - who: Alexey -- line: Yeah, we didn’t discuss this. [chuckles] - sec: 1337 - time: '22:17' - who: Lera -- line: You said that you took a lot of courses to figure out what your job actually - is. So, what is your job? - sec: 1339 - time: '22:19' - who: Alexey -- header: 'Product Owner Scope: Monitoring, roadmap, prioritization, team operations' -- line: '[chuckles] It''s a funny one. The funniest thing is how [different] product - owners are from what they’re considered to be in literature and how they are in - real life. I think it''s kind of different. I will share with you the reality - of e-commerce, actually. As I said before, I''m in charge of search, which is - when you type something and search in e-commerce. One part of that is monitoring - the metrics. Basically, I''m in charge of keeping an eye on the existing products. - We are e-commerce, so we already have our search working. The first thing I need - to do is monitor our day-to-day metrics – our conversion rate, the search popularity, - and all this stuff. This is one part of my job. 
If there are some critical bugs, - I need to fix them somehow – to find a developer and to avoid making the company - lose money.' - sec: 1347 - time: '22:27' - who: Lera -- line: I'm in charge of making sure that the functionality works fine. Sometimes - we do have some issues, for example, with updating products. I also need to control. - This is simply one part. The other part (the real product owner/product manager - part) is we need to have a clear vision of where we are going – where our product - is going. You need to have a detailed roadmap with those JIRA Epics and User Stories. - The product owner prioritizes… The product owner is a value maximizer – this person - needs to prioritize the most valuable features and say, “We’re doing this first, - and the next feature would be this one.” Basically, it’s roadmapping, planning, - defining the vision of the product and the strategy – that’s basically the product - owner’s role. - sec: 1347 - time: '22:27' - who: Lera -- line: The other thing I need to do is team management. I think I can say that I - sometimes perform the role of a Scrum Master or Project Manager, which is how - you just organize the work to deliver some features. Frankly speaking, we don’t - have unstoppable development – what I want to say is that we have some initiatives - that we deliver. For example, this year, there was some time when we didn't actively - work on search because the company had different initiatives, such as migrating - from a regular website to an adaptive website. I was taking on the responsibility - of transitioning the existing website to an adaptive website. I didn't participate - as a Scrum Master in this kind of initiative – I was doing a different role. - sec: 1347 - time: '22:27' - who: Lera -- line: Sometimes we have initiatives that are very dedicated to search, for example, - delivering filters, or a new auto-complete.
For these kinds of parts, I take on - the role of Scrum Master to organize our development process. I also do this kind - of stuff. I mentioned roadmapping, but I didn't mention that I do research as - well. Basically, I monitor competitors, I read a lot about search – I was very - surprised that search is such a deep, deep topic. You need to know about machine - learning, natural [language] processing, you need to know about algorithms, about - Elasticsearch, and how to do the autocomplete, you need to know about product - structure and all the attributes of the searches. - sec: 1347 - time: '22:27' - who: Lera -- line: Also, you need to know the users, actually – what's the business context of - search, what problems are users trying to solve by searching for car parts, for - example? You need to know different query types. For example, users can search - in different ways – they can search by part numbers (OEM numbers) or they can - search by part name, or they just type something like, “I want brake pads Brembo - on my Audi a4,” and you need to know how to make your search understand all the - semantics. - sec: 1347 - time: '22:27' - who: Lera -- line: That’s a lot. [Lera agrees] You’ve been talking for 10 minutes and I'm taking - notes – the entire page is filled with what you do. [chuckles] - sec: 1664 - time: '27:44' - who: Alexey -- line: Yeah, I'm jumping a lot. I want to structure it a little bit. First, it's - to monitor how things are going right now, under the current project. Second, - you need to do research and understand users, understand competitors, and understand - patterns of users’ searches. The other part is team management – how to organize - the work. [cross-talk] - sec: 1672 - time: '27:52' - who: Lera -- line: You follow Scrum, right? All these estimating meetings, daily stand-ups, retrospectives - – you organize all that, right? 
- sec: 1710 - time: '28:30' - who: Alexey -- header: 'Process Choices: Scrum for deliveries vs Kanban for investigations' -- line: Yes, depending on the initiatives we have. If we, for example, have to deliver - filters – we need two developers and two QA engineers, and that's pretty much - it. For this kind of development process, for example, we did stand-ups, planning, - and retrospectives. But for the other initiative, we needed to investigate new - technologies, so you don't really need all these ceremonies. You just do Kanban, - where it's “needs to be done,” “in progress,” “done”. For different initiatives, - it’s different types of… - sec: 1727 - time: '28:47' - who: Lera -- line: From what I understood, you work with multiple teams, right? - sec: 1787 - time: '29:47' - who: Alexey -- header: 'Team Building: Forming a dedicated e‑commerce search team' -- line: Yeah, that's the interesting part. Because right now, we don't really have - a super-dedicated search team. But I'm looking forward to having one. Before we - just took developers for some initiatives. For example, for some kind of initiative, - you just grab some people – to deliver this feature, you need those people and - you acquire those people – you have this initiative and you deliver it. But we - are going to have dedicated teams – for example, dedicated to search – because - now, in our roadmap, we have natural language processing, we have machine learning, - and I think it requires a lot of context understanding. That's why I’m trying - to form a team around search right now. - sec: 1790 - time: '29:50' - who: Lera -- line: So, right now, in addition to all these things you mentioned, you're also - building a search team. - sec: 1855 - time: '30:55' - who: Alexey -- line: Yeah! [chuckles] - sec: 1860 - time: '31:00' - who: Lera -- line: Okay, I just want to summarize what you said. First, you monitor business - health (search health) if I can say that. 
[Lera agrees] The second thing is, you're - doing research, you're talking with users, you're watching what competitors are - doing so you know if you need any new features or things like that. Then you do - this team operational stuff, which is Scrum/Kanban – all these processes and rituals. - Then you mentioned a more strategic part, or defining a clear vision of where - you're going, and then from that vision, building a roadmap – that's another thing. - sec: 1862 - time: '31:02' - who: Alexey -- line: Then, I remember you talked about actually learning all these things – learning - about NLP, learning about machine learning. And now I think I understand why – - because you need to build a team and you want to know what kind of things they - need to know. Right? What kind of experience you need in the team, what kind of - knowledge you need in the team – you need to know that in order to build the team. - sec: 1862 - time: '31:02' - who: Alexey -- line: Yeah. For example, I need to know natural language processing is data science - stuff. Actually, it's not just me. You might have the impression that I do all - this stuff and it's like a T-shaped person. But we still have, for example, the - research team – at AUTODOC, there is the research department and they do in-depth - analytics and all this stuff. So I don't do it by myself. I don't do user interviews, - as you said – we have a special department for that. And also we have… [cross-talk] - sec: 1936 - time: '32:16' - who: Lera -- line: You still need to know that, “There was this interview, and this is the outcome - of this interview. These were the questions.” And perhaps you even watch the videos - of the interview to see how users actually use the app and then you see, “Okay, - something is wrong here. Maybe we should change the flow of the search.” Right? - sec: 1977 - time: '32:57' - who: Alexey -- line: Yeah, that’s true. 
My message was that I don't do it by myself, but we have - a special department for this. But I still need to read these studies and everything. - sec: 1994 - time: '33:14' - who: Lera -- line: It’s just that you don't code yourself, but you need to translate what the - researchers found into what actually needs to happen. You’re this glue that kind - of links these departments. Right? - sec: 2007 - time: '33:27' - who: Alexey -- line: Yeah. I say that being a product manager is like being a mini-CEO of the product. - Maybe you've heard this and it's true – you’re kind of responsible for everything - and if you have some kind of issues with the product, you need to react. That's - a tricky position, I know. But it's still very interesting. Some people like this - – more generalists and T-shaped people – I like to learn from different perspectives. - So I think this is fine for me, to not be a super-narrow specialist, but more - of a wide person. - sec: 2025 - time: '33:45' - who: Lera -- header: 'Search Expertise: Relevant Search book, Elasticsearch, and relevance as - business context' -- line: How do you keep up with all that? You mentioned that, in addition to all that, - you also need to learn about all this machine learning stuff. Actually, this is - how I found you. There was a post that you made about Relevant Search – the book. - Duke, the author, liked the post and it appeared in my feed. Duke was already - a guest multiple times at DataTalks.Club. I saw his reaction and then I read the - post and I thought, “Hmm… Interesting. I should invite Valeria.” So why did you… - sec: 2072 - time: '34:32' - who: Alexey -- line: The book is called Relevant Search and, as far as I remember, the book is - about – it's a very technical book. [Lera agrees] It's about using Elasticsearch. - It's Elasticsearch, right? It’s a search engine for building searches. So how - did you come across this book? Why did you decide to read it? 
- sec: 2072 - time: '34:32' - who: Alexey -- line: I think it's good to mention. It’s because I see that I need to understand - all this stuff to communicate with developers, for example. I was in some communities - of Elastic, and I saw that people were sharing this book. Actually, this book - is promoted to be a good one – like the Bible of Search or something. I found - that it's pretty useful to read about search. Regarding why I wrote this post - – because, in the introduction of this book, they just pointed out some of my - pain points. They just described the importance of search and how hard search - is. [chuckles] - sec: 2128 - time: '35:28' - who: Lera -- line: The main point is relevance, which is very connected to the business context. - You need to educate developers to understand the business context because you - cannot just build a relevant search from scratch – you need to tune the search - to be relevant. You also need to give the developers this context – what is relevant - for users, for example. I really like this thought, this expression. We need to - work on relevance. You cannot just grab some universal search and it will be super - fine. You need to tune it for your business. That's what I wanted to post on LinkedIn - – this thought that I really liked. The second part of the post was about cross-functional - collaboration. So it's not about requirement-driven development, it's more about - a product mindset – everyone should understand, as I said, the business context - and why it's important for users to get these kinds of results. - sec: 2128 - time: '35:28' - who: Lera -- line: I expect my developers to have a deep understanding of the business context. - I actually create those meetings, where I try to explain to developers why we're - doing this, what the problems of the users we’re solving, and what search actually - means. 
Even today, I will have this meeting and I want to show them, for example, - that the users can search by catalog – they can navigate by catalog, and it’s - a different way to search for car parts by text. I want them to understand this, - that it's not about just text, it's about users solving their problems. - sec: 2128 - time: '35:28' - who: Lera -- line: I imagine, if we’re talking about this domain of car parts, it's a pretty - complex one. Let's say you have a specific car model/make – Volkswagen, for example - (I'm not really into cars) – I don't know, some specific model. - sec: 2313 - time: '38:33' - who: Alexey -- line: It’s okay. That's fine. [chuckles] - sec: 2327 - time: '38:47' - who: Lera -- line: And then you need a specific part that would fit this exact car, right? You - not only need to know the name of this car part, but also the model and make of - the car for which you need the part. I can imagine that it becomes pretty complex - at some point, right? - sec: 2329 - time: '38:49' - who: Alexey -- header: 'User Journey: Vehicle selector, part fitment, and contextual search flows' -- line: Well, for this point, we have the user journey. The users need to… For example, - we have this cool feature, where you just insert your license plate number and - our vehicle selector will identify the vehicle by this license plate. For some - countries, we have this feature, and I think it's super user-friendly. You just - type 6 points. Now, when your car is identified on our website, you can just search - for stuff and you will have only the car parts suitable for your car. It works - like this. You first identify your car and then you can search for stuff and the - products that our website shows you fit your car. - sec: 2348 - time: '39:08' - who: Lera -- line: Well, you need to have this business context – you need to know this business - context in order to arrive at this solution, right? 
Without it, you would come - up with just a general search bar and you will be like, “Oh, I don't know. I need - a compressor for…” I don't know if there is such a thing. I know that there are - compressors in fridges. [chuckles] Whatever part name for whatever model and then, - “Okay, why is it not working? Why is it showing me this part for another Volkswagen?” - Right? - sec: 2411 - time: '40:11' - who: Alexey -- line: '[chuckles] Yeah. Right now, at AUTODOC, we still have some gaps. But there’s - a roadmap to fill those gaps. [chuckles] For example, you can buy a product without - identifying the car – you can’t do it right now. And it''s our task to solve – - to make it more user-friendly, to provide users the ability… We need to make a - website to make it clearer and understand users need to pick a car to find the - right one. It''s quite a common pattern. You can search, for example, “brake disc - for Audi” and that''s it, without specifying which Audi. We are now making machine - learning features with natural language processing that will identify the car - part maker or model in a search query and offer the user more information about - the car to make the whole journey easier. We have this feature in development.' - sec: 2441 - time: '40:41' - who: Lera -- line: This is quite a technical feature. It includes knowing what natural language - processing is, what parsing is, and extracting things from there. How technical - do you need to be? How much do you, as a product owner, need to know about that? - Is it more like a “nice to have” or is it actually a very important skill in your - case? - sec: 2539 - time: '42:19' - who: Alexey -- line: Yeah. I also went to… maybe you know this conference – it is about search, - called Haystack. - sec: 2559 - time: '42:39' - who: Lera -- line: Yeah. Is it also in Berlin? 
- sec: 2568 - time: '42:48' - who: Alexey -- header: 'Technical Upskilling: NLP, information retrieval, Haystack conference, - and ChatGPT' -- line: Yeah! It's in Berlin. It's a really technical conference. It’s for developers, - and I also go there to understand the technical aspects. You know what helps? - ChatGPT helps a lot, actually. What I do is, for example, I'm reading a technical - book and I don't really understand what information retrieval is. So I just go - to ChatGPT and say, “Hey, ChatGPT! What is information retrieval in the context - of car part search?” Ah, inverted index! Not information retrieval – inverted - index! “What does inverted index mean in the context of car part search? Explain - it to me like I’m 5.” [chuckles] I'm just kidding. - sec: 2571 - time: '42:51' - who: Lera -- line: That’s a very important part, right? “Explain like I’m five.” [chuckles] - sec: 2620 - time: '43:40' - who: Alexey -- line: Yeah, so it gives a simple explanation – ChatGPT gives me very simple examples - I can understand. That's a cool part. I think the more technical of a person you - are, the better. I'm trying to dig deep into technical stuff – I really try to - understand what natural language processing is from a technical standpoint. I - think the more you are in tech, the better – but still, the basics help you to - communicate with people and to give them context, for general understanding. - sec: 2623 - time: '43:43' - who: Lera -- line: For example, I need to know what a natural language processing task does. - I know it's spelling correction, I know it's named entity recognition – the natural - processing makes tokenization of the query, whether it's a brand name or a car - part name or its number – this task calls to natural language processing. So yeah, - I think it’s mandatory to learn the basics but the more you are in tech, the better. - sec: 2623 - time: '43:43' - who: Lera -- line: I guess it also depends on the field of your work.
Since you work in e-commerce - search, for you, it makes sense to go and learn about search. If somebody works - in some other domain, maybe they would need to learn about some other things. - But still, for a product owner, it's important to know the technical parts of… - I'm trying to think of an example. - sec: 2709 - time: '45:09' - who: Alexey -- line: Let's say that it’s a product owner in the moderation team and the moderation - team uses machine learning to identify things that shouldn't be posted on the - website. For them, it's probably important to know how machine learning can be - used for this and what machine learning actually is. This is similar to your case. - [Lera agrees] You mentioned you found this book when you were in a search community. - [Lera agrees] So you were already a part of technical search communities, and - you came across this book. - sec: 2709 - time: '45:09' - who: Alexey -- line: Yeah, in Telegram, they have an Elasticsearch group, and I am part of this - group. Actually, we are always looking forward to developers and that's why. There - are people sharing and this is how I learned. On GitHub, I also saw a bunch of - books about Elasticsearch, and this book was the first on the list. So that's - how I realized that I really need to read this one because there’s so much social - proof on it. [chuckles] - sec: 2767 - time: '46:07' - who: Lera -- header: 'Recruiting: Remote roles, office hubs, and open developer positions' -- line: Yeah. You said you're looking for developers right now? - sec: 2802 - time: '46:42' - who: Alexey -- line: Yeah, we are. [chuckles] - sec: 2807 - time: '46:47' - who: Lera -- line: So what's the profile that you're looking for? You mentioned that it's a fully - remote position, right? I heard in our community, in DataTalks.Club, many people - say, “Hey, I really want to have a fully remote job, but it's always US-based - (remote but in the US).” So what kind of remote…? Or is it not remote? 
- sec: 2809 - time: '46:49' - who: Alexey -- line: We have offices across Europe, across Ukraine, and even in other countries. - If you live in the city, you can go to the office – it's fine. We do have an office - in Berlin. Our tech hub is actually in Lisbon. This previous week, I was there - at a product event. You can work remotely, but you can also go to the office if - you live in the city [where there is one]. - sec: 2832 - time: '47:12' - who: Lera -- line: You lived in Breslau and you worked remotely, right? So that’s - also possible. - sec: 2863 - time: '47:43' - who: Alexey -- line: Yeah. What I did was… I actually invest, I think, in traveling around Europe - and seeing people offline and connecting with them. Because I think it's also - crucial to make these offline connections. I've pretty much been in some offices - at AUTODOC already. - sec: 2868 - time: '47:48' - who: Lera -- line: Well, please share the links to the job descriptions. Maybe there is somebody - who is an experienced search engineer, who's listening to us right now and would - love to join your team. - sec: 2895 - time: '48:15' - who: Alexey -- line: Yeah, yeah. Why not? - sec: 2906 - time: '48:26' - who: Lera -- header: 'Hiring Criteria: Why marketing backgrounds are valued for product roles' -- line: I see that we have some questions. The question is – maybe we already answered - that – “Did you have the Scrum Master experience and project management experience - before this job? Or did you pick it up on the job and learn as you went?” - sec: 2909 - time: '48:29' - who: Alexey -- line: Yeah, I learned as I went. But, as I mentioned before, I had a great pattern. - Even at the start, we had a great project manager – she was also a Scrum Master. - Before coming [to AUTODOC], I already knew all about Scrum ceremonies.
I saw how - people did stand-ups, how people did retrospectives, how they did planning, and - I've seen it and I have great patterns of how it can be done. What I did was just - read the Scrum guide and I also went to Scrum training. - sec: 2926 - time: '48:46' - who: Lera -- line: I knew just the theoretical part of this and I tried to learn it on the fly. - You just need to be a great talker, I think. If you know the basics, you just - need to be a great public speaker for this role, because you always need to keep - your guys motivated and involved in the project. And you do it by being this energetic - person – being this serving leader. So you learn the basics and then you… Basically, - to sum it up, I have previous experience of how people did it and I saw how it - worked. Then I read books about Scrum – not just the Scrum guide. I went to Scrum - training. And I just started doing it. - sec: 2926 - time: '48:46' - who: Lera -- line: For me, the most interesting part is that they decided to hire you even though - you had no experience in these areas, which probably means that maybe being an - experienced Scrum Master is not important – and you already proved that – you - can pick the skills up on the job. But do you know what they actually looked at - when deciding to hire you? What kind of skills were they interested in when making - this decision? - sec: 3049 - time: '50:49' - who: Alexey -- line: Historically, at AUTODOC, people that are now in a product ownership position - came from marketing. My boss also came from marketing, and her boss came from - marketing. So that's why – those people always had a marketing background. It's - okay to transition from marketing to product ownership. Probably, if we had product - owners come from developers, they would never have actually hired me because they - would be biased about me. I think that played a big role. Because historically, - product owners at AUTODOC came from marketing. 
- sec: 3078 - time: '51:18' - who: Lera -- line: Which skills, that you already had from your marketing past, helped you in - your current role? - sec: 3129 - time: '52:09' - who: Alexey -- header: 'Marketing Strengths: User understanding, internal PR, and pitching roadmaps' -- line: The first one is understanding… I think what’s common in marketing and product - management is understanding the user. For example, doing “job to be done” frameworks, - understanding customer journeys, understanding customer likes, pains, cases, and - customer needs – all this customer development stuff is present in both marketing - and in product. Basically, I already had this kind of knowledge and it's cool. - The second one I want to share is very interesting – when you work in a super - huge company like AUTODOC (we have 3000 people) it's not the same as working at - a small startup. - sec: 3136 - time: '52:16' - who: Lera -- line: What the difference is – people don't know you, and people don't know about - your product either. I think the crucial role of a product manager in a big company - is to be a PR manager of your product – to be a marketer of your product, an internal - marketer. Make people learn about your product, and about the benefits of your - product. There is a certain department in a company that’s responsible for the - search. In a small company, everybody knows and it's obvious, but in a super-huge - company, it's not obvious. You just need to shine. That's why I post on LinkedIn, - that's why I traveled to communicate with other people. - sec: 3136 - time: '52:16' - who: Lera -- line: Because when it comes to working in a huge company, you need to scale your - brand awareness and the awareness of your project. That's where marketing helped - me, because I used my social media to talk about search, and used these techniques - to educate people about my product. We are all working remotely, so information… - We are a little bit isolated. 
For me, LinkedIn posts are like a virtual version - of communication in our office. - sec: 3136 - time: '52:16' - who: Lera -- line: That's why you post selfies, right? - sec: 3301 - time: '55:01' - who: Alexey -- line: Yeah, it's kind of my trick. I understand the importance. Across a big company, - you need to talk more about your product publicly. That generates… For example, - people from assortment come to me to solve certain problems. It generates new - connections and new ideas. That's why I think it's crucial. To grow in a big company, - you need to be a little bit of a PR manager of yourself, your product, and your - team as well. - sec: 3304 - time: '55:04' - who: Lera -- line: That's how your marketing skills helped, right? [Lera agrees] Because you - already knew how to market something, “Okay! Let's think about what I can do to - be more noticeable within the company.” Right? [Lera agrees] “Let’s run a PPC - campaign.” [laughs] - sec: 3353 - time: '55:53' - who: Alexey -- line: '[laughs] Just kidding. The other thing I wanted to add is that you also need - to sell your ideas. Because when the company is huge and there are so many initiatives, - the top management needs to pick some initiatives that will generate more revenue. - That’s how prioritization works. Sometimes you don''t really know how this feature - will deliver money in the future – you’re selling the future. For project management, - you need to be a great salesperson. You need to encourage and to believe in this - perfect future – to generate revenue and stuff.' - sec: 3369 - time: '56:09' - who: Lera -- line: Because you’re selling the future, sometimes it’s obvious what the outcome - of this feature will be. Making presentations helps. I took this from marketing. - Understanding what information should be shown, how you can encourage people to - listen to you – to listen to your new idea. Basically, make a pitch. It's all - about marketing – how you make a pitch. 
For me, it's the same as when I was working - in B2B marketing – it's how we pitched our product to the B2B users. We just have - this presentation describing what the benefits of this product are. That’s the - same thing I do in this company. I explain what the benefits from my product are. - sec: 3369 - time: '56:09' - who: Lera -- header: 'Recommended Resources: Communication, Professional Product Owner, strategy, - experimentation' -- line: Yeah, thanks. That's a very comprehensive answer. I see that we are almost - running out of time. I still wanted to ask you one thing. You told us that you - needed to learn a lot – you took a lot of courses and you read a lot of books. - We already talked about one book – Relevant Search. Are there any other resources - that you would recommend to listeners who want to learn more about product ownership - and this topic? - sec: 3486 - time: '58:06' - who: Alexey -- line: Actually, it won’t be a typical answer. But I think the crucial take away - from everything I talked about in this interview – the most crucial skill for - the product owner is communication. How you build connections and how you talk, - how good of a manager you are, and how you can encourage people. Basically, I - would recommend you read books about communication, if you haven't before. Books - like Dale Carnegie, or about negotiation – some books about… I don't really know - what it is in English, I read it in another language. It's like, “You Can Negotiate - Everything,” or something like that. Robert Cialdini – about how to influence. - All these books are about communication. I also read a book about communication - called Aikido. A funny one. - sec: 3516 - time: '58:36' - who: Lera -- line: So, I think you really need to master your communication skills. That’s the - first one. There are books related to product ownership – there is a book called - Professional Product Owner. It's recommended by Scrum. 
You can go to Scrum.org - and you will see this book. I read it and it's pretty valuable. It gives you a - picture of what the product owner is. Also, there's a special book for Scrum – - to understand the basics of Scrum – Philosophy of Scrum, I think. I think it's - also good to read this one. What else? - sec: 3516 - time: '58:36' - who: Lera -- line: Some books related to your specific topic. For example, I read about search. - I also remember one book about strategy called Strategize. It’s also very connected - to product ownership. User Story Mapping also goes to the product ownership basket. - Also some books related to, for example, e-commerce. It is like a book about experimentation - and A/B testing. It’s also a very interesting one. I haven't read a book about - car parts, actually. [laughs] - sec: 3516 - time: '58:36' - who: Lera -- line: Not yet, maybe. [chuckles] - sec: 3694 - time: '1:01:34' - who: Alexey -- line: Not yet. [chuckles] We actually have specialists that are more knowledgeable - in cars – you can grab them into a meeting and they will explain stuff to you. - I think you can divide some skills you will develop and grab some books for the - skills, like communication, product management by itself, some books about your - features (like search) and about e-commerce, and about your business domain. When - you’re working, it's also fine to have some knowledge. - sec: 3695 - time: '1:01:35' - who: Lera -- line: One other tip that you shared with us was learning from everyone – scheduling - meetings with engineers, QA engineers, business analysts – and just asking them… - What did you ask? “What do you expect from the product owner? What’s the best - product owner in the company?” Then, also make some small talk to build a connection - and, I guess, learn about what they do. I really liked that part. I took a note - on it. - sec: 3734 - time: '1:02:14' - who: Alexey -- line: '[chuckles] Really cool.' 
- sec: 3762 - time: '1:02:42' - who: Lera -- line: Yeah. I mean, I take a lot of notes. Four sheets here. Anyway, We should be - wrapping up. Is there anything you want to mention before we finish? - sec: 3764 - time: '1:02:44' - who: Alexey -- header: 'Closing Advice: Breaking mental barriers and committing to continuous learning' -- line: Yeah. I just want to give some general advice. For me, this is a story about - how the barriers in your head can be broken. Yeah, I didn't have the same experience - that I did before but it's possible to learn. It's possible. If you want to change - your career, if you want to be in a different position, it's possible. You just - need to break down the barriers in your head. It will be difficult for the first - half a year. Yes. But if you have this dream, you can do it. It's possible. - sec: 3775 - time: '1:02:55' - who: Lera -- line: So the main barrier is in your head. Right? [Lera agrees] Yeah. Okay! Thanks - a lot! I really enjoyed this interview. It was great. Thanks for coming. Thanks - for joining us. Thanks for sharing your experience with us. And thanks, everyone, - for joining and asking questions. It was amazing. Thank you. Have a great week, - everyone. Bye. 
- sec: 3818 - time: '1:03:38' - who: Alexey ---- - -Links: - -* [Post](https://www.linkedin.com/posts/leracaiman_elasticsearch-ecommerce-activity-7106615081588674560-5WQO){:target="_blank"} \ No newline at end of file diff --git a/_podcast/to-update/s20e03-trends-in-data-engineering.md b/_podcast/trends-in-modern-data-engineering.md similarity index 90% rename from _podcast/to-update/s20e03-trends-in-data-engineering.md rename to _podcast/trends-in-modern-data-engineering.md index 49735877..e71b7d68 100644 --- a/_podcast/to-update/s20e03-trends-in-data-engineering.md +++ b/_podcast/trends-in-modern-data-engineering.md @@ -1,7 +1,5 @@ --- -title: "Context — This episode traces the practical and technological shifts shaping data engineering today: the rise of open-source, standards-driven building blocks (table formats like Iceberg/Delta, catalogs, DuckDB), new orchestration and workflow patterns, AI-driven tooling, specialization in governance/quality/streaming, and emerging marketplaces and platforms (like DLT Plus) that package reusable data products. - -Core theme — Data engineering is transitioning from monolithic, vendor-locked stacks to a composable, metadata-first ecosystem: teams win by adopting open, portable standards (headless table formats, catalogs, embeddable query engines), designing interoperable, cost-efficient pipelines, and packaging reusable data products—while practitioners pivot toward specialization and metadata-aware, SQL/Python-first skills to build and govern interoperable, AI-enabled data workflows." 
+title: 'Modern Data Engineering: Iceberg, Delta Lake & AI-Powered Pipelines' short: Trends in Data Engineering season: 20 episode: 3 @@ -16,13 +14,24 @@ links: apple: https://podcasts.apple.com/us/podcast/trends-in-data-engineering-adrian-brudaru/id1541710331?i=1000698294801 spotify: https://open.spotify.com/episode/35QbCW6Evqk1EPMKUDGGdv youtube: https://www.youtube.com/watch?v=AlCFKbFIEM8 - -description: Discover Apache Iceberg, DuckDB & AI-powered pipelines - learn cost-efficient table formats, orchestration tactics and a career roadmap for data engineers -intro: How do you future‑proof data engineering against vendor lock‑in, rising AI demand, and exploding metadata complexity? In this episode, Adrian Brudaru — a former business analyst turned freelancer and co‑founder of DLT — walks through practical choices for building resilient, cost‑efficient pipelines. Adrian explains DLT as a Python‑based ingestion standard, the DLT Plus vision and marketplace for reusable data products, and why the industry is shifting toward specialization in governance, data quality, and streaming.

Key topics include adopting Apache Iceberg as a table format (Parquet storage and reduced vendor lock‑in), the role of data catalogs and metadata tooling (AWS Glue and peers), and how DuckDB enables embeddable local OLAP and portable query execution. We cover cost‑efficient patterns—DuckDB with GitHub Actions and headless table formats—dbt’s influence and alternatives like SQLMesh, orchestration choices (Airflow, Prefect, Dagster, GitHub Actions), and the 2025 trend of AI integration in pipelines and AI agents. Listeners will get actionable guidance on tool selection, beginner and transition roadmaps, and when Iceberg, DuckDB, or AI‑powered approaches make sense for their data engineering stack +description: Master Iceberg, Delta Lake and AI-powered pipelines to build scalable, + governed data lakehouses—optimize ETL, boost real-time analytics and ML performance. +intro: How can engineering teams build reliable, scalable lakehouse pipelines that + combine transactional table formats with AI-driven automation? In this episode Adrian + Brudaru—an economics-trained analyst turned freelance data practitioner and co-founder + of a data company focused on open source tooling—joins us to explore the realities + of modern data engineering.

Adrian draws on years of startup and freelance + experience and a current mission to democratise data engineering through open source + to discuss the practical trade-offs between Iceberg and Delta Lake, how table formats + fit into a data lakehouse architecture, and where AI can augment pipeline development + and observability. Key topics include selecting the right table format for versioning + and governance, integrating AI-powered features into ETL/ELT workflows, and the + role of open source tools in scaling data platforms.

Listen to gain grounded + perspectives on Iceberg, Delta Lake, AI-powered pipelines, and data pipeline best + practices—especially useful for data engineers, architects, and engineering managers + evaluating lakehouse strategies or looking to adopt open source solutions. dateadded: 2025-03-14 - duration: PT01H02M16S - quotableClips: - name: Episode opening & guest introduction startOffset: 1 @@ -134,7 +143,6 @@ quotableClips: startOffset: 3679 url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3679 endOffset: 3736 - transcript: - header: Episode opening & guest introduction - line: This week, we’ll talk about trends in data engineering. Our special guest @@ -638,8 +646,19 @@ transcript: sec: 3737 time: '1:02:17' who: Alexey ---- +context: 'Context — This episode traces the practical and technological shifts shaping + data engineering today: the rise of open-source, standards-driven building blocks + (table formats like Iceberg/Delta, catalogs, DuckDB), new orchestration and workflow + patterns, AI-driven tooling, specialization in governance/quality/streaming, and + emerging marketplaces and platforms (like DLT Plus) that package reusable data products. + Core theme — Data engineering is transitioning from monolithic, vendor-locked stacks + to a composable, metadata-first ecosystem: teams win by adopting open, portable + standards (headless table formats, catalogs, embeddable query engines), designing + interoperable, cost-efficient pipelines, and packaging reusable data products—while + practitioners pivot toward specialization and metadata-aware, SQL/Python-first skills + to build and govern interoperable, AI-enabled data workflows.' 
+--- Links: * [Linkedin](https://www.linkedin.com/in/data-team/){:target="_blank"} diff --git a/_podcast/to-update/s19e01-using-data-to-create-liveable-cities.md b/_podcast/urban-data-science.md similarity index 94% rename from _podcast/to-update/s19e01-using-data-to-create-liveable-cities.md rename to _podcast/urban-data-science.md index 9f8a1b9f..c4209841 100644 --- a/_podcast/to-update/s19e01-using-data-to-create-liveable-cities.md +++ b/_podcast/urban-data-science.md @@ -1,7 +1,5 @@ --- -title: "Context: Cities are complex systems where transport, land use, public space and technology intersect; practitioners juggle short-term operations and long-term planning using streams of sensor, fare, and observational data while balancing livability, equity and privacy. - -Core narrative: A human-centered, data-driven approach to urban mobility—building robust, privacy-aware data pipelines that integrate real-time sensors, fare systems and observational sources, and leveraging predictive models and generative-AI interfaces—enables actionable insights for both operational responsiveness and strategic planning, ultimately shaping walkable, equitable, and livable cities through transparent open data, rigorous data quality, and interdisciplinary collaboration." 
+title: 'Urban Data Science: Transport Analytics, Sensors and Liveable Cities' short: Using Data to Create Liveable Cities season: 19 episode: 1 @@ -16,13 +14,26 @@ links: apple: https://podcasts.apple.com/us/podcast/using-data-to-create-liveable-cities-rachel-lim/id1541710331?i=1000675373908 spotify: https://open.spotify.com/episode/1z7jdogto8i4Zk6Zh1vDxE?si=KCg2Iq1US0SKwFCKasGqUg youtube: https://www.youtube.com/watch?v=VXQIGHUWeL0 - -description: Discover urban transport data, real-time monitoring and demand forecasting techniques—learn data pipelines, text-to-SQL and AI tools to improve planning & ops -intro: How can urban transport data and AI be combined to enable real-time monitoring, accurate demand forecasting, and reliable data pipelines for city operations and planning? In this episode, Rachel Lim, an urban data scientist with a geography background and a master’s in urban data science, walks through practical approaches that bridge urban design and data engineering. We cover the core data sources—GPS, sensors, fare card systems, ride‑hailing feeds—and where computer vision complements missing passenger-flow data. Rachel explains planning horizons from short‑term operational response (traffic marshals, cameras, event analytics like F1) to long‑term infrastructure planning driven by travel demand forecasting. She breaks down data pipelines and warehousing, real‑time stacks (Kafka, Apache Spark, sensors, APIs), journey logic, anomaly detection, and privacy-preserving publishing to open portals like data.gov.sg and DataMall. The episode also explores generative AI and Text‑to‑SQL workflows, prompt safety, synthetic data, and conversational search for natural‑language access to datasets. 
Listeners will gain actionable insight into building and governing transport data systems that support real‑time monitoring, demand forecasting, and scalable analytics +description: Discover urban data science, transport analytics & sensors for livable + cities - real-time monitoring, fare-card insights, data pipelines, AI tools. +intro: 'How can cities use transport analytics, sensors and AI to become more liveable? + In this episode Rachel Lim, an urban data scientist with a geography background + and a master’s in urban data science, walks through practical ways data informs + transport planning and placemaking. We cover core data sources—GPS, sensors, fare + card systems, ride‑hailing logs and computer vision for passenger flow—plus travel + demand forecasting, real‑time monitoring (including event analytics like F1), and + operational responses such as traffic marshals and recovery services.

Rachel + explains data engineering realities—Kafka, Apache Spark, real‑time APIs, data pipelines + and warehousing—alongside journey logic, fare computation and data quality management. + She also explores emerging tools: generative AI for natural‑language access, text‑to‑SQL + architectures, synthetic data, and privacy practices for publishing masked datasets. + The conversation highlights Singapore’s planning context, open data portals (data.gov.sg, + DataMall), and project ideas for learners using parking and taxi datasets.

+ Listen to learn which transport analytics and sensor strategies produce actionable + insights, how to set up robust data pipelines, and where to start hands‑on projects + to build liveable cities.' dateadded: 2024-11-06 - duration: PT00H51M32S - quotableClips: - name: Episode Introduction startOffset: 0 @@ -144,7 +155,6 @@ quotableClips: startOffset: 3162 url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=3162 endOffset: 3092 - transcript: - header: Episode Introduction - header: 'Guest Introduction: Rachel Lim, urban data scientist' @@ -916,8 +926,18 @@ transcript: sec: 3208 time: '53:28' who: Alexey ---- +context: 'Context: Cities are complex systems where transport, land use, public space + and technology intersect; practitioners juggle short-term operations and long-term + planning using streams of sensor, fare, and observational data while balancing livability, + equity and privacy. + Core narrative: A human-centered, data-driven approach to urban mobility—building + robust, privacy-aware data pipelines that integrate real-time sensors, fare systems + and observational sources, and leveraging predictive models and generative-AI interfaces—enables + actionable insights for both operational responsiveness and strategic planning, + ultimately shaping walkable, equitable, and livable cities through transparent open + data, rigorous data quality, and interdisciplinary collaboration.' +--- Links: * [Dynamic Datasets](https://datamall.lta.gov.sg/content/datamall/en/dynamic-data.html){:target="_blank"} \ No newline at end of file diff --git a/scripts/add_context_from_title.py b/scripts/add_context_from_title.py new file mode 100644 index 00000000..93459003 --- /dev/null +++ b/scripts/add_context_from_title.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +""" +Script to add 'context' field to podcast episodes by copying the 'title' field. +Processes all markdown files in _podcast/to-update directory. +Inserts the 'context' field right after the 'title' field. 
+""" + +import os +import re +from pathlib import Path + + +def extract_yaml_and_content(file_content): + """ + Extract YAML front matter and remaining content from a markdown file. + + Returns: + tuple: (yaml_lines, remaining_content) + """ + # Match YAML front matter between --- delimiters + pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$' + match = re.match(pattern, file_content, re.DOTALL) + + if not match: + raise ValueError("No YAML front matter found") + + yaml_content = match.group(1) + remaining_content = match.group(2) + + # Split YAML into lines + yaml_lines = yaml_content.split('\n') + + return yaml_lines, remaining_content + + +def rebuild_file(yaml_lines, remaining_content): + """ + Rebuild the markdown file with updated YAML front matter. + + Args: + yaml_lines: List of YAML lines + remaining_content: Content after YAML front matter + + Returns: + str: Complete file content + """ + yaml_str = '\n'.join(yaml_lines) + return f"---\n{yaml_str}\n---\n{remaining_content}" + + +def find_title_lines(yaml_lines): + """ + Find the line indices where the title field starts and ends. 
+ + Returns: + tuple: (start_index, end_index) or (None, None) if not found + """ + title_start = None + title_end = None + + i = 0 + while i < len(yaml_lines): + line = yaml_lines[i] + + # Check if this line starts with "title:" + if line.startswith('title:'): + title_start = i + + # Find where the title value ends (check if it's multi-line) + # Multi-line values are indented + j = i + 1 + while j < len(yaml_lines): + next_line = yaml_lines[j] + # If the next line starts with a space/tab, it's part of the title + # If it starts with a letter/number (new field), title ends + if next_line and not next_line[0].isspace() and next_line[0] != '-': + title_end = j - 1 + break + j += 1 + + # If we reached the end without finding a new field + if title_end is None: + title_end = len(yaml_lines) - 1 + + return title_start, title_end + + i += 1 + + return None, None + + +def extract_title_value(yaml_lines, start_idx, end_idx): + """ + Extract the complete title value from the YAML lines. + + Returns: + str: The complete title value + """ + if start_idx is None: + return None + + # Get the title lines + title_lines = yaml_lines[start_idx:end_idx + 1] + + # Join them back together + return '\n'.join(title_lines) + + +def process_episode(file_path): + """ + Process a single episode file: add 'context' field from 'title' if it doesn't exist. 
+ + Args: + file_path: Path to the markdown file + + Returns: + bool: True if file was modified, False otherwise + """ + print(f"Processing: {file_path.name}") + + # Read file + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Extract YAML and content + try: + yaml_lines, remaining_content = extract_yaml_and_content(content) + except Exception as e: + print(f" ⚠️ Error parsing {file_path.name}: {e}") + return False + + # Check if context field already exists + has_context = any(line.startswith('context:') for line in yaml_lines) + if has_context: + print(f" ℹ️ Already has 'context' field, skipping") + return False + + # Find title field + title_start, title_end = find_title_lines(yaml_lines) + if title_start is None: + print(f" ⚠️ No 'title' field found, skipping") + return False + + # Extract title value + title_section = extract_title_value(yaml_lines, title_start, title_end) + + # Create context field by replacing "title:" with "context:" + context_section = title_section.replace('title:', 'context:', 1) + context_lines = context_section.split('\n') + + # Insert context lines right after the title section + insert_position = title_end + 1 + for i, context_line in enumerate(context_lines): + yaml_lines.insert(insert_position + i, context_line) + + # Rebuild file + new_content = rebuild_file(yaml_lines, remaining_content) + + # Write back + with open(file_path, 'w', encoding='utf-8') as f: + f.write(new_content) + + print(f" ✅ Added 'context' field") + return True + + +def main(): + """Main function to process all episodes in to-update directory.""" + # Get the script directory and navigate to podcast directory + script_dir = Path(__file__).parent + podcast_dir = script_dir.parent / '_podcast' / 'to-update' + + if not podcast_dir.exists(): + print(f"❌ Directory not found: {podcast_dir}") + return + + print(f"Processing episodes in: {podcast_dir}\n") + + # Get all markdown files + md_files = sorted(podcast_dir.glob('*.md')) + + if not 
md_files: + print("No markdown files found") + return + + modified_count = 0 + skipped_count = 0 + + for file_path in md_files: + if process_episode(file_path): + modified_count += 1 + else: + skipped_count += 1 + + print(f"\n{'='*60}") + print(f"Summary:") + print(f" Modified: {modified_count} files") + print(f" Skipped: {skipped_count} files") + print(f" Total: {len(md_files)} files") + print(f"{'='*60}") + + +if __name__ == '__main__': + main() + From e4c72decfc891375b1f4bd49e29a9427cb571432 Mon Sep 17 00:00:00 2001 From: kavaivaleri Date: Tue, 18 Nov 2025 11:49:08 +0100 Subject: [PATCH 3/9] Fixing anchor ids --- _podcast/ai-for-ecology-biodiversity-and-conservation.md | 2 +- ...-infrastructure-hybrid-cloud-on-prem-distributed-training.md | 2 +- .../algorithmic-trading-with-python-and-machine-learning.md | 2 +- _podcast/applied-llm-research-and-career-growth-in-practice.md | 2 +- _podcast/bayesian-modeling-workflows-and-tools.md | 2 +- _podcast/becoming-data-freelancer.md | 2 +- _podcast/building-ai-digital-health-startups.md | 2 +- ...s.md => building-and-scaling-ai-data-products-with-mlops.md} | 0 ...and-scaling-data-engineering-systems-for-fraud-detection.md} | 2 +- _podcast/{build-data-team.md => building-data-team.md} | 0 _podcast/building-domestic-risk-assessment-tool.md | 2 +- ....md => building-explainable-and-actionable-ai-ml-systems.md} | 2 +- _podcast/building-production-ml-platform-and-mlops-team.md | 2 +- _podcast/building-production-search-systems.md | 2 +- .../building-scalable-and-reliable-machine-learning-systems.md | 2 +- _podcast/causal-inference-for-machine-learning.md | 2 +- _podcast/community-building-and-teaching-in-ai-tech.md | 2 +- _podcast/data-strategy-and-dataops-for-ai-powered-products.md | 2 +- _podcast/dataops-for-data-engineering.md | 2 +- ...b-building-sustainable-data-community-3-years-anniversary.md | 2 +- _podcast/datatalksclub-scaling-and-free-courses.md | 2 +- ...-llms-in-production-fine-tuning-retrieval-open-source-api.md | 
2 +- _podcast/fairness-in-ai-ml-engineering.md | 2 +- ...cience-portfolio-open-source-computer-vision-transformers.md | 2 +- .../from-data-freelancer-to-startup-open-source-products.md | 2 +- ...s-to-data-engineering-automation-open-source-volunteering.md | 2 +- _podcast/from-iot-data-engineering-to-leading-data-architect.md | 2 +- ...on-collider-to-data-science-research-software-engineering.md | 2 +- ...gineering-to-vp-of-machine-learning-applied-ml-leadership.md | 2 +- _podcast/generative-ai-chatbots-in-production-security.md | 2 +- ...md => hiring-and-managing-data-science-teams-in-b2b-saas.md} | 0 _podcast/human-centered-ai-automatic-speech-recognition.md | 2 +- _podcast/interpretable-machine-learning.md | 2 +- _podcast/investing-in-open-source-developer-tools.md | 2 +- ...job-search-strategy-in-tech-projects-skills-cv-networking.md | 2 +- _podcast/kaggle-grandmaster-to-production-ml-and-education.md | 2 +- _podcast/knowledge-graphs-and-llms-for-automotive-rnd.md | 2 +- _podcast/lean-mlops-for-startups.md | 2 +- ... => learning-machine-learning-self-taught-bioinformatics.md} | 0 ... 
=> mentoring-in-tech-how-to-find-and-become-a-mentor.md.md} | 0 _podcast/ml-system-design.md | 2 +- _podcast/mlops-and-ml-engineering-in-finance.md | 2 +- _podcast/mlops-at-scale-reproducibility-adoption.md | 2 +- ...n-search-systems-vector-databases-llms-semantic-retrieval.md | 2 +- ...n-source-and-volunteering-in-ai-for-data-ml-career-growth.md | 2 +- _podcast/open-source-ml-tools-strategy-and-business-models.md | 2 +- ...actical-generative-ai-consulting-from-expertise-to-impact.md | 2 +- _podcast/practical-llm-use-cases-and-product-patterns.md | 2 +- _podcast/pragmatic-and-standardized-mlops.md | 2 +- ...oduction-ml-search-vector-search-embeddings-hybrid search.md | 2 +- _podcast/production-ready-ai-engineering.md | 2 +- .../remote-data-engineering-work-and-building-iot-platforms.md | 2 +- ...=> scaling-data-engineering-teams-self-service-platforms.md} | 0 ...gy.md => scaling-enterprise-ai-mlops-data-first-strategy.md} | 0 .../{solo-data-scientist.md => solopreneur-data-scientist.md} | 0 ...ionals.md => solopreneur-developer-and-data-professional.md} | 0 _podcast/trends-in-modern-data-engineering.md | 2 +- _podcast/urban-data-science.md | 2 +- 58 files changed, 49 insertions(+), 49 deletions(-) rename _podcast/{build-and-scale-ai-data-products-with-mlops.md => building-and-scaling-ai-data-products-with-mlops.md} (100%) rename _podcast/{build-and-scale-data-engineering-systems-for-fraud-detection.md => building-and-scaling-data-engineering-systems-for-fraud-detection.md} (99%) rename _podcast/{build-data-team.md => building-data-team.md} (100%) rename _podcast/{build-explainable-and-actionable-ai-ml-systems.md => building-explainable-and-actionable-ai-ml-systems.md} (99%) rename _podcast/{hire-and-manage-data-science-teams-in-b2b-saas.md => hiring-and-managing-data-science-teams-in-b2b-saas.md} (100%) rename _podcast/{learn-machine-learning-self-taught-bioinformatics.md => learning-machine-learning-self-taught-bioinformatics.md} (100%) rename 
_podcast/{tech-mentoring-how-to-find-and-become-a-mentor.md.md => mentoring-in-tech-how-to-find-and-become-a-mentor.md.md} (100%) rename _podcast/{scale-data-engineering-teams-self-service-platforms.md => scaling-data-engineering-teams-self-service-platforms.md} (100%) rename _podcast/{scale-enterprise-ai-mlops-data-first-strategy.md => scaling-enterprise-ai-mlops-data-first-strategy.md} (100%) rename _podcast/{solo-data-scientist.md => solopreneur-data-scientist.md} (100%) rename _podcast/{solopreneurship-for-developers-and-data-professionals.md => solopreneur-developer-and-data-professional.md} (100%) diff --git a/_podcast/ai-for-ecology-biodiversity-and-conservation.md b/_podcast/ai-for-ecology-biodiversity-and-conservation.md index 98a65fed..8ea8a8cc 100644 --- a/_podcast/ai-for-ecology-biodiversity-and-conservation.md +++ b/_podcast/ai-for-ecology-biodiversity-and-conservation.md @@ -8,7 +8,7 @@ guests: - tanyabergerwolf image: images/podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.jpg ids: - anchor: atatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi + anchor: datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi youtube: 30tTrozbAkg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi diff --git a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md index 3f36f467..d09b887d 100644 --- a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md +++ b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md @@ -8,7 +8,7 @@ guests: - andreycheptsov image: images/podcast/s20e01-trends-in-ai-infrastructure.jpg ids: - anchor: 
atalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 + anchor: datatalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 youtube: 1aMuynlLM3o links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 diff --git a/_podcast/algorithmic-trading-with-python-and-machine-learning.md b/_podcast/algorithmic-trading-with-python-and-machine-learning.md index 717bd1ae..af3b38c4 100644 --- a/_podcast/algorithmic-trading-with-python-and-machine-learning.md +++ b/_podcast/algorithmic-trading-with-python-and-machine-learning.md @@ -7,7 +7,7 @@ guests: - ivanbrigida image: images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg ids: - anchor: atatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 + anchor: datatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 youtube: NThHAEIazFk links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 diff --git a/_podcast/applied-llm-research-and-career-growth-in-practice.md b/_podcast/applied-llm-research-and-career-growth-in-practice.md index b91622a0..13458a74 100644 --- a/_podcast/applied-llm-research-and-career-growth-in-practice.md +++ b/_podcast/applied-llm-research-and-career-growth-in-practice.md @@ -8,7 +8,7 @@ guests: - lavanyagupta image: images/podcast/s20e07-build-strong-career-in-data.jpg ids: - anchor: atalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p + anchor: 
datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p youtube: ekG5zJioyFs links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p diff --git a/_podcast/bayesian-modeling-workflows-and-tools.md b/_podcast/bayesian-modeling-workflows-and-tools.md index 1e51cc07..06e864ca 100644 --- a/_podcast/bayesian-modeling-workflows-and-tools.md +++ b/_podcast/bayesian-modeling-workflows-and-tools.md @@ -7,7 +7,7 @@ guests: - robzinkov image: images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg ids: - anchor: atatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 + anchor: datatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 youtube: kcKvUSInm-M links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 diff --git a/_podcast/becoming-data-freelancer.md b/_podcast/becoming-data-freelancer.md index 2effdfb3..a8c34a59 100644 --- a/_podcast/becoming-data-freelancer.md +++ b/_podcast/becoming-data-freelancer.md @@ -7,7 +7,7 @@ guests: - dimitrivisnadi image: images/podcast/s16e09-become-data-freelancer.jpg ids: - anchor: atatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 + anchor: datatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 youtube: R_EnSa9aZtE links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 diff --git a/_podcast/building-ai-digital-health-startups.md b/_podcast/building-ai-digital-health-startups.md index 718d93a9..41f66d98 100644 --- 
a/_podcast/building-ai-digital-health-startups.md +++ b/_podcast/building-ai-digital-health-startups.md @@ -7,7 +7,7 @@ guests: - mariabruckert image: images/podcast/s16e08-ai-for-digital-health.jpg ids: - anchor: atatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc + anchor: datatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc youtube: whpkDmVVGUE links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc diff --git a/_podcast/build-and-scale-ai-data-products-with-mlops.md b/_podcast/building-and-scaling-ai-data-products-with-mlops.md similarity index 100% rename from _podcast/build-and-scale-ai-data-products-with-mlops.md rename to _podcast/building-and-scaling-ai-data-products-with-mlops.md diff --git a/_podcast/build-and-scale-data-engineering-systems-for-fraud-detection.md b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md similarity index 99% rename from _podcast/build-and-scale-data-engineering-systems-for-fraud-detection.md rename to _podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md index 87b23e03..2340cf6e 100644 --- a/_podcast/build-and-scale-data-engineering-systems-for-fraud-detection.md +++ b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md @@ -7,7 +7,7 @@ guests: - angelaramirez image: images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg ids: - anchor: atatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab + anchor: datatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab youtube: ZXNKjrrKU_I links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab diff --git a/_podcast/build-data-team.md b/_podcast/building-data-team.md similarity index 100% rename from _podcast/build-data-team.md rename to 
_podcast/building-data-team.md diff --git a/_podcast/building-domestic-risk-assessment-tool.md b/_podcast/building-domestic-risk-assessment-tool.md index 99e2096a..b91edbe3 100644 --- a/_podcast/building-domestic-risk-assessment-tool.md +++ b/_podcast/building-domestic-risk-assessment-tool.md @@ -8,7 +8,7 @@ guests: - sabinafirtala image: images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg ids: - anchor: lub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i + anchor: datatalksclub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i youtube: CpWlBAmD9ok links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i diff --git a/_podcast/build-explainable-and-actionable-ai-ml-systems.md b/_podcast/building-explainable-and-actionable-ai-ml-systems.md similarity index 99% rename from _podcast/build-explainable-and-actionable-ai-ml-systems.md rename to _podcast/building-explainable-and-actionable-ai-ml-systems.md index f9b8f739..d5711eb0 100644 --- a/_podcast/build-explainable-and-actionable-ai-ml-systems.md +++ b/_podcast/building-explainable-and-actionable-ai-ml-systems.md @@ -7,7 +7,7 @@ guests: - polinamosolova image: images/podcast/s14e09-interpretable-ai-and-ml.jpg ids: - anchor: atatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq + anchor: datatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq youtube: EQcY83VA0Us links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq diff --git a/_podcast/building-production-ml-platform-and-mlops-team.md b/_podcast/building-production-ml-platform-and-mlops-team.md index a6259ad0..8b7c93dd 100644 --- a/_podcast/building-production-ml-platform-and-mlops-team.md +++ b/_podcast/building-production-ml-platform-and-mlops-team.md @@ -7,7 +7,7 @@ guests: - simonstiebellehner image: 
images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg ids: - anchor: atatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c + anchor: datatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c youtube: CB1YIsxQRtc links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c diff --git a/_podcast/building-production-search-systems.md b/_podcast/building-production-search-systems.md index 51c89b06..ae1c5a93 100644 --- a/_podcast/building-production-search-systems.md +++ b/_podcast/building-production-search-systems.md @@ -7,7 +7,7 @@ guests: - danielsvonava image: images/podcast/s17e09-building-production-search-systems.jpg ids: - anchor: atatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh + anchor: datatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh youtube: gEmSrknGKDE links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh diff --git a/_podcast/building-scalable-and-reliable-machine-learning-systems.md b/_podcast/building-scalable-and-reliable-machine-learning-systems.md index b967de30..1b0b2bad 100644 --- a/_podcast/building-scalable-and-reliable-machine-learning-systems.md +++ b/_podcast/building-scalable-and-reliable-machine-learning-systems.md @@ -7,7 +7,7 @@ guests: - arsenykravchenko image: images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg ids: - anchor: atatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q + anchor: datatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q youtube: i-pIdekjUow links: anchor: 
https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q diff --git a/_podcast/causal-inference-for-machine-learning.md b/_podcast/causal-inference-for-machine-learning.md index e62fe5b1..b59937be 100644 --- a/_podcast/causal-inference-for-machine-learning.md +++ b/_podcast/causal-inference-for-machine-learning.md @@ -7,7 +7,7 @@ guests: - aleksandermolak image: images/podcast/s15e06-democratizing-causality.jpg ids: - anchor: atatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh + anchor: datatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh youtube: 0I2FHH95Ofs links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh diff --git a/_podcast/community-building-and-teaching-in-ai-tech.md b/_podcast/community-building-and-teaching-in-ai-tech.md index bc199b1d..c25872f7 100644 --- a/_podcast/community-building-and-teaching-in-ai-tech.md +++ b/_podcast/community-building-and-teaching-in-ai-tech.md @@ -8,7 +8,7 @@ guests: - erumafzal image: images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg ids: - anchor: lub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r + anchor: datatalksclub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r youtube: 7SLd5V7z3xQ links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r diff --git a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md index a93e5f38..40123c87 100644 --- a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md +++ b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md @@ -7,7 +7,7 @@ guests: - boyanangelov image: images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg ids: - 
anchor: atatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete + anchor: datatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete youtube: jGbfeYdlCiQ links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete diff --git a/_podcast/dataops-for-data-engineering.md b/_podcast/dataops-for-data-engineering.md index 1524f049..21794875 100644 --- a/_podcast/dataops-for-data-engineering.md +++ b/_podcast/dataops-for-data-engineering.md @@ -8,7 +8,7 @@ guests: - christopherbergh image: images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg ids: - anchor: atatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f + anchor: datatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f youtube: HzGpIxV8HtA links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f diff --git a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md index 936fd0a3..10a12cab 100644 --- a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md +++ b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md @@ -9,7 +9,7 @@ guests: - johannabayer image: images/podcast/s16e01-datatalks-club-anniversary-interview.jpg ids: - anchor: atatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo + anchor: datatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo youtube: nCqwZT9zA0M links: anchor: 
https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo diff --git a/_podcast/datatalksclub-scaling-and-free-courses.md b/_podcast/datatalksclub-scaling-and-free-courses.md index 8a939e0a..9c1ece63 100644 --- a/_podcast/datatalksclub-scaling-and-free-courses.md +++ b/_podcast/datatalksclub-scaling-and-free-courses.md @@ -8,7 +8,7 @@ guests: - alexeygrigorev image: images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg ids: - anchor: atatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 + anchor: datatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 youtube: GHbeXIKnkLQ links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 diff --git a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md index f686fbf3..14926211 100644 --- a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md +++ b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md @@ -7,7 +7,7 @@ guests: - meryemarik image: images/podcast/s15e03-llms-for-everyone.jpg ids: - anchor: atatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf + anchor: datatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf youtube: 6dn6uZFkk04 links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf diff --git a/_podcast/fairness-in-ai-ml-engineering.md b/_podcast/fairness-in-ai-ml-engineering.md index cee83509..a9381f75 100644 --- a/_podcast/fairness-in-ai-ml-engineering.md +++ b/_podcast/fairness-in-ai-ml-engineering.md @@ -8,7 +8,7 @@ guests: - tamaraatanasoska image: 
images/podcast/s19e09-linguistics-and-fairness.jpg ids: - anchor: atalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 + anchor: datatalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 youtube: sXU9vMDBjmk links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 diff --git a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md index a61f7ef2..e395edb9 100644 --- a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md +++ b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md @@ -8,7 +8,7 @@ guests: - isabellabicalho image: images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg ids: - anchor: atalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura + anchor: datatalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura youtube: GifY8Zn-pnU links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura diff --git a/_podcast/from-data-freelancer-to-startup-open-source-products.md b/_podcast/from-data-freelancer-to-startup-open-source-products.md index 256483b3..79d54494 100644 --- a/_podcast/from-data-freelancer-to-startup-open-source-products.md +++ b/_podcast/from-data-freelancer-to-startup-open-source-products.md @@ -7,7 +7,7 @@ guests: - adrianbrudaru image: images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg ids: - anchor: 
atatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k + anchor: datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k youtube: vOpEQiCsaLw links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k diff --git a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md index 4e9c9c91..a9132ad9 100644 --- a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md +++ b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md @@ -8,7 +8,7 @@ guests: - agitajaunzeme image: images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg ids: - anchor: atalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv + anchor: datatalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv youtube: QKWu5-6_6TE links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv diff --git a/_podcast/from-iot-data-engineering-to-leading-data-architect.md b/_podcast/from-iot-data-engineering-to-leading-data-architect.md index 8c3949c8..b9cd26f1 100644 --- a/_podcast/from-iot-data-engineering-to-leading-data-architect.md +++ b/_podcast/from-iot-data-engineering-to-leading-data-architect.md @@ -7,7 +7,7 @@ guests: - loicmagnien image: images/podcast/s15e08-from-data-manager-to-data-architect.jpg ids: - anchor: atatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 + anchor: datatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 youtube: 
qWG--iYO2uc links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 diff --git a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md index e13e35d0..5879b045 100644 --- a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md +++ b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md @@ -8,7 +8,7 @@ guests: - anastasiakaravdina image: images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg ids: - anchor: atalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth + anchor: datatalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth youtube: kV0ZDy2UtJA links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth diff --git a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md index 49e7a789..edefe978 100644 --- a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md +++ b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md @@ -8,7 +8,7 @@ guests: - jackblandin image: images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg ids: - anchor: atatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk + anchor: datatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk youtube: su2M058m3Lw links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk diff --git 
a/_podcast/generative-ai-chatbots-in-production-security.md b/_podcast/generative-ai-chatbots-in-production-security.md index 0908fdb8..fd125454 100644 --- a/_podcast/generative-ai-chatbots-in-production-security.md +++ b/_podcast/generative-ai-chatbots-in-production-security.md @@ -8,7 +8,7 @@ guests: - mariasukhareva image: images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg ids: - anchor: atalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 + anchor: datatalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 youtube: bT7-HRNCltk links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 diff --git a/_podcast/hire-and-manage-data-science-teams-in-b2b-saas.md b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md similarity index 100% rename from _podcast/hire-and-manage-data-science-teams-in-b2b-saas.md rename to _podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md diff --git a/_podcast/human-centered-ai-automatic-speech-recognition.md b/_podcast/human-centered-ai-automatic-speech-recognition.md index 3d147743..ba956c31 100644 --- a/_podcast/human-centered-ai-automatic-speech-recognition.md +++ b/_podcast/human-centered-ai-automatic-speech-recognition.md @@ -7,7 +7,7 @@ guests: - katarzynaforemniak image: images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg ids: - anchor: atatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 + anchor: datatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 youtube: yTZ4cddD7DU links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 diff --git 
a/_podcast/interpretable-machine-learning.md b/_podcast/interpretable-machine-learning.md index 06577acf..1b6c0109 100644 --- a/_podcast/interpretable-machine-learning.md +++ b/_podcast/interpretable-machine-learning.md @@ -6,7 +6,7 @@ guests: - christophmolnar image: images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg ids: - anchor: atatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 + anchor: datatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 youtube: LBuGzyOkx7c links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 diff --git a/_podcast/investing-in-open-source-developer-tools.md b/_podcast/investing-in-open-source-developer-tools.md index acf2e418..eed452d0 100644 --- a/_podcast/investing-in-open-source-developer-tools.md +++ b/_podcast/investing-in-open-source-developer-tools.md @@ -7,7 +7,7 @@ guests: - belawiertz image: images/podcast/s15e02-investing-in-open-source-data-tools.jpg ids: - anchor: atatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 + anchor: datatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 youtube: 7Bg1JQLnCao links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 diff --git a/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md index 7037e5de..7e33f68f 100644 --- a/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md +++ b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md @@ -7,7 +7,7 @@ guests: - sarahmestiri image: images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg ids: - anchor: 
atatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 + anchor: datatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 youtube: PchwbIs0tOg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 diff --git a/_podcast/kaggle-grandmaster-to-production-ml-and-education.md b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md index 43f52f89..43c47fda 100644 --- a/_podcast/kaggle-grandmaster-to-production-ml-and-education.md +++ b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md @@ -8,7 +8,7 @@ guests: - alexanderguschin image: images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg ids: - anchor: atalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 + anchor: datatalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 youtube: NfAJAr7FvyY&t links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 diff --git a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md index 1228f5c1..c9a88e47 100644 --- a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md +++ b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md @@ -7,7 +7,7 @@ guests: - anahitapakiman image: images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg ids: - anchor: atatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 + anchor: datatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 youtube: YncdlUscUOo links: anchor: 
https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 diff --git a/_podcast/lean-mlops-for-startups.md b/_podcast/lean-mlops-for-startups.md index c07d37fc..326b3895 100644 --- a/_podcast/lean-mlops-for-startups.md +++ b/_podcast/lean-mlops-for-startups.md @@ -8,7 +8,7 @@ guests: - nemanjaradojkovic image: images/podcast/s20e04-mlops-in-corporations-and-startups.jpg ids: - anchor: atalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 + anchor: datatalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 youtube: DX9c__a4jzg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 diff --git a/_podcast/learn-machine-learning-self-taught-bioinformatics.md b/_podcast/learning-machine-learning-self-taught-bioinformatics.md similarity index 100% rename from _podcast/learn-machine-learning-self-taught-bioinformatics.md rename to _podcast/learning-machine-learning-self-taught-bioinformatics.md diff --git a/_podcast/tech-mentoring-how-to-find-and-become-a-mentor.md.md b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md similarity index 100% rename from _podcast/tech-mentoring-how-to-find-and-become-a-mentor.md.md rename to _podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md diff --git a/_podcast/ml-system-design.md b/_podcast/ml-system-design.md index 7ba823e8..f2b2c265 100644 --- a/_podcast/ml-system-design.md +++ b/_podcast/ml-system-design.md @@ -7,7 +7,7 @@ guests: - valeriybabushkin image: images/podcast/s15e01-why-machine-learning-design-broken.jpg ids: - anchor: atatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o + anchor: datatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o youtube: 6YBMU6475KQ links: anchor: 
https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o diff --git a/_podcast/mlops-and-ml-engineering-in-finance.md b/_podcast/mlops-and-ml-engineering-in-finance.md index 87a3471b..13083db9 100644 --- a/_podcast/mlops-and-ml-engineering-in-finance.md +++ b/_podcast/mlops-and-ml-engineering-in-finance.md @@ -7,7 +7,7 @@ guests: - nemanjaradojkovic image: images/podcast/s17e05-machine-learning-engineering-in-finance.jpg ids: - anchor: atatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 + anchor: datatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 youtube: Nl4aibeFwiI links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 diff --git a/_podcast/mlops-at-scale-reproducibility-adoption.md b/_podcast/mlops-at-scale-reproducibility-adoption.md index 2e1f67f4..fd723757 100644 --- a/_podcast/mlops-at-scale-reproducibility-adoption.md +++ b/_podcast/mlops-at-scale-reproducibility-adoption.md @@ -7,7 +7,7 @@ guests: - raphaelhoogvliets image: images/podcast/s19e04-mlops-as-team.jpg ids: - anchor: atalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr + anchor: datatalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr youtube: rMq63r3zi4c links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr diff --git a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md index ba4320c7..23ba422d 100644 --- a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md +++ b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md @@ -8,7 +8,7 @@ guests: - atitaarora image: 
images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg ids: - anchor: atatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps + anchor: datatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps youtube: _fbe1QyJ1PY links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps diff --git a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md index 8a594eee..58821a95 100644 --- a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md +++ b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md @@ -7,7 +7,7 @@ guests: - saraelateif image: images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg ids: - anchor: atatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan + anchor: datatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan youtube: aHdaIwOEI8Q links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan diff --git a/_podcast/open-source-ml-tools-strategy-and-business-models.md b/_podcast/open-source-ml-tools-strategy-and-business-models.md index f8b26b17..aebcf554 100644 --- a/_podcast/open-source-ml-tools-strategy-and-business-models.md +++ b/_podcast/open-source-ml-tools-strategy-and-business-models.md @@ -7,7 +7,7 @@ guests: - vincentwarmerdam image: images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg ids: - anchor: atatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs + anchor: 
datatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs youtube: UPlIETGwTg8 links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs diff --git a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md index d6d68e15..52f64ecf 100644 --- a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md +++ b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md @@ -7,7 +7,7 @@ guests: - verenaweber image: images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg ids: - anchor: atatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr + anchor: datatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr youtube: 4RargY8iOaE links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr diff --git a/_podcast/practical-llm-use-cases-and-product-patterns.md b/_podcast/practical-llm-use-cases-and-product-patterns.md index c42ded32..93eddf03 100644 --- a/_podcast/practical-llm-use-cases-and-product-patterns.md +++ b/_podcast/practical-llm-use-cases-and-product-patterns.md @@ -7,7 +7,7 @@ guests: - sandrakublik image: images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg ids: - anchor: atatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 + anchor: datatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 youtube: bM6AR4A-f98 links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 diff --git 
a/_podcast/pragmatic-and-standardized-mlops.md b/_podcast/pragmatic-and-standardized-mlops.md index e17a9b6a..a2ed6f05 100644 --- a/_podcast/pragmatic-and-standardized-mlops.md +++ b/_podcast/pragmatic-and-standardized-mlops.md @@ -7,7 +7,7 @@ guests: - mariavechtomova image: images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg ids: - anchor: lub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv + anchor: datatalksclub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv youtube: q3DTR3Od1MA links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv diff --git a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md index 1df35494..beee1eef 100644 --- a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md +++ b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md @@ -7,7 +7,7 @@ guests: - reemmahmoud image: images/podcast/s17e08-building-machine-learning-products.jpg ids: - anchor: atatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd + anchor: datatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd youtube: m45tNY-8gY8 links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd diff --git a/_podcast/production-ready-ai-engineering.md b/_podcast/production-ready-ai-engineering.md index 5b104f83..7ad3313c 100644 --- a/_podcast/production-ready-ai-engineering.md +++ b/_podcast/production-ready-ai-engineering.md @@ -7,7 +7,7 @@ guests: - bartoszmikulski image: images/podcast/s20e05-data-intensive-ai.jpg ids: - anchor: atalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi + anchor: datatalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi youtube: BP6w_vKySN0 links: anchor: 
https://creators.spotify.com/pod/show/datatalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi diff --git a/_podcast/remote-data-engineering-work-and-building-iot-platforms.md b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md index ae18c8a0..e2a27586 100644 --- a/_podcast/remote-data-engineering-work-and-building-iot-platforms.md +++ b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md @@ -7,7 +7,7 @@ guests: - josemaria image: images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg ids: - anchor: atatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c + anchor: datatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c youtube: UX7UShEioKc links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c diff --git a/_podcast/scale-data-engineering-teams-self-service-platforms.md b/_podcast/scaling-data-engineering-teams-self-service-platforms.md similarity index 100% rename from _podcast/scale-data-engineering-teams-self-service-platforms.md rename to _podcast/scaling-data-engineering-teams-self-service-platforms.md diff --git a/_podcast/scale-enterprise-ai-mlops-data-first-strategy.md b/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md similarity index 100% rename from _podcast/scale-enterprise-ai-mlops-data-first-strategy.md rename to _podcast/scaling-enterprise-ai-mlops-data-first-strategy.md diff --git a/_podcast/solo-data-scientist.md b/_podcast/solopreneur-data-scientist.md similarity index 100% rename from _podcast/solo-data-scientist.md rename to _podcast/solopreneur-data-scientist.md diff --git a/_podcast/solopreneurship-for-developers-and-data-professionals.md b/_podcast/solopreneur-developer-and-data-professional.md similarity index 100% rename from 
_podcast/solopreneurship-for-developers-and-data-professionals.md rename to _podcast/solopreneur-developer-and-data-professional.md diff --git a/_podcast/trends-in-modern-data-engineering.md b/_podcast/trends-in-modern-data-engineering.md index e71b7d68..d1f442be 100644 --- a/_podcast/trends-in-modern-data-engineering.md +++ b/_podcast/trends-in-modern-data-engineering.md @@ -7,7 +7,7 @@ guests: - adrianbrudaru image: images/podcast/s20e03-trends-in-data-engineering.jpg ids: - anchor: atalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae + anchor: datatalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae youtube: AlCFKbFIEM8 links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae diff --git a/_podcast/urban-data-science.md b/_podcast/urban-data-science.md index c4209841..94773f5a 100644 --- a/_podcast/urban-data-science.md +++ b/_podcast/urban-data-science.md @@ -7,7 +7,7 @@ guests: - rachellim image: images/podcast/s19e01-using-data-to-create-liveable-cities.jpg ids: - anchor: atatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup + anchor: datatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup youtube: VXQIGHUWeL0 links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup From 7ec0230b6aa2fc3adbec31c12cf281cca8c9044d Mon Sep 17 00:00:00 2001 From: kavaivaleri Date: Tue, 18 Nov 2025 11:56:11 +0100 Subject: [PATCH 4/9] Fix dashes --- ...r-ecology-biodiversity-and-conservation.md | 12 +- ...-in-healthcare-and-digital-therapeutics.md | 30 +-- ...brid-cloud-on-prem-distributed-training.md | 48 ++-- ...rading-with-python-and-machine-learning.md | 46 ++-- ...lgorithms-data-structures-for-engineers.md | 2 +- ...-research-and-career-growth-in-practice.md | 4 +- .../bayesian-modeling-workflows-and-tools.md | 12 +- 
...big-data-analytics-and-postdoc-research.md | 26 +- .../big-data-engineer-vs-data-scientist.md | 2 +- ...rmatics-worflows-tools-and-data-science.md | 14 +- ...ngineering-tooling-retrieval-evaluation.md | 34 +-- .../building-ai-digital-health-startups.md | 30 +-- ...and-scaling-ai-data-products-with-mlops.md | 18 +- _podcast/building-and-scaling-data-team.md | 18 +- _podcast/building-data-team.md | 54 ++-- .../building-domestic-risk-assessment-tool.md | 8 +- ...xplainable-and-actionable-ai-ml-systems.md | 26 +- ...ing-healthcare-machine-learning-systems.md | 28 +- ...communities-diversity-and-career-growth.md | 34 +-- _podcast/building-mlops-startup.md | 24 +- ...ce-data-product-for-identity-resolution.md | 4 +- ...g-production-ml-platform-and-mlops-team.md | 18 +- .../causal-inference-for-machine-learning.md | 4 +- ...munity-building-and-teaching-in-ai-tech.md | 18 +- ...ng-leadership-and-modern-data-platforms.md | 6 +- ...ata-engineering-tools-modern-data-stack.md | 30 +-- ...gy-market-demand-and-client-acquisition.md | 4 +- ...ivacy-engineering-gdpr-machine-learning.md | 22 +- ...ity-data-observability-data-reliability.md | 2 +- .../data-science-leadership-hiring-mlops.md | 26 +- ...-science-manager-vs-expert-hiring-guide.md | 2 +- ...a-science-team-structure-and-org-design.md | 46 ++-- ...ndie-hacker-bootstrapping-side-projects.md | 2 +- ...egy-and-dataops-for-ai-powered-products.md | 6 +- ...nd-gitops-best-practices-for-data-teams.md | 30 +-- _podcast/dataops-for-data-engineering.md | 14 +- ...-principles-and-scalable-data-platforms.md | 36 +-- ...able-data-community-3-years-anniversary.md | 16 +- .../datatalksclub-scaling-and-free-courses.md | 6 +- ...n-fine-tuning-retrieval-open-source-api.md | 46 ++-- ...eveloper-personal-brand-learn-in-public.md | 36 +-- .../devrel-data-science-open-source-tools.md | 20 +- .../devrel-open-source-machine-learning.md | 36 +-- _podcast/fairness-in-ai-ml-engineering.md | 52 ++-- ...ng-model-monitoring-and-data-governance.md | 
4 +- ...ce-data-engineering-pricing-and-clients.md | 4 +- _podcast/freelancing-in-machine-learning.md | 18 +- ...pen-source-computer-vision-transformers.md | 8 +- ...ision-research-to-autonomous-driving-ai.md | 8 +- ...elancer-to-startup-open-source-products.md | 62 ++--- ...ing-automation-open-source-volunteering.md | 4 +- _podcast/from-game-ai-to-modern-ai-agents.md | 12 +- ...a-science-research-software-engineering.md | 4 +- ...ytics-engineering-sql-dbt-career-switch.md | 12 +- .../from-math-graduate-to-data-analytics.md | 6 +- ...cs-to-computer-vision-career-transition.md | 2 +- ...ductor-data-to-applied-machine-learning.md | 8 +- ...gineering-to-leading-data-science-teams.md | 6 +- ...-machine-learning-applied-ml-leadership.md | 40 +-- _podcast/get-data-scientist-job.md | 30 +-- ...junior-data-job-and-transferable-skills.md | 26 +- ...managing-data-science-teams-in-b2b-saas.md | 4 +- _podcast/how-to-break-into-data-science.md | 30 +-- .../how-to-grow-your-ml-engineering-career.md | 26 +- ...to-switch-to-ml-tech-without-experience.md | 20 +- ...entered-ai-automatic-speech-recognition.md | 58 ++--- ...man-centered-mlops-and-model-monitoring.md | 28 +- _podcast/interpretable-machine-learning.md | 40 +-- ...edge-graphs-and-llms-for-automotive-rnd.md | 2 +- ...data-product-adoption-modern-data-stack.md | 6 +- _podcast/lean-mlops-for-startups.md | 40 +-- ...g-engineering-production-best-practices.md | 12 +- ...for-asteroid-mining-and-water-detection.md | 2 +- ...ting-attribution-marketing-mix-modeling.md | 2 +- ...oney-with-machine-learning-roles-skills.md | 10 +- ...tech-how-to-find-and-become-a-mentor.md.md | 10 +- ...ndful-data-strategy-for-business-impact.md | 20 +- ...uct-manager-and-mlops-platform-strategy.md | 4 +- .../mlops-and-ml-engineering-in-finance.md | 4 +- ...mlops-at-scale-reproducibility-adoption.md | 10 +- ...ture-stores-feature-stores-feast-tecton.md | 10 +- _podcast/mlops-kubeflow-model-monitoring.md | 16 +- 
...ector-databases-llms-semantic-retrieval.md | 50 ++-- ...set-creation-annotation-tools-workflows.md | 26 +- .../nlp-team-hiring-and-production-mlops.md | 26 +- ...teering-in-ai-for-data-ml-career-growth.md | 8 +- ...e-ml-tools-strategy-and-business-models.md | 12 +- ...-ai-consulting-from-expertise-to-impact.md | 32 +-- _podcast/practical-llm-engineering-and-rag.md | 20 +- _podcast/pragmatic-and-standardized-mlops.md | 26 +- ...duction-ml-pipelines-with-aws-and-kafka.md | 14 +- ...-vector-search-embeddings-hybrid search.md | 4 +- _podcast/production-ready-ai-engineering.md | 2 +- _podcast/project-manager-to-data-scientist.md | 12 +- .../public-speaking-for-data-scientists.md | 10 +- ...search-to-production-ml-systems-roadmap.md | 2 +- ...sponsible-explainable-ai-bias-detection.md | 38 +-- ...ngineering-teams-self-service-platforms.md | 36 +-- _podcast/solopreneur-data-scientist.md | 8 +- ...aching-mentoring-data-analytics-fintech.md | 2 +- _podcast/urban-data-science.md | 10 +- ...machine-learning-concepts-to-explain-ml.md | 22 +- podcast-errors-found.md | 242 ++++++++++++++++++ 103 files changed, 1202 insertions(+), 960 deletions(-) create mode 100644 podcast-errors-found.md diff --git a/_podcast/ai-for-ecology-biodiversity-and-conservation.md b/_podcast/ai-for-ecology-biodiversity-and-conservation.md index 8ea8a8cc..7fa385ef 100644 --- a/_podcast/ai-for-ecology-biodiversity-and-conservation.md +++ b/_podcast/ai-for-ecology-biodiversity-and-conservation.md @@ -19,10 +19,10 @@ description: Discover AI-driven computer vision and remote sensing strategies to biodiversity monitoring, improve species ID, and inform conservation policy. intro: How can AI help close critical data gaps in biodiversity monitoring and turn images and sensor data into actionable conservation decisions? 
In this episode Tanya - Berger‑Wolf, a computational ecologist, director of TDAI@OSU, and co‑founder of + Berger-Wolf, a computational ecologist, director of TDAI@OSU, and co-founder of the Wildbook project (Wild Me), walks through practical applications of AI for ecology, biodiversity monitoring, and conservation.

We cover core techniques—computer - vision, machine learning, and remote sensing—and their use in image‑based monitoring + vision, machine learning, and remote sensing—and their use in image-based monitoring with camera traps, drones, and species identification. Tanya explains individual identification and longitudinal tracking, habitat mapping and change detection, and the data challenges of labeling, class imbalance, and sparse observations. The @@ -60,7 +60,7 @@ quotableClips: startOffset: 630 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=630 endOffset: 840 -- name: 'Individual Identification & Tracking: Photo‑ID and Longitudinal Monitoring' +- name: 'Individual Identification & Tracking: Photo-ID and Longitudinal Monitoring' startOffset: 840 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=840 endOffset: 1020 @@ -84,7 +84,7 @@ quotableClips: startOffset: 1740 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=1740 endOffset: 1920 -- name: 'Scalable Platforms: Wildbook and Large‑Scale Biodiversity Monitoring Tools' +- name: 'Scalable Platforms: Wildbook and Large-Scale Biodiversity Monitoring Tools' startOffset: 1920 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=1920 endOffset: 2130 @@ -104,7 +104,7 @@ quotableClips: startOffset: 2670 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=2670 endOffset: 2820 -- name: 'Edge Deployment: Low‑Power Devices, Field Constraints, and Real‑Time Alerts' +- name: 'Edge Deployment: Low-Power Devices, Field Constraints, and Real-Time Alerts' startOffset: 2820 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=2820 endOffset: 2970 @@ -112,7 +112,7 @@ quotableClips: startOffset: 2970 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=2970 endOffset: 3150 -- name: 'Funding & Sustainability: Maintaining Long‑Term Monitoring Systems' +- name: 'Funding & Sustainability: Maintaining Long-Term Monitoring Systems' startOffset: 3150 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=3150 endOffset: 3330 diff --git 
a/_podcast/ai-in-healthcare-and-digital-therapeutics.md b/_podcast/ai-in-healthcare-and-digital-therapeutics.md index e9a8ff79..0df232db 100644 --- a/_podcast/ai-in-healthcare-and-digital-therapeutics.md +++ b/_podcast/ai-in-healthcare-and-digital-therapeutics.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=IDzhmmKeNG4 description: 'Learn to build data teams and ethical AI in healthcare: actionable personalization, A/B testing for digital therapeutics, GDPR-safe experiments.' -intro: How can AI power effective digital therapeutics while balancing personalization, rapid experimentation, and patient safety? In this episode, Stefan Gudmundsson — Director of Data, Analytics, and AI with a track record building ML and data teams at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches for AI in healthcare and digital therapeutics.

We cover how machine learning is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s gamified digital therapeutics and quality‑of‑life goals; behavioral design that minimizes in‑app time; and engagement strategies like charity incentives versus leaderboards. Stefan explains building the analytics foundation—data pipelines, dashboards, and experimentation capabilities—and why A/B testing and agenda‑driven recommender systems are core to personalization. He also tackles data privacy and ethics (GDPR/HIPAA, de‑identification), remote monitoring with wearables, clinical trials versus app experiments, managing medical risk, and hiring and scaling data, ML, and engineering teams.

Listen to get concrete frameworks for building data teams, running safe, measurable experiments, designing personalized interventions, and embedding ethical safeguards into AI-driven digital therapeutics +intro: How can AI power effective digital therapeutics while balancing personalization, rapid experimentation, and patient safety? In this episode, Stefan Gudmundsson — Director of Data, Analytics, and AI with a track record building ML and data teams at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches for AI in healthcare and digital therapeutics.

We cover how machine learning is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s gamified digital therapeutics and quality-of-life goals; behavioral design that minimizes in-app time; and engagement strategies like charity incentives versus leaderboards. Stefan explains building the analytics foundation—data pipelines, dashboards, and experimentation capabilities—and why A/B testing and agenda-driven recommender systems are core to personalization. He also tackles data privacy and ethics (GDPR/HIPAA, de-identification), remote monitoring with wearables, clinical trials versus app experiments, managing medical risk, and hiring and scaling data, ML, and engineering teams.

Listen to get concrete frameworks for building data teams, running safe, measurable experiments, designing personalized interventions, and embedding ethical safeguards into AI-driven digital therapeutics topics: - machine learning - healthcare @@ -41,16 +41,16 @@ quotableClips: startOffset: 367 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=367 endOffset: 602 -- name: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality‑of‑Life +- name: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality-of-Life Goals' startOffset: 602 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=602 endOffset: 904 -- name: 'Behavioral Design & Habit Formation: Low In‑App Time Strategy' +- name: 'Behavioral Design & Habit Formation: Low In-App Time Strategy' startOffset: 904 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=904 endOffset: 1167 -- name: 'Building Data Culture: Metrics, Buy‑in, and Responsible Experimentation' +- name: 'Building Data Culture: Metrics, Buy-in, and Responsible Experimentation' startOffset: 1167 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1167 endOffset: 1543 @@ -62,15 +62,15 @@ quotableClips: startOffset: 1622 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1622 endOffset: 1773 -- name: 'Remote Monitoring & Wearables: Activity and Heart‑Rate Variability' +- name: 'Remote Monitoring & Wearables: Activity and Heart-Rate Variability' startOffset: 1773 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1773 endOffset: 1901 -- name: 'Data Privacy & Ethics: GDPR/HIPAA, De‑identification, and Empathy' +- name: 'Data Privacy & Ethics: GDPR/HIPAA, De-identification, and Empathy' startOffset: 1901 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1901 endOffset: 2139 -- name: 'Personalization Strategy: Agenda‑Driven Recommender Systems' +- name: 'Personalization Strategy: Agenda-Driven Recommender Systems' startOffset: 2139 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2139 endOffset: 2397 @@ -86,7 +86,7 @@ quotableClips: 
startOffset: 2729 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2729 endOffset: 2965 -- name: 'Data‑Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' +- name: 'Data-Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' startOffset: 2965 url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2965 endOffset: 3115 @@ -298,7 +298,7 @@ transcript: sec: 593 time: '9:53' who: Stefan -- header: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality‑of‑Life +- header: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality-of-Life Goals' - line: So basically every scientist becomes the target audience. Before this episode, I was doing a little bit of research about the company where you work right now @@ -376,7 +376,7 @@ transcript: sec: 874 time: '14:34' who: Alexey -- header: 'Behavioral Design & Habit Formation: Low In‑App Time Strategy' +- header: 'Behavioral Design & Habit Formation: Low In-App Time Strategy' - line: Yes, yes. But at the same time, there are critical differences. We don't want to keep you in the app for hours, because most of the activity you need to do is outside of the app. So that is a very interesting difference between the two @@ -468,7 +468,7 @@ transcript: sec: 1119 time: '18:39' who: Alexey -- header: 'Building Data Culture: Metrics, Buy‑in, and Responsible Experimentation' +- header: 'Building Data Culture: Metrics, Buy-in, and Responsible Experimentation' - line: Exactly. I think it's much more similar than you would think in the beginning. You basically have a program – some kind of solution – and you're in a company where you really want to create this data-driven culture from the data science @@ -678,7 +678,7 @@ transcript: sec: 1767 time: '29:27' who: Alexey -- header: 'Remote Monitoring & Wearables: Activity and Heart‑Rate Variability' +- header: 'Remote Monitoring & Wearables: Activity and Heart-Rate Variability' - line: Yeah, [reluctantly] I mean – you should start there. 
I think that should always be the approach – start with something simple. Then you have data and then you have everything in place to automate it. Don't try to automate out of thin air. @@ -712,7 +712,7 @@ transcript: sec: 1852 time: '30:52' who: Stefan -- header: 'Data Privacy & Ethics: GDPR/HIPAA, De‑identification, and Empathy' +- header: 'Data Privacy & Ethics: GDPR/HIPAA, De-identification, and Empathy' - line: We have a question. I mentioned that healthcare is quite a regulated area. And usually in healthcare, people take questions about data privacy and this kind of stuff very seriously. Does it change the way you work? You have to keep these @@ -786,7 +786,7 @@ transcript: sec: 2100 time: '35:00' who: Stefan -- header: 'Personalization Strategy: Agenda‑Driven Recommender Systems' +- header: 'Personalization Strategy: Agenda-Driven Recommender Systems' - line: Okay. I wanted to go back to what we were talking about. You said that the app is based on the customer profile – patient profile – it makes different recommendations, or personalized recommendations, based on that. Can you maybe tell us a bit more @@ -1045,7 +1045,7 @@ transcript: sec: 2921 time: '48:41' who: Alexey -- header: 'Data‑Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' +- header: 'Data-Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' - line: No, not at all. All of these people are very data-driven just by nature. The biggest challenges may be to tell a medical doctor, “Okay, now we're testing a feature in the app. Let's just test it.” “What?! No, no. 
Wait!” [laughs] When diff --git a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md index d09b887d..ecf3cb27 100644 --- a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md +++ b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md @@ -31,7 +31,7 @@ intro: How has the rise of ChatGPT reshaped the infrastructure needed to build a when an open source approach like dstack is appropriate, what to evaluate in orchestration tools, and how to balance performance, cost, and control as you scale AI projects post-ChatGPT. This episode is for engineering leaders and ML infrastructure teams - seeking actionable insights on AI infrastructure, orchestration tools, on‑prem economics, + seeking actionable insights on AI infrastructure, orchestration tools, on-prem economics, and distributed training best practices. dateadded: 2025-02-26 duration: PT01H06M04S @@ -48,11 +48,11 @@ quotableClips: startOffset: 327 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=327 endOffset: 505 -- name: Cloud vs On‑Prem Costs and MLOps Limitations (SageMaker example) +- name: Cloud vs On-Prem Costs and MLOps Limitations (SageMaker example) startOffset: 505 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=505 endOffset: 600 -- name: Cloud-to-On‑Prem Realities in the Post‑ChatGPT Era +- name: Cloud-to-On-Prem Realities in the Post-ChatGPT Era startOffset: 600 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=600 endOffset: 778 @@ -60,7 +60,7 @@ quotableClips: startOffset: 778 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=778 endOffset: 1053 -- name: 'Open vs Proprietary Models: Business Models and Trade‑Offs' +- name: 'Open vs Proprietary Models: Business Models and Trade-Offs' startOffset: 1053 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1053 endOffset: 1297 @@ -80,7 +80,7 @@ quotableClips: startOffset: 2255 url: 
https://www.youtube.com/watch?v=1aMuynlLM3o&t=2255 endOffset: 2370 -- name: Fine‑Tuning & Serving Models for Non–AI‑First Companies +- name: Fine-Tuning & Serving Models for Non–AI-First Companies startOffset: 2370 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2370 endOffset: 2836 @@ -92,15 +92,15 @@ quotableClips: startOffset: 3059 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3059 endOffset: 3116 -- name: 'Hybrid Infrastructure Outlook: Cloud Dominance and On‑Prem Nuances' +- name: 'Hybrid Infrastructure Outlook: Cloud Dominance and On-Prem Nuances' startOffset: 3116 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3116 endOffset: 3271 -- name: 'On‑Prem GPU Coordination: SSH, Resource Contention, and Real Examples' +- name: 'On-Prem GPU Coordination: SSH, Resource Contention, and Real Examples' startOffset: 3271 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3271 endOffset: 3413 -- name: 'Bare‑Metal as a Service: Provisioning, Automation, and Firmware Management' +- name: 'Bare-Metal as a Service: Provisioning, Automation, and Firmware Management' startOffset: 3413 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3413 endOffset: 3487 @@ -112,11 +112,11 @@ quotableClips: startOffset: 3630 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3630 endOffset: 3771 -- name: 'Closing Pick: Science‑Fiction Recommendation — The Three‑Body Problem' +- name: 'Closing Pick: Science-Fiction Recommendation — The Three-Body Problem' startOffset: 3771 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3771 endOffset: 3938 -- name: Episode Wrap‑Up & Links to DStack and Guest Resources +- name: Episode Wrap-Up & Links to DStack and Guest Resources startOffset: 3938 url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3938 endOffset: 3964 @@ -242,7 +242,7 @@ transcript: sec: 327 time: '5:27' who: Andrey -- header: Cloud vs On‑Prem Costs and MLOps Limitations (SageMaker example) +- header: Cloud vs On-Prem Costs and MLOps Limitations (SageMaker example) - line: Yes, 
there are existing tools for machine learning, like SageMaker, but as you mentioned, cost becomes a major issue. sec: 505 @@ -269,7 +269,7 @@ transcript: sec: 537 time: '8:57' who: Alexey -- header: Cloud-to-On‑Prem Realities in the Post‑ChatGPT Era +- header: Cloud-to-On-Prem Realities in the Post-ChatGPT Era - line: Yes, and while many of these challenges are still relevant today, there are even bigger challenges ahead. The "ChatGPT moment" has introduced new issues, which makes AI infrastructure an even more important topic today. @@ -351,7 +351,7 @@ transcript: sec: 809 time: '13:29' who: Andrey -- header: 'Open vs Proprietary Models: Business Models and Trade‑Offs' +- header: 'Open vs Proprietary Models: Business Models and Trade-Offs' - line: I don’t know the full story behind OpenAI either, but I think they initially released many things as open-source. GPT-2 was open-source, and they also released Whisper and CLIP. But when they released GPT-3, they realized it was a gold mine. @@ -603,7 +603,7 @@ transcript: sec: 2255 time: '37:35' who: Alexey -- header: Fine‑Tuning & Serving Models for Non–AI‑First Companies +- header: Fine-Tuning & Serving Models for Non–AI-First Companies - line: Correct, although I’d be cautious about labeling companies as small or medium. I think it’s more about whether a company is AI-first or not. Once you figure that out, everything becomes much clearer. If a company is AI-first, they’re likely @@ -720,7 +720,7 @@ transcript: sec: 3106 time: '51:46' who: Andrey -- header: 'Hybrid Infrastructure Outlook: Cloud Dominance and On‑Prem Nuances' +- header: 'Hybrid Infrastructure Outlook: Cloud Dominance and On-Prem Nuances' - line: 'Here''s a question: Do you think the future will be a hybrid of bare metal and cloud, or will it be cloud-only?' 
sec: 3116 @@ -763,7 +763,7 @@ transcript: sec: 3268 time: '54:28' who: Andrey -- header: 'On‑Prem GPU Coordination: SSH, Resource Contention, and Real Examples' +- header: 'On-Prem GPU Coordination: SSH, Resource Contention, and Real Examples' - line: When I think about on-prem, particularly for data teams, data science teams, and ML teams, I recall my first company in Germany. We had a machine with GPUs, and everyone had access to it. We would SSH into the machine, but then we had @@ -797,7 +797,7 @@ transcript: sec: 3411 time: '56:51' who: Alexey -- header: 'Bare‑Metal as a Service: Provisioning, Automation, and Firmware Management' +- header: 'Bare-Metal as a Service: Provisioning, Automation, and Firmware Management' - line: Yes, bare metal as a service is another option. Some companies offer bare metal as a service, where they handle the provisioning and firmware updates for you. But if you want to run a service yourself across multiple bare metal providers, @@ -890,7 +890,7 @@ transcript: sec: 3762 time: '1:02:42' who: Andrey -- header: 'Closing Pick: Science‑Fiction Recommendation — The Three‑Body Problem' +- header: 'Closing Pick: Science-Fiction Recommendation — The Three-Body Problem' - line: So, last question for you. You mentioned you like science fiction. What’s your favorite book? sec: 3771 @@ -943,7 +943,7 @@ transcript: sec: 3920 time: '1:05:20' who: Andrey -- header: Episode Wrap‑Up & Links to DStack and Guest Resources +- header: Episode Wrap-Up & Links to DStack and Guest Resources - line: Sounds interesting! Thanks a lot, Andrey. We only touched on a fraction of the topics we wanted to discuss today, which is no surprise, given how much we wanted to cover. But it was great talking with you. 
Thanks for accepting the invite, @@ -963,22 +963,22 @@ context: 'Context: A conversation with an AI-infrastructure practitioner about m Core theme (single unifying idea): Practical AI is an infrastructure-first problem — success depends less on chasing the biggest model and more on designing cost-effective, controllable, and efficient stacks (hardware, orchestration, and software) that - fit hybrid cloud/on‑prem realities, leverage open-source ecosystems, and optimize + fit hybrid cloud/on-prem realities, leverage open-source ecosystems, and optimize distributed training and serving for real-world constraints. - Dominant through-line: Every segment — from cost of ownership and cloud vs on‑prem - trade‑offs to open vs proprietary models, decentralization, distributed training + Dominant through-line: Every segment — from cost of ownership and cloud vs on-prem + trade-offs to open vs proprietary models, decentralization, distributed training bottlenecks, orchestration gaps, and edge/federated use cases — returns to the same tension: how to deliver AI that is scalable, performant, and economically sustainable by choosing the right mix of tooling, deployment model, and optimizations. Key themes implied by the narrative: - Cost and control drive architecture choices - more than raw model capability. - Hybrid cloud + on‑prem is the pragmatic reality; + more than raw model capability. - Hybrid cloud + on-prem is the pragmatic reality; orchestration must adapt. - Open-source ecosystems accelerate feedback, tooling, and business flexibility. - Efficient distributed training and communication optimizations trump brute-force scaling. - Decentralization (privacy, local control, edge) is often a matter of fit and trade-offs, not ideology. - Practical provisioning, automation, - and orchestration are the unsolved scaling problems for non–AI‑first organizations.' + and orchestration are the unsolved scaling problems for non–AI-first organizations.' 
--- Links: diff --git a/_podcast/algorithmic-trading-with-python-and-machine-learning.md b/_podcast/algorithmic-trading-with-python-and-machine-learning.md index af3b38c4..bac49ead 100644 --- a/_podcast/algorithmic-trading-with-python-and-machine-learning.md +++ b/_podcast/algorithmic-trading-with-python-and-machine-learning.md @@ -16,18 +16,18 @@ links: youtube: https://www.youtube.com/watch?v=NThHAEIazFk description: 'Master algorithmic trading: backtesting and risk management—learn practical data sources, features, models & execution to build robust strategies.' -intro: How do you turn a trading idea into a robust, risk‑managed algorithm in Python? +intro: How do you turn a trading idea into a robust, risk-managed algorithm in Python? In this episode Ivan Brigida — analytics lead behind PythonInvest with 10+ years in statistical modeling, forecasting, econometrics and finance — walks through practical steps for algorithmic trading with Python, from data sourcing to deployment (and a clear reminder this is educational, not investment advice).

We cover - where retail traders get market data (Yahoo, Quandl, Polygon), OHLCV and adjusted‑close - nuances, and a concrete mean‑reversion example. Ivan explains backtesting methodology, - common pitfalls like time‑series data leakage, and walk‑forward simulation for realistic - validation. He breaks down risk management (stop‑loss thresholds, position sizing), + where retail traders get market data (Yahoo, Quandl, Polygon), OHLCV and adjusted-close + nuances, and a concrete mean-reversion example. Ivan explains backtesting methodology, + common pitfalls like time-series data leakage, and walk-forward simulation for realistic + validation. He breaks down risk management (stop-loss thresholds, position sizing), execution and trading fees, plus evaluation metrics (ROI, precision) and defining prediction targets (binary growth thresholds such as 5%).

On the modeling - side you’ll hear practical feature engineering (time‑window stats, handcrafted indicators), + side you’ll hear practical feature engineering (time-window stats, handcrafted indicators), model choices (logistic regression, XGBoost, neural nets), explainability via feature importance, and deployment options (cron, Airflow, APIs, partial automation). Listen to gain actionable guidance for building, validating, and deploying algorithmic @@ -83,15 +83,15 @@ quotableClips: startOffset: 1187 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1187 endOffset: 1334 -- name: Risk management fundamentals and stop‑loss thresholds +- name: Risk management fundamentals and stop-loss thresholds startOffset: 1334 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1334 endOffset: 1608 -- name: Backtesting methodology and avoiding time‑series data leakage +- name: Backtesting methodology and avoiding time-series data leakage startOffset: 1608 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1608 endOffset: 1784 -- name: 'Walk‑forward simulation: weekly predictions and selection rules' +- name: 'Walk-forward simulation: weekly predictions and selection rules' startOffset: 1784 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1784 endOffset: 2115 @@ -111,7 +111,7 @@ quotableClips: startOffset: 2619 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2619 endOffset: 2755 -- name: 'Feature engineering: time‑window stats and handcrafted indicators' +- name: 'Feature engineering: time-window stats and handcrafted indicators' startOffset: 2755 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2755 endOffset: 2882 @@ -135,11 +135,11 @@ quotableClips: startOffset: 3449 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3449 endOffset: 3666 -- name: Course plans, sign‑up, and community building +- name: Course plans, sign-up, and community building startOffset: 3666 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3666 endOffset: 3696 -- name: Episode Wrap‑up and final 
reminder (not financial advice) +- name: Episode Wrap-up and final reminder (not financial advice) startOffset: 3696 url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3696 endOffset: 3640 @@ -529,7 +529,7 @@ transcript: sec: 1311 time: '21:51' who: Ivan -- header: Risk management fundamentals and stop‑loss thresholds +- header: Risk management fundamentals and stop-loss thresholds - line: I have two questions right now. First question is, “Okay, now I see losses. What do I do with them?” sec: 1334 @@ -596,7 +596,7 @@ transcript: sec: 1513 time: '25:13' who: Ivan -- header: Backtesting methodology and avoiding time‑series data leakage +- header: Backtesting methodology and avoiding time-series data leakage - line: Okay, I actually got lost a bit. You said many things, like “long stocks,” “rebalancing strategy,” “portfolio allocation”… We should probably talk about that later. [Ivan agrees] One question I still have is – we know how to get data @@ -637,7 +637,7 @@ transcript: sec: 1738 time: '28:58' who: Alexey -- header: 'Walk‑forward simulation: weekly predictions and selection rules' +- header: 'Walk-forward simulation: weekly predictions and selection rules' - line: Yes. I can give an example of the exact thing that I had. I started from the 100 largest US stocks, and I made predictions for one week ahead. I tried to predict… Historically you can calculate future growth from the data – when you don't know @@ -874,7 +874,7 @@ transcript: sec: 2740 time: '45:40' who: Ivan -- header: 'Feature engineering: time‑window stats and handcrafted indicators' +- header: 'Feature engineering: time-window stats and handcrafted indicators' - line: How do you build…? Let's say, we want to build the simplest possible model for that, but still use machine learning – like logistic regression or something else. 
How exactly would we design the problem in order to predict this growth @@ -1113,7 +1113,7 @@ transcript: sec: 3665 time: '1:01:05' who: Alexey -- header: Course plans, sign‑up, and community building +- header: Course plans, sign-up, and community building - line: Yeah. I heard that and I will probably wait until 500 people are subscribed, so that I can say, “Okay, next year from January, I will run it.” It’s not there yet, but I hope someday. @@ -1129,7 +1129,7 @@ transcript: sec: 3692 time: '1:01:32' who: Ivan -- header: Episode Wrap‑up and final reminder (not financial advice) +- header: Episode Wrap-up and final reminder (not financial advice) - line: Okay. So we just need 300 more. [chuckles] Okay. Thanks a lot. It's unfortunately time to wrap up for today. It was amazing. I learned many new things. Hopefully, everyone else also learned new things. Thanks for joining us today, for sharing. @@ -1145,16 +1145,16 @@ transcript: time: '1:02:15' who: Ivan context: 'Context: This episode follows Ivan Brigida’s path from finance to analytics - and walks listeners step‑by‑step through the practical craft of retail algorithmic - investing — covering data sources and quality, time‑series market formats, strategy - ideas (like mean reversion), rigorous backtesting and walk‑forward validation, risk + and walks listeners step-by-step through the practical craft of retail algorithmic + investing — covering data sources and quality, time-series market formats, strategy + ideas (like mean reversion), rigorous backtesting and walk-forward validation, risk management and execution, feature engineering and model choice, explainability, deployment, and learning resources. 
Core: The unifying idea is that successful retail algorithmic trading is built like - an engineering pipeline — start with clean, well‑understood data; define precise + an engineering pipeline — start with clean, well-understood data; define precise prediction targets; design simple, interpretable models and handcrafted features; - validate performance with rigorous, leakage‑free backtests and walk‑forward simulations; + validate performance with rigorous, leakage-free backtests and walk-forward simulations; embed strict risk controls and disciplined execution; and iterate toward partial automation and reproducible deployment while treating the whole process as a continuous learning project rather than a shortcut to quick profits.' diff --git a/_podcast/algorithms-data-structures-for-engineers.md b/_podcast/algorithms-data-structures-for-engineers.md index af959996..027c0795 100644 --- a/_podcast/algorithms-data-structures-for-engineers.md +++ b/_podcast/algorithms-data-structures-for-engineers.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/mastering-algorithms-and-data-structures-marcello-la/id1541710331?i=1000534241523 description: Learn Bloom filters, approximate nearest-neighbor and performance tuning to gain memory-efficient containment, fast vector search and practical profiling tips -intro: How do engineers choose and implement the right algorithm for memory, latency, and scale? In this episode, Marcello La Rocca — senior software engineer at Tundra.com and author of Algorithms and Data Structures in Action, with experience at Twitter, Microsoft and Apple — walks through practical algorithmic solutions engineers can actually use in production. 
We focus on Bloom filters for memory‑efficient containment checks (and real-world uses like crawlers, routing tables, and adtech device-ID targeting), and on approximate nearest‑neighbour (ANN) strategies when KD‑trees break down for high‑dimensional data — covering R‑trees, SS‑trees, vector similarity, embeddings and Faiss. Along the way Marcello discusses core data structures, profiling and performance pitfalls, abstraction vs implementation trade‑offs, cross‑language serialization, and language performance choices (Python vs C++ and Cython). If you want actionable guidance — including when to trust libraries versus inspect internals, practical code in Java/JavaScript/Python, and study resources to get hands‑on — this episode gives concrete patterns, trade‑offs, and examples you can apply to improve search, recommendation, and large‑scale systems performance +intro: How do engineers choose and implement the right algorithm for memory, latency, and scale? In this episode, Marcello La Rocca — senior software engineer at Tundra.com and author of Algorithms and Data Structures in Action, with experience at Twitter, Microsoft and Apple — walks through practical algorithmic solutions engineers can actually use in production. We focus on Bloom filters for memory-efficient containment checks (and real-world uses like crawlers, routing tables, and adtech device-ID targeting), and on approximate nearest-neighbour (ANN) strategies when KD-trees break down for high-dimensional data — covering R-trees, SS-trees, vector similarity, embeddings and Faiss. Along the way Marcello discusses core data structures, profiling and performance pitfalls, abstraction vs implementation trade-offs, cross-language serialization, and language performance choices (Python vs C++ and Cython). 
If you want actionable guidance — including when to trust libraries versus inspect internals, practical code in Java/JavaScript/Python, and study resources to get hands-on — this episode gives concrete patterns, trade-offs, and examples you can apply to improve search, recommendation, and large-scale systems performance topics: - algorithms - data structures diff --git a/_podcast/applied-llm-research-and-career-growth-in-practice.md b/_podcast/applied-llm-research-and-career-growth-in-practice.md index 13458a74..3303d0e2 100644 --- a/_podcast/applied-llm-research-and-career-growth-in-practice.md +++ b/_podcast/applied-llm-research-and-career-growth-in-practice.md @@ -1168,8 +1168,8 @@ transcript: time: '58:10' who: Alexey context: 'A practical, curiosity-driven bridge between research and engineering: relentlessly - iterate with hands‑on prototyping, rigorous evaluation, and open dissemination to - solve real-world ML problems (ex: long‑context LLMs), while leveraging community, + iterate with hands-on prototyping, rigorous evaluation, and open dissemination to + solve real-world ML problems (ex: long-context LLMs), while leveraging community, mentorship, and strategic projects to accelerate career growth and drive measurable impact.' 
--- diff --git a/_podcast/bayesian-modeling-workflows-and-tools.md b/_podcast/bayesian-modeling-workflows-and-tools.md index 06e864ca..29fad801 100644 --- a/_podcast/bayesian-modeling-workflows-and-tools.md +++ b/_podcast/bayesian-modeling-workflows-and-tools.md @@ -60,7 +60,7 @@ quotableClips: startOffset: 492 url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=492 endOffset: 572 -- name: 'Self‑Study Path: Learning statistics without formal classes' +- name: 'Self-Study Path: Learning statistics without formal classes' startOffset: 572 url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=572 endOffset: 887 @@ -92,7 +92,7 @@ quotableClips: startOffset: 1757 url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1757 endOffset: 2028 -- name: 'MCMC Fundamentals: Markov chains and exploring high‑probability regions' +- name: 'MCMC Fundamentals: Markov chains and exploring high-probability regions' startOffset: 2028 url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2028 endOffset: 2199 @@ -132,7 +132,7 @@ quotableClips: startOffset: 3953 url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3953 endOffset: 3991 -- name: Episode Wrap‑up, Links, and Next Steps +- name: Episode Wrap-up, Links, and Next Steps startOffset: 3991 url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3991 endOffset: 3905 @@ -299,7 +299,7 @@ transcript: sec: 557 time: '9:17' who: Alexey -- header: 'Self‑Study Path: Learning statistics without formal classes' +- header: 'Self-Study Path: Learning statistics without formal classes' - line: It was just dabbling. You're just sort of dabbling in the problems, you're reading about them. Because in some sense, if you're like, “Oh, I want to learn machine learning. 
Okay, I'm going to learn how to run a random forest or implement @@ -740,7 +740,7 @@ transcript: sec: 2023 time: '33:43' who: Alexey -- header: 'MCMC Fundamentals: Markov chains and exploring high‑probability regions' +- header: 'MCMC Fundamentals: Markov chains and exploring high-probability regions' - line: Our model essentially returns to us, “What's the probability the parameter has this value, given the data we return?” So we have these probabilities there. But we can't know in advance what the high probability regions are going to be. @@ -1290,7 +1290,7 @@ transcript: sec: 3986 time: '1:06:26' who: Rob -- header: Episode Wrap‑up, Links, and Next Steps +- header: Episode Wrap-up, Links, and Next Steps - line: Okay. We will also include the email in the description. I posted two links in the live chat – I will also post them in the description. I guess that's all for today. Thanks a lot, Rob, for joining us today. And thanks, everyone, for diff --git a/_podcast/big-data-analytics-and-postdoc-research.md b/_podcast/big-data-analytics-and-postdoc-research.md index 4f9d43ac..cf301358 100644 --- a/_podcast/big-data-analytics-and-postdoc-research.md +++ b/_podcast/big-data-analytics-and-postdoc-research.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/advancing-big-data-analytics-post-doctoral-research/id1541710331?i=1000543884294 description: 'Discover Spatial Big Data, Nebula Stream & postdoc mentoring: PhD tips, publishing, time-management and stream-processing tactics to boost your research.' -intro: How do you master spatial big data analytics while navigating the demands of postdoc research, systems building, and preparing for a PhD? 
In this episode, Eleni Tzirita‑Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with a PhD from EPFL and award‑winning work in data management — breaks down practical approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) and robust stream processing for IoT. We cover systems‑driven research like the Nebula Stream and Agora infrastructure, spotting research trends via conferences and reviewing, and aligning academic work with industry needs. Eleni also outlines the postdoc role (mentoring, teaching, reviewing, dissemination), time management strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD or master’s thesis. Listeners will gain concrete guidance on research priorities beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, data cleaning evaluation challenges, and steps to increase diversity in CS. Tune in for actionable postdoc mentoring and PhD tips grounded in spatial big data and stream processing research +intro: How do you master spatial big data analytics while navigating the demands of postdoc research, systems building, and preparing for a PhD? In this episode, Eleni Tzirita-Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with a PhD from EPFL and award-winning work in data management — breaks down practical approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) and robust stream processing for IoT. We cover systems-driven research like the Nebula Stream and Agora infrastructure, spotting research trends via conferences and reviewing, and aligning academic work with industry needs. 
Eleni also outlines the postdoc role (mentoring, teaching, reviewing, dissemination), time management strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD or master’s thesis. Listeners will gain concrete guidance on research priorities beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, data cleaning evaluation challenges, and steps to increase diversity in CS. Tune in for actionable postdoc mentoring and PhD tips grounded in spatial big data and stream processing research topics: - academia - big data analytics @@ -27,7 +27,7 @@ dateadded: 2021-12-05 duration: PT01H01M37S quotableClips: -- name: 'Guest Introduction: Eleni Tzirita‑Zacharatou, postdoctoral researcher at +- name: 'Guest Introduction: Eleni Tzirita-Zacharatou, postdoctoral researcher at DIMA, TU Berlin' startOffset: 73 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=73 @@ -69,7 +69,7 @@ quotableClips: startOffset: 1388 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1388 endOffset: 1455 -- name: 'System‑Driven Research: From Apache Flink legacy to new Nebula stream systems' +- name: 'System-Driven Research: From Apache Flink legacy to new Nebula stream systems' startOffset: 1455 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1455 endOffset: 1495 @@ -93,12 +93,12 @@ quotableClips: startOffset: 2181 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2181 endOffset: 2320 -- name: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross‑domain +- name: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross-domain work' startOffset: 2320 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2320 endOffset: 2470 -- name: 'Facilitating Cross‑Group Collaboration: Physical spaces and informal interactions' +- name: 'Facilitating Cross-Group Collaboration: Physical spaces and informal interactions' startOffset: 2470 url: 
https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2470 endOffset: 2657 @@ -116,7 +116,7 @@ quotableClips: startOffset: 3127 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3127 endOffset: 3299 -- name: 'PhD Expectations: Publication requirements and top‑conference pressure' +- name: 'PhD Expectations: Publication requirements and top-conference pressure' startOffset: 3299 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3299 endOffset: 3319 @@ -129,7 +129,7 @@ quotableClips: startOffset: 3651 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3651 endOffset: 3714 -- name: 'Contact and Follow‑Up: DIMA page and email for questions' +- name: 'Contact and Follow-Up: DIMA page and email for questions' startOffset: 3714 url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3714 endOffset: 3722 @@ -139,7 +139,7 @@ quotableClips: endOffset: 3697 transcript: -- header: 'Guest Introduction: Eleni Tzirita‑Zacharatou, postdoctoral researcher at +- header: 'Guest Introduction: Eleni Tzirita-Zacharatou, postdoctoral researcher at DIMA, TU Berlin' - line: This week, we'll talk about doing postdoctoral research. We have a special guest today, Eleni. Eleni is a postdoctoral researcher at the DIMA Group at TU @@ -505,7 +505,7 @@ transcript: sec: 1439 time: '23:59' who: Alexey -- header: 'System‑Driven Research: From Apache Flink legacy to new Nebula stream systems' +- header: 'System-Driven Research: From Apache Flink legacy to new Nebula stream systems' - line: Not really. Basically, in a sense, this ‘nebula stream’ system is kind of the next Flick, you could say. This is representative of how the DIMA group works. Before there was Flink and a lot of researchers were working on different problems @@ -756,7 +756,7 @@ transcript: sec: 2293 time: '38:13' who: Alexey -- header: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross‑domain +- header: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross-domain work' - line: Yeah. 
I have some experience working with people that are not in Data Management. In general, there are connections – it's easy to find connections – from data @@ -790,7 +790,7 @@ transcript: sec: 2434 time: '40:34' who: Alexey -- header: 'Facilitating Cross‑Group Collaboration: Physical spaces and informal interactions' +- header: 'Facilitating Cross-Group Collaboration: Physical spaces and informal interactions' - line: Yeah, that's a good point. Maybe now it has improved, I would say. Actually, there is one issue that I find to be a problem concerning where the groups are, which is the building itself. I think it's actually not so great that the DIMA @@ -985,7 +985,7 @@ transcript: sec: 3267 time: '54:27' who: Alexey -- header: 'PhD Expectations: Publication requirements and top‑conference pressure' +- header: 'PhD Expectations: Publication requirements and top-conference pressure' - line: Yeah, different groups have different requirements in terms of how many papers you are expected to publish? But yeah, in general, it's always at least one. Broadly speaking, at DIMA, it’s typically three and they have to be at top conferences. @@ -1103,7 +1103,7 @@ transcript: sec: 3710 time: '1:01:50' who: Alexey -- header: 'Contact and Follow‑Up: DIMA page and email for questions' +- header: 'Contact and Follow-Up: DIMA page and email for questions' - line: This information is up to date. There is my email on my website – I guess that's the easiest way. 
sec: 3714 diff --git a/_podcast/big-data-engineer-vs-data-scientist.md b/_podcast/big-data-engineer-vs-data-scientist.md index cead0573..fd598dd8 100644 --- a/_podcast/big-data-engineer-vs-data-scientist.md +++ b/_podcast/big-data-engineer-vs-data-scientist.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/big-data-engineer-vs-data-scientist-roksolana-diachuk/id1541710331?i=1000528386609 description: Discover how Big Data Engineer vs Data Scientist roles differ — skills, performance optimization, ETL pipelines and ML deployment tips to advance your career -intro: 'How do the day‑to‑day responsibilities and skill sets really differ between a Big Data Engineer and a Data Scientist—and what should you learn to move between those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her career transition from backend Java into big data engineering and R&D.

We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala and Parquet formats—plus performance tuning: Spark job optimization, cluster resource planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries (data cleaning and feature engineering for data scientists vs pipeline design and formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning with Delta Lake, observability, documentation, starter projects and learning resources.

Listen to learn which skills, tools and projects will help you choose or transition between careers, and what to prioritize when building scalable data pipelines, deploying models, and ensuring data quality.' +intro: 'How do the day-to-day responsibilities and skill sets really differ between a Big Data Engineer and a Data Scientist—and what should you learn to move between those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her career transition from backend Java into big data engineering and R&D.

We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala and Parquet formats—plus performance tuning: Spark job optimization, cluster resource planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries (data cleaning and feature engineering for data scientists vs pipeline design and formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning with Delta Lake, observability, documentation, starter projects and learning resources.

Listen to learn which skills, tools and projects will help you choose or transition between careers, and what to prioritize when building scalable data pipelines, deploying models, and ensuring data quality.' topics: - career transition - software engineering diff --git a/_podcast/bioinformatics-worflows-tools-and-data-science.md b/_podcast/bioinformatics-worflows-tools-and-data-science.md index 2cc66e73..c72f2157 100644 --- a/_podcast/bioinformatics-worflows-tools-and-data-science.md +++ b/_podcast/bioinformatics-worflows-tools-and-data-science.md @@ -74,7 +74,7 @@ quotableClips: startOffset: 1076 url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1076 endOffset: 1181 -- name: 'Building Microbial Networks: Co‑abundance and Association Inference' +- name: 'Building Microbial Networks: Co-abundance and Association Inference' startOffset: 1181 url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1181 endOffset: 1471 @@ -90,7 +90,7 @@ quotableClips: startOffset: 1798 url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1798 endOffset: 2180 -- name: 'Open‑Source Projects Overview: MCW2 Graph, VueGen, and VueCore' +- name: 'Open-Source Projects Overview: MCW2 Graph, VueGen, and VueCore' startOffset: 2180 url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2180 endOffset: 2311 @@ -130,7 +130,7 @@ quotableClips: startOffset: 3197 url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3197 endOffset: 3250 -- name: 'Episode Wrap‑up: Open‑Source Encouragement and Closing Remarks' +- name: 'Episode Wrap-up: Open-Source Encouragement and Closing Remarks' startOffset: 3250 url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3250 endOffset: 3313 @@ -457,7 +457,7 @@ transcript: sec: 1169 time: '19:29' who: Alexey -- header: 'Building Microbial Networks: Co‑abundance and Association Inference' +- header: 'Building Microbial Networks: Co-abundance and Association Inference' - line: Yes. In our case, we focused on wastewater treatment plants. 
We analyzed data from different locations because there were many available samples. sec: 1181 @@ -745,7 +745,7 @@ transcript: sec: 2173 time: '36:13' who: Alexey -- header: 'Open‑Source Projects Overview: MCW2 Graph, VueGen, and VueCore' +- header: 'Open-Source Projects Overview: MCW2 Graph, VueGen, and VueCore' - line: You worked on quite a few projects. The document mentions MCW2 Graph, VueGen, and VueCore. What are these projects and what do they do? sec: 2180 @@ -1060,7 +1060,7 @@ transcript: sec: 3243 time: '54:03' who: Sebastian -- header: 'Episode Wrap‑up: Open‑Source Encouragement and Closing Remarks' +- header: 'Episode Wrap-up: Open-Source Encouragement and Closing Remarks' - line: That must be amazing. I actually have another event starting soon, so I need to go. Sebastian, thanks a lot. It was really nice talking to you. I learned many new things. I suspected proteins were important not just for the gym but for other @@ -1086,7 +1086,7 @@ context: At its core this episode is about how building open, reproducible compu actionable insight—bridging wet lab and dry lab work so researchers can ask better questions, run fewer experiments, and move faster. From genomics and metagenomics pipelines to network inference, molecular simulation, knowledge graphs, visualization, - and AI assistants, the through‑line is empowering scientists with accessible tools, + and AI assistants, the through-line is empowering scientists with accessible tools, automation, and community-driven software that make complex biology interpretable, shareable, and useful in the real world. 
--- diff --git a/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md index eb00a1e1..ff5f9337 100644 --- a/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md +++ b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md @@ -55,7 +55,7 @@ quotableClips: startOffset: 352 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=352 endOffset: 464 -- name: 'Joining Noird.ai: Automating On‑call with Agents' +- name: 'Joining Noird.ai: Automating On-call with Agents' startOffset: 464 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=464 endOffset: 660 @@ -67,7 +67,7 @@ quotableClips: startOffset: 751 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=751 endOffset: 910 -- name: 'Planning Strategies: Single‑step, Multi‑pass & Self‑reflection' +- name: 'Planning Strategies: Single-step, Multi-pass & Self-reflection' startOffset: 910 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=910 endOffset: 1103 @@ -75,7 +75,7 @@ quotableClips: startOffset: 1103 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1103 endOffset: 1198 -- name: 'Code Agents vs Natural‑Language Agents: Trade‑offs' +- name: 'Code Agents vs Natural-Language Agents: Trade-offs' startOffset: 1198 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1198 endOffset: 1281 @@ -91,7 +91,7 @@ quotableClips: startOffset: 1499 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1499 endOffset: 1770 -- name: 'RAG Reality Check: Latency, Cost & Garbage‑In/Garbage‑Out' +- name: 'RAG Reality Check: Latency, Cost & Garbage-In/Garbage-Out' startOffset: 1770 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1770 endOffset: 1898 @@ -123,7 +123,7 @@ quotableClips: startOffset: 2648 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2648 endOffset: 2760 -- name: 'Framework Trade‑offs: LangChain, OpenAI Agents SDK, Small Agents' +- name: 'Framework Trade-offs: LangChain, OpenAI Agents SDK, Small 
Agents' startOffset: 2760 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2760 endOffset: 2880 @@ -139,7 +139,7 @@ quotableClips: startOffset: 3200 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3200 endOffset: 3362 -- name: 'Goal‑based Evaluation: Outcome Assertions Over Exact Paths' +- name: 'Goal-based Evaluation: Outcome Assertions Over Exact Paths' startOffset: 3362 url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3362 endOffset: 3491 @@ -279,7 +279,7 @@ transcript: sec: 394 time: '6:34' who: Ranjitha -- header: 'Joining Noird.ai: Automating On‑call with Agents' +- header: 'Joining Noird.ai: Automating On-call with Agents' - line: After working on agents at Dropbox, I was drawn to Noird, where I am now. I’m fully immersed in the potential of these agents. We are trying to solve the problem of engineering on call, taking that away from users and letting agents @@ -415,7 +415,7 @@ transcript: sec: 904 time: '15:04' who: Ranjitha -- header: 'Planning Strategies: Single‑step, Multi‑pass & Self‑reflection' +- header: 'Planning Strategies: Single-step, Multi-pass & Self-reflection' - line: Would you agree with the definition that an agent is just an LLM with tools? sec: 910 time: '15:10' @@ -462,7 +462,7 @@ transcript: sec: 1146 time: '19:06' who: Ranjitha -- header: 'Code Agents vs Natural‑Language Agents: Trade‑offs' +- header: 'Code Agents vs Natural-Language Agents: Trade-offs' - line: Some agents plan in plain English, others in code so-called code agents. The choice depends on the task complexity. For natural language problems, natural language-based agents work. For very complex tasks with many steps and conditionals, @@ -633,7 +633,7 @@ transcript: sec: 1732 time: '28:52' who: Ranjitha -- header: 'RAG Reality Check: Latency, Cost & Garbage‑In/Garbage‑Out' +- header: 'RAG Reality Check: Latency, Cost & Garbage-In/Garbage-Out' - line: We still need to reduce the amount of noise that we put into an LLM’s context, and that’s what context engineering is. 
sec: 1770 @@ -870,7 +870,7 @@ transcript: sec: 2752 time: '45:52' who: Alexey -- header: 'Framework Trade‑offs: LangChain, OpenAI Agents SDK, Small Agents' +- header: 'Framework Trade-offs: LangChain, OpenAI Agents SDK, Small Agents' - line: LangChain has its uses, but I haven’t used it much for agents. Early on, it couldn’t handle ambiguity in natural language. It has improved and has new agents to experiment with. @@ -950,7 +950,7 @@ transcript: sec: 3313 time: '55:13' who: Alexey -- header: 'Goal‑based Evaluation: Outcome Assertions Over Exact Paths' +- header: 'Goal-based Evaluation: Outcome Assertions Over Exact Paths' - line: 'I wouldn’t evaluate each path too strictly because LLMs can accomplish the same goal differently. Tool calls must consult the true source. For example, two ways exist to find a skip level: directly or by traversing an org chart. Both @@ -993,7 +993,7 @@ context: 'Context: The episode traces a practitioner’s journey from early ML a evaluation. Core (single unifying idea): Pragmatic agent engineering: turning LLMs into reliable, - task‑oriented autonomous systems by engineering around their capabilities and limits—designing + task-oriented autonomous systems by engineering around their capabilities and limits—designing objectives, orchestration, context/retrieval, tooling integrations, planning strategies, and rigorous evaluation so agents can safely, efficiently, and predictably perform real operational and enterprise tasks. @@ -1001,17 +1001,17 @@ context: 'Context: The episode traces a practitioner’s journey from early ML a Why this unifies the episode: - Defines what an “agent” means in practice (autonomy + objectives + LLMs) and why design choices matter. - Shows orchestration needs (tools, memory, knowledge stores) to ground LLM reasoning in real data and actions. 
- - Contrasts planning styles (single‑step, multi‑pass, self‑reflection) and implementation - tradeoffs (prompts vs SDKs, code vs natural‑language agents) as engineering choices, + - Contrasts planning styles (single-step, multi-pass, self-reflection) and implementation + tradeoffs (prompts vs SDKs, code vs natural-language agents) as engineering choices, not academic ones. - Treats retrieval/RAG as an engineering component with latency/cost/GIGO constraints and explores agentic RAG when RAG alone falls short. - Emphasizes integration abstractions and framework tradeoffs for production deployment (from bespoke stacks to marketplaces and SDKs). - Centers testing and evaluation—mocking tools, regression - tests, goal‑based benchmarks—to ensure outcomes over narrative plausibility. - Highlights + tests, goal-based benchmarks—to ensure outcomes over narrative plausibility. - Highlights specialization and domain constraints: generic agents struggle; practical value comes from adapting agents to workflows, data, and operational requirements. - Bottom line: The episode’s through‑line is that successful agent projects are not + Bottom line: The episode’s through-line is that successful agent projects are not just about large models: they are systems engineering problems requiring explicit choices about autonomy, grounding, tooling, planning, and measurement to deliver dependable, useful automation.' diff --git a/_podcast/building-ai-digital-health-startups.md b/_podcast/building-ai-digital-health-startups.md index 41f66d98..83e3e5b1 100644 --- a/_podcast/building-ai-digital-health-startups.md +++ b/_podcast/building-ai-digital-health-startups.md @@ -19,19 +19,19 @@ description: Discover actionable digital health MVP strategy, telemedicine tacti care. intro: How do you build a digital health startup that ships a focused MVP, uses AI for diagnosis, and delivers care via telemedicine while overcoming data gaps and - legacy workflows? 
In this episode Maria-Liisa Bruckert, Co‑Founder and Co‑CEO of + legacy workflows? In this episode Maria-Liisa Bruckert, Co-Founder and Co-CEO of SQIN and recipient of the Google Play Best of 2020 award and Google Female Founder Immersion 2020, walks through her transition from electrical engineering to health - tech and the practical playbook she uses to de‑risk product development.

+ tech and the practical playbook she uses to de-risk product development.

We cover MVP strategy and market research tactics—cold outreach, accelerators, clinical - meetings—and unconventional experiments like an AR “lipstick try‑on” to collect + meetings—and unconventional experiments like an AR “lipstick try-on” to collect engagement data. Maria explains how SQIN aligns AI diagnosis with concrete business cases, builds a digital clinic flow from diagnosis to prescription, and uses telemedicine - for remote follow‑up and prescriptions. You’ll also hear about data strategy and - community bootstrapping, ethics and UX for sensitive AI messaging, go‑to‑market + for remote follow-up and prescriptions. You’ll also hear about data strategy and + community bootstrapping, ethics and UX for sensitive AI messaging, go-to-market choices for regional rollout, and monetization through SaaS integrations and partnerships. -

Listen for actionable insights on product‑market fit, hiring priorities - for AI and full‑stack roles, and practical steps to launch a digital health startup +

Listen for actionable insights on product-market fit, hiring priorities + for AI and full-stack roles, and practical steps to launch a digital health startup that balances technical credibility with patient access. dateadded: 2023-12-03 duration: PT00H52M27S @@ -128,7 +128,7 @@ quotableClips: startOffset: 2624 url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2624 endOffset: 2768 -- name: 'Monetization: SaaS Integrations, Partnerships, and E‑commerce Cuts' +- name: 'Monetization: SaaS Integrations, Partnerships, and E-commerce Cuts' startOffset: 2768 url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2768 endOffset: 2879 @@ -1022,7 +1022,7 @@ transcript: sec: 2760 time: '46:00' who: Alexey -- header: 'Monetization: SaaS Integrations, Partnerships, and E‑commerce Cuts' +- header: 'Monetization: SaaS Integrations, Partnerships, and E-commerce Cuts' - line: We integrate our AI to different partners. We do the health checks on different points of sale. First of all, of course, in our own application, SQIN, where it’s a digital clinic – but we also have other applications and other points of sale, @@ -1183,17 +1183,17 @@ context: 'Context: The episode traces a founder’s shift from engineering to he entrepreneurship, driven by the opportunity to digitize fragmented medical systems. It covers pragmatic founder tactics (immersion, rapid MVPs, cold outreach), an unusual AR MVP to collect engagement data, and a discovery that everyday lifestyle interactions - reveal skin‑health signals. The conversation ties product experimentation and iterative + reveal skin-health signals. The conversation ties product experimentation and iterative pivots to building a digital clinic flow (diagnosis → prescription → telemedicine), while confronting legacy infrastructure, rural access gaps, ethical UX, and regional - go‑to‑market limits. Growth topics — community‑first data strategies, personalization, + go-to-market limits. 
Growth topics — community-first data strategies, personalization, retention, hiring, fundraising, and monetization via SaaS/partnerships — are framed - alongside human considerations like leadership choices and work‑life integration. + alongside human considerations like leadership choices and work-life integration. - Core theme: Building an ethical, product‑first digital healthcare startup by using - rapid experimentation and community‑driven engagement to bootstrap meaningful clinical + Core theme: Building an ethical, product-first digital healthcare startup by using + rapid experimentation and community-driven engagement to bootstrap meaningful clinical data and align AI capabilities with real patient workflows and viable business models—solving - legacy access and workflow problems regionally, iterating from MVP to product‑market + legacy access and workflow problems regionally, iterating from MVP to product-market fit, and scaling sustainably while keeping human needs and ethics central.' --- Links: diff --git a/_podcast/building-and-scaling-ai-data-products-with-mlops.md b/_podcast/building-and-scaling-ai-data-products-with-mlops.md index cf108398..2ba5714c 100644 --- a/_podcast/building-and-scaling-ai-data-products-with-mlops.md +++ b/_podcast/building-and-scaling-ai-data-products-with-mlops.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/product-management-essentials-for-data-professionals/id1541710331?i=1000550093434 description: Build scalable data products with MLOps roadmaps, customer research and metric-driven templates - prioritize impact, reduce failures, and measure success -intro: How do you move from proofs-of-concept to scalable AI data products that deliver measurable business value? 
In this episode, Greg Coquillo, a Technology Manager at Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, walks through practical approaches for building and scaling data products, MLOps, customer research, and metrics.

We cover Greg’s transition into AI product work and the role of data product managers (internal vs. external), then dive into customer journey mapping, domain knowledge, and structured customer research—interview techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how to work backwards from business problems, contribute technical input to roadmaps with T‑shirt sizing, and prioritize MLOps by spotting unscalable manual processes. Greg outlines three‑year roadmap thinking (impact, effort, cost), a pragmatic Excel template (problems → solutions → metrics), and SMART and operational metrics like pipeline failures, SLAs, and data quality. He also addresses operating without a PM, aligning team mental models, and on‑the‑job product skill development.

Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer research, and defining success metrics for AI-driven data products +intro: How do you move from proofs-of-concept to scalable AI data products that deliver measurable business value? In this episode, Greg Coquillo, a Technology Manager at Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, walks through practical approaches for building and scaling data products, MLOps, customer research, and metrics.

We cover Greg’s transition into AI product work and the role of data product managers (internal vs. external), then dive into customer journey mapping, domain knowledge, and structured customer research—interview techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how to work backwards from business problems, contribute technical input to roadmaps with T-shirt sizing, and prioritize MLOps by spotting unscalable manual processes. Greg outlines three-year roadmap thinking (impact, effort, cost), a pragmatic Excel template (problems → solutions → metrics), and SMART and operational metrics like pipeline failures, SLAs, and data quality. He also addresses operating without a PM, aligning team mental models, and on-the-job product skill development.

Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer research, and defining success metrics for AI-driven data products dateadded: 2022-02-06 duration: PT00H59M41S @@ -54,11 +54,11 @@ quotableClips: startOffset: 1585 url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1585 endOffset: 1733 -- name: 'Contributing to Roadmaps: Technical Input & T‑Shirt Sizing' +- name: 'Contributing to Roadmaps: Technical Input & T-Shirt Sizing' startOffset: 1733 url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1733 endOffset: 1905 -- name: 'Working Backwards: Problem‑First Feature Design' +- name: 'Working Backwards: Problem-First Feature Design' startOffset: 1905 url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1905 endOffset: 2134 @@ -70,7 +70,7 @@ quotableClips: startOffset: 2341 url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2341 endOffset: 2504 -- name: 'Three‑Year Roadmap: Prioritization by Impact, Effort & Cost' +- name: 'Three-Year Roadmap: Prioritization by Impact, Effort & Cost' startOffset: 2504 url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2504 endOffset: 2838 @@ -94,7 +94,7 @@ quotableClips: startOffset: 3464 url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3464 endOffset: 3522 -- name: 'Career Advice: Learn Product Skills on the Job & Follow‑up Resources' +- name: 'Career Advice: Learn Product Skills on the Job & Follow-up Resources' startOffset: 3522 url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3522 endOffset: 3647 @@ -486,7 +486,7 @@ transcript: sec: 1685 time: '28:05' who: Alexey -- header: 'Contributing to Roadmaps: Technical Input & T‑Shirt Sizing' +- header: 'Contributing to Roadmaps: Technical Input & T-Shirt Sizing' - line: If you're a data professional, you're on the business team as a data analyst, a business analyst, or you're on the tech side, even an ML engineer or a data engineer, etc. 
Since product roadmaps are led by product managers, the best practice @@ -526,7 +526,7 @@ transcript: sec: 1881 time: '31:21' who: Greg -- header: 'Working Backwards: Problem‑First Feature Design' +- header: 'Working Backwards: Problem-First Feature Design' - line: I guess another thing – I'm not sure if we talked about this – but you mentioned it a few times that you should start with an end in mind. I guess this is something that we can also do. Let's say, when we discuss any feature, we can say “Let's @@ -665,7 +665,7 @@ transcript: sec: 2424 time: '40:24' who: Greg -- header: 'Three‑Year Roadmap: Prioritization by Impact, Effort & Cost' +- header: 'Three-Year Roadmap: Prioritization by Impact, Effort & Cost' - line: I think you said “driving the roadmap” multiple times. Let's say I work in a team that has a product manager, and a bunch of other people. So then “driving the roadmap” of this team means taking active part in discussions when we talk @@ -922,7 +922,7 @@ transcript: sec: 3495 time: '58:15' who: Greg -- header: 'Career Advice: Learn Product Skills on the Job & Follow‑up Resources' +- header: 'Career Advice: Learn Product Skills on the Job & Follow-up Resources' - line: Yeah. Okay. I think we should be wrapping up. Do you want to say anything before we finish? sec: 3522 diff --git a/_podcast/building-and-scaling-data-team.md b/_podcast/building-and-scaling-data-team.md index 2d1663c1..588d6324 100644 --- a/_podcast/building-and-scaling-data-team.md +++ b/_podcast/building-and-scaling-data-team.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/building-and-leading-data-teams-tammy-liang/id1541710331?i=1000537994433 description: 'Learn to build a scalable data team: hiring, production ML delivery, demand forecasting and driving adoption—practical staffing, stack, and governance tips.' 
-intro: How do you build and scale a data team that moves beyond dashboards to production ML, reliable forecasting, and real adoption across the business? In this episode Tammy Liang, Chief of Data at Platanomelón and co‑host of Data for Future, walks through her journey building data capabilities for marketing, e‑commerce, and operations at a mission‑driven consumer brand.

Tammy breaks down practical hiring decisions—why she hired an analyst first, then a data engineer, and why early senior hires matter—plus the tradeoffs between analyst, engineer, and business‑facing roles. She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, Notion) to enable forecasting and production ML, and describes common model delivery challenges moving work out of notebooks. The conversation also covers demand forecasting, time‑series and basic machine learning skills, data accuracy and governance, dbt tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building trust.

Listen to learn concrete steps for hiring a data team, setting up a data warehouse for forecasting, delivering models to production, and creating data products that stakeholders actually use +intro: How do you build and scale a data team that moves beyond dashboards to production ML, reliable forecasting, and real adoption across the business? In this episode Tammy Liang, Chief of Data at Platanomelón and co-host of Data for Future, walks through her journey building data capabilities for marketing, e-commerce, and operations at a mission-driven consumer brand.

Tammy breaks down practical hiring decisions—why she hired an analyst first, then a data engineer, and why early senior hires matter—plus the tradeoffs between analyst, engineer, and business-facing roles. She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, Notion) to enable forecasting and production ML, and describes common model delivery challenges moving work out of notebooks. The conversation also covers demand forecasting, time-series and basic machine learning skills, data accuracy and governance, dbt tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building trust.

Listen to learn concrete steps for hiring a data team, setting up a data warehouse for forecasting, delivering models to production, and creating data products that stakeholders actually use topics: - team building - data teams @@ -36,7 +36,7 @@ quotableClips: startOffset: 74 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=74 endOffset: 247 -- name: 'Chief of Data Responsibilities: Marketing, e‑commerce, and operations' +- name: 'Chief of Data Responsibilities: Marketing, e-commerce, and operations' startOffset: 247 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=247 endOffset: 404 @@ -49,7 +49,7 @@ quotableClips: startOffset: 442 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=442 endOffset: 531 -- name: 'Cross‑team Collaboration: Streamlining reporting and building trust' +- name: 'Cross-team Collaboration: Streamlining reporting and building trust' startOffset: 531 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=531 endOffset: 606 @@ -73,7 +73,7 @@ quotableClips: startOffset: 1031 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1031 endOffset: 1121 -- name: 'Business‑Facing Role: Hiring for adoption and communication' +- name: 'Business-Facing Role: Hiring for adoption and communication' startOffset: 1121 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1121 endOffset: 1352 @@ -97,7 +97,7 @@ quotableClips: startOffset: 1857 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1857 endOffset: 1989 -- name: 'First‑Hire Qualities: Business alignment and leadership mindset' +- name: 'First-Hire Qualities: Business alignment and leadership mindset' startOffset: 1989 url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1989 endOffset: 2138 @@ -193,7 +193,7 @@ transcript: sec: 157 time: '2:37' who: Tammy -- header: 'Chief of Data Responsibilities: Marketing, e‑commerce, and operations' +- header: 'Chief of Data Responsibilities: Marketing, e-commerce, and operations' - line: You started as the first data person that's really cool. 
What do you do as chief of data? sec: 247 @@ -275,7 +275,7 @@ transcript: sec: 508 time: '8:28' who: Alexey -- header: 'Cross‑team Collaboration: Streamlining reporting and building trust' +- header: 'Cross-team Collaboration: Streamlining reporting and building trust' - line: I would say that it's a little bit the other way around. It’s more about the management realizing, “Okay, we do have the need. Everyone is talking about data and for the company to grow further it’s maybe something we need.” But people @@ -436,7 +436,7 @@ transcript: sec: 1031 time: '17:11' who: Tammy -- header: 'Business‑Facing Role: Hiring for adoption and communication' +- header: 'Business-Facing Role: Hiring for adoption and communication' - line: As the data team we need to work a lot, because if we do not tell the team what we are producing, the tools are developed, but they just sit there and no one uses them. So we would just be wasting our time and energy. Therefore, the @@ -757,7 +757,7 @@ transcript: sec: 1930 time: '32:10' who: Tammy -- header: 'First‑Hire Qualities: Business alignment and leadership mindset' +- header: 'First-Hire Qualities: Business alignment and leadership mindset' - line: Yeah, thanks. I have a lot of questions that I didn't send you in the list of questions that I have prepared. So the question I have is, “What kind of qualities do you need to have as the first data person in a company? Should you be more diff --git a/_podcast/building-data-team.md b/_podcast/building-data-team.md index 2d8188df..790b88f3 100644 --- a/_podcast/building-data-team.md +++ b/_podcast/building-data-team.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/building-a-data-science-team-dat-tran/id1541710331?i=1000502061864 description: 'Master building ML teams: hiring playbooks, MLOps day-two ops, and product-driven AI for startups—scale with T-shaped engineers, ship robust models.' 
-intro: 'How do you build and scale an ML team that delivers product-driven AI without getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks through practical strategies for hiring, MLOps, and shaping data teams for startups.

Dat draws on a decade of production ML experience to unpack the MLOps mindset (day‑two operations, model maintenance), how to hire early (T‑shaped generalists, take‑home assessments, key hiring signals), and when to shift to specialists as you scale. He also explains product-centric practices: aligning hiring to prototype vs. MVP needs, prioritizing impact over technical perfection, and building human‑centric AI (augmenting pricing managers at Priceloop). Other topics include open research and open source as strategic advantages, bootstrapping data capabilities, retention through autonomy and interesting work, and educating leadership about realistic AI expectations.

Listen for actionable guidance on building ML teams, hiring machine learning engineers, and implementing MLOps and product-driven AI in early‑stage startups.' +intro: 'How do you build and scale an ML team that delivers product-driven AI without getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks through practical strategies for hiring, MLOps, and shaping data teams for startups.

Dat draws on a decade of production ML experience to unpack the MLOps mindset (day-two operations, model maintenance), how to hire early (T-shaped generalists, take-home assessments, key hiring signals), and when to shift to specialists as you scale. He also explains product-centric practices: aligning hiring to prototype vs. MVP needs, prioritizing impact over technical perfection, and building human-centric AI (augmenting pricing managers at Priceloop). Other topics include open research and open source as strategic advantages, bootstrapping data capabilities, retention through autonomy and interesting work, and educating leadership about realistic AI expectations.

Listen for actionable guidance on building ML teams, hiring machine learning engineers, and implementing MLOps and product-driven AI in early-stage startups.' topics: - leadership - team building @@ -52,7 +52,7 @@ quotableClips: startOffset: 486 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=486 endOffset: 560 -- name: 'MLOps Mindset: Day‑Two Operations and Model Maintenance' +- name: 'MLOps Mindset: Day-Two Operations and Model Maintenance' startOffset: 560 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=560 endOffset: 667 @@ -72,23 +72,23 @@ quotableClips: startOffset: 1158 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1158 endOffset: 1226 -- name: 'Founding Priceloop: Technical Co‑founder and Pricing Opportunity' +- name: 'Founding Priceloop: Technical Co-founder and Pricing Opportunity' startOffset: 1226 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1226 endOffset: 1399 -- name: 'Pricing Product Vision: White‑Box AI Framework for Dynamic Pricing' +- name: 'Pricing Product Vision: White-Box AI Framework for Dynamic Pricing' startOffset: 1399 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1399 endOffset: 1492 -- name: 'Human‑Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' +- name: 'Human-Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' startOffset: 1492 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1492 endOffset: 1525 -- name: 'Early‑Stage Hiring Plan: Building a Tactical Product Team' +- name: 'Early-Stage Hiring Plan: Building a Tactical Product Team' startOffset: 1525 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1525 endOffset: 1645 -- name: 'Open Research Strategy: Community, Open‑Source & Competitive Advantage' +- name: 'Open Research Strategy: Community, Open-Source & Competitive Advantage' startOffset: 1645 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1645 endOffset: 1737 @@ -96,19 +96,19 @@ quotableClips: startOffset: 1737 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1737 
endOffset: 1780 -- name: 'Cross‑Functional Roles: ML Engineers, Data Engineers, PMs & Designers' +- name: 'Cross-Functional Roles: ML Engineers, Data Engineers, PMs & Designers' startOffset: 1780 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1780 endOffset: 1839 -- name: 'Generalists First: T‑Shaped Engineers for Early Startups' +- name: 'Generalists First: T-Shaped Engineers for Early Startups' startOffset: 1839 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1839 endOffset: 2015 -- name: 'Mid‑Stage Hiring: Shifting Toward Specialists as Maturity Grows' +- name: 'Mid-Stage Hiring: Shifting Toward Specialists as Maturity Grows' startOffset: 2015 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2015 endOffset: 2243 -- name: 'Product‑Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' +- name: 'Product-Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' startOffset: 2243 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2243 endOffset: 2371 @@ -120,11 +120,11 @@ quotableClips: startOffset: 2607 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2607 endOffset: 2851 -- name: 'Take‑Home Assessments: Code Quality, Naming, Consistency & Detail' +- name: 'Take-Home Assessments: Code Quality, Naming, Consistency & Detail' startOffset: 2851 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2851 endOffset: 2991 -- name: 'Project Prioritization: Impact vs Technical Feasibility & Fail‑Fast' +- name: 'Project Prioritization: Impact vs Technical Feasibility & Fail-Fast' startOffset: 2991 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2991 endOffset: 3152 @@ -144,7 +144,7 @@ quotableClips: startOffset: 3400 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3400 endOffset: 3619 -- name: Episode Wrap‑Up & Key Takeaways +- name: Episode Wrap-Up & Key Takeaways startOffset: 3619 url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3619 endOffset: 3524 @@ -270,7 +270,7 @@ transcript: sec: 517 time: '8:37' who: Dat -- header: 'MLOps Mindset: 
Day‑Two Operations and Model Maintenance' +- header: 'MLOps Mindset: Day-Two Operations and Model Maintenance' - line: 'I learned a lot about this. I devised my own ideas on how to make it happen. Because at the time, no one was really thinking about that. What I was thinking was: how do you create this fancy machine learning model? How do you do all the @@ -430,7 +430,7 @@ transcript: sec: 1198 time: '19:58' who: Dat -- header: 'Founding Priceloop: Technical Co‑founder and Pricing Opportunity' +- header: 'Founding Priceloop: Technical Co-founder and Pricing Opportunity' - line: Then I was talking to a few friends. One idea was “Okay, maybe you go back to Vietnam.” I'm not from Vietnam, I'm from Germany, but maybe go to Vietnam and go to a consultancy, because the tech is really strong there, and maybe an idea @@ -472,7 +472,7 @@ transcript: sec: 1346 time: '22:26' who: Dat -- header: 'Pricing Product Vision: White‑Box AI Framework for Dynamic Pricing' +- header: 'Pricing Product Vision: White-Box AI Framework for Dynamic Pricing' - line: As far as you know, there's many AI software systems out there, also for pricing. Most of these pricing servers are actually more closed solutions. You get the data from your client, and then you put it into your system – maybe you have a @@ -496,7 +496,7 @@ transcript: sec: 1460 time: '24:20' who: Dat -- header: 'Human‑Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' +- header: 'Human-Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' - line: We don't want to take away the pricing manager. We don't want to tell them “Hey, if you're going to use this, you don't need to hire a pricing manager or you can fire the pricing manager.” No, we want to give them a frame of a tool. @@ -506,7 +506,7 @@ transcript: sec: 1492 time: '24:52' who: Dat -- header: 'Early‑Stage Hiring Plan: Building a Tactical Product Team' +- header: 'Early-Stage Hiring Plan: Building a Tactical Product Team' - line: A long story. 
But very interesting. What stood out to me was, first of all, you mentioned Andrew Ng and his course on Coursera. I think so many people ended up where they are now, because of that course. Including myself. Yeah, it changed @@ -528,7 +528,7 @@ transcript: sec: 1598 time: '26:38' who: Dat -- header: 'Open Research Strategy: Community, Open‑Source & Competitive Advantage' +- header: 'Open Research Strategy: Community, Open-Source & Competitive Advantage' - line: Our goal is to create a strong tactical product team. Which focuses on disrupting one of the industries. We believe that the future is in open research, and contribution from outside and contributing into ideas for many, many different organizations. @@ -558,7 +558,7 @@ transcript: sec: 1737 time: '28:57' who: Dat -- header: 'Cross‑Functional Roles: ML Engineers, Data Engineers, PMs & Designers' +- header: 'Cross-Functional Roles: ML Engineers, Data Engineers, PMs & Designers' - line: But we just don't know, which features will lead to this kind of thing. We are hiring for different roles that would take us to that point to get a better understanding of our vision. We’re building like an open framework. Like a library. @@ -575,7 +575,7 @@ transcript: sec: 1812 time: '30:12' who: Dat -- header: 'Generalists First: T‑Shaped Engineers for Early Startups' +- header: 'Generalists First: T-Shaped Engineers for Early Startups' - line: There's a lot of roles that need you to think about before. In the beginning you also need to think about – do you need very experienced people or inexperienced people? Also generalists with specialists? This is the question that you really @@ -625,7 +625,7 @@ transcript: sec: 1983 time: '33:03' who: Dat -- header: 'Mid‑Stage Hiring: Shifting Toward Specialists as Maturity Grows' +- header: 'Mid-Stage Hiring: Shifting Toward Specialists as Maturity Grows' - line: If I would map it to Idealo. Idealo was not very mature, but also not completely immature. 
It was in the middle of this transformation. They had a data analyst before – they had business intelligence people – they also had data engineering @@ -692,7 +692,7 @@ transcript: sec: 2241 time: '37:21' who: Dat -- header: 'Product‑Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' +- header: 'Product-Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' - line: You mentioned a couple of things previously. And one thing that stood out to me was – you want to build a strong product team. What does that mean to you – a strong product team? @@ -861,7 +861,7 @@ transcript: sec: 2841 time: '47:21' who: Alexey -- header: 'Take‑Home Assessments: Code Quality, Naming, Consistency & Detail' +- header: 'Take-Home Assessments: Code Quality, Naming, Consistency & Detail' - line: The second interview is a homework assignment. I send out a homework, which is not very difficult. Then they send me the code, whether it is Jupyter Notebook or whatever. Then I check it. From this simple task, you could already see how @@ -903,7 +903,7 @@ transcript: sec: 2939 time: '48:59' who: Alexey -- header: 'Project Prioritization: Impact vs Technical Feasibility & Fail‑Fast' +- header: 'Project Prioritization: Impact vs Technical Feasibility & Fail-Fast' - line: This is always a very difficult question. It's risky. Let's say you have 100 projects. You have only limited resources, which means you need to pick the one that has the highest return on investment. What I do is – I have this matrix. @@ -1070,7 +1070,7 @@ transcript: sec: 3608 time: '60:08' who: Alexey -- header: Episode Wrap‑Up & Key Takeaways +- header: Episode Wrap-Up & Key Takeaways - line: Yeah. Thanks a lot for taking time to come here and share your knowledge with us and your expertise. Thanks a lot and thank you everyone for attending and you questions. And we will put the video out soon. And yeah – that’s all, I think. 
diff --git a/_podcast/building-domestic-risk-assessment-tool.md b/_podcast/building-domestic-risk-assessment-tool.md index b91edbe3..2bd37544 100644 --- a/_podcast/building-domestic-risk-assessment-tool.md +++ b/_podcast/building-domestic-risk-assessment-tool.md @@ -17,16 +17,16 @@ links: youtube: https://www.youtube.com/watch?v=CpWlBAmD9ok description: 'Discover building a domestic risk assessment: data cleaning, risk scoring models, and privacy compliance to improve triage, reduce bias, and ensure compliance.' -intro: 'How do you build an accurate, privacy‑compliant domestic risk assessment tool +intro: 'How do you build an accurate, privacy-compliant domestic risk assessment tool that frontline teams can actually use? In this episode Sabina Firtala — who leads Frontline’s AI product development and brings experience in data wrangling, model - validation, and applied analytics from finance, SaaS, and mission‑driven projects + validation, and applied analytics from finance, SaaS, and mission-driven projects — walks through a practical roadmap.

We cover problem framing and project - scope; sources like case management, public records, and surveys; and hands‑on data + scope; sources like case management, public records, and surveys; and hands-on data work: cleaning, linking, and feature engineering. Sabina explains risk scoring approaches and model architecture, evaluation metrics and bias assessment, plus privacy, ethical considerations, and legal data governance. You’ll also hear about deployment into - frontline workflows, user interface and decision‑support design, training and stakeholder + frontline workflows, user interface and decision-support design, training and stakeholder trust, ongoing monitoring and drift detection, and examples of impact on triage and resource allocation. The conversation closes with collaboration strategies, funding and scaling, open documentation for reproducibility, and concrete lessons diff --git a/_podcast/building-explainable-and-actionable-ai-ml-systems.md b/_podcast/building-explainable-and-actionable-ai-ml-systems.md index d5711eb0..80fe420c 100644 --- a/_podcast/building-explainable-and-actionable-ai-ml-systems.md +++ b/_podcast/building-explainable-and-actionable-ai-ml-systems.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=EQcY83VA0Us description: "Build trustworthy ML systems that drive business decisions through explainable AI, organizational trust theory, and actionable model deployment." -intro: "How do you build ML systems that business teams trust and can act on? In this episode, Polina Mosolova — a data scientist at SAP who completed an industrial PhD building end‑to‑end ML pipelines — demonstrates how to bridge research and production through explainable AI grounded in organizational trust theory. Drawing from her churn prediction research, Polina shows how the ABI framework (Ability, Benevolence, Integrity) transforms model explanations into actionable business interventions.

We explore the industrial PhD path as a vehicle for building trustworthy ML systems, covering the practical tensions of research and production deliverables, supervision dynamics, and how academic rigor enhances deployable models. Technical deep-dives include interpretability versus explainability versus actionable ML, model architecture choices (glass‑box models, GAMs, Neural Additive Models), explainability tooling (random forest + SHAP), computer vision activation maps, and why LLM explainability faces unique challenges compared to tabular models. The conversation ties together trust proxies, KPIs, and MLOps practices that make explanations business‑relevant.

Listen to learn a systematic approach for building ML systems where explanations drive decisions — essential for data scientists who need to deploy models that stakeholders understand, trust, and can act upon to achieve measurable business outcomes." +intro: "How do you build ML systems that business teams trust and can act on? In this episode, Polina Mosolova — a data scientist at SAP who completed an industrial PhD building end-to-end ML pipelines — demonstrates how to bridge research and production through explainable AI grounded in organizational trust theory. Drawing from her churn prediction research, Polina shows how the ABI framework (Ability, Benevolence, Integrity) transforms model explanations into actionable business interventions.

We explore the industrial PhD path as a vehicle for building trustworthy ML systems, covering the practical tensions of research and production deliverables, supervision dynamics, and how academic rigor enhances deployable models. Technical deep-dives include interpretability versus explainability versus actionable ML, model architecture choices (glass-box models, GAMs, Neural Additive Models), explainability tooling (random forest + SHAP), computer vision activation maps, and why LLM explainability faces unique challenges compared to tabular models. The conversation ties together trust proxies, KPIs, and MLOps practices that make explanations business-relevant.

Listen to learn a systematic approach for building ML systems where explanations drive decisions — essential for data scientists who need to deploy models that stakeholders understand, trust, and can act upon to achieve measurable business outcomes." topics: - machine learning - AI @@ -36,15 +36,15 @@ quotableClips: startOffset: 74 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=74 endOffset: 125 -- name: 'Career Journey: Industrial PhD to Full‑Stack Data Scientist at SAP' +- name: 'Career Journey: Industrial PhD to Full-Stack Data Scientist at SAP' startOffset: 125 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=125 endOffset: 439 -- name: 'Role Evolution: From Full‑Stack Data Scientist to MLOps Specialization' +- name: 'Role Evolution: From Full-Stack Data Scientist to MLOps Specialization' startOffset: 439 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=439 endOffset: 559 -- name: 'PhD Practice: Building End‑to‑End ML Pipelines During Doctoral Research' +- name: 'PhD Practice: Building End-to-End ML Pipelines During Doctoral Research' startOffset: 559 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=559 endOffset: 634 @@ -64,7 +64,7 @@ quotableClips: startOffset: 1077 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1077 endOffset: 1145 -- name: 'Research‑Industry Bridge: Academic Conferences and Summer Schools' +- name: 'Research-Industry Bridge: Academic Conferences and Summer Schools' startOffset: 1145 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1145 endOffset: 1237 @@ -100,7 +100,7 @@ quotableClips: startOffset: 2643 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2643 endOffset: 2842 -- name: 'Model Choices: Glass‑Box Models, Generalized Additive Models, Neural Additive +- name: 'Model Choices: Glass-Box Models, Generalized Additive Models, Neural Additive Models' startOffset: 2842 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2842 @@ -138,7 +138,7 @@ quotableClips: startOffset: 3629 url: 
https://www.youtube.com/watch?v=EQcY83VA0Us&t=3629 endOffset: 3761 -- name: Episode Wrap‑Up and Closing Remarks +- name: Episode Wrap-Up and Closing Remarks startOffset: 3761 url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3761 endOffset: 3708 @@ -166,7 +166,7 @@ transcript: sec: 116 time: '1:56' who: Alexey -- header: 'Career Journey: Industrial PhD to Full‑Stack Data Scientist at SAP' +- header: 'Career Journey: Industrial PhD to Full-Stack Data Scientist at SAP' - line: So let's start. Before we go into our main topic of interpretable/explainable AI and ML, let's start with your background. Can you tell us about your career journey so far? @@ -210,7 +210,7 @@ transcript: sec: 418 time: '6:58' who: Polina -- header: 'Role Evolution: From Full‑Stack Data Scientist to MLOps Specialization' +- header: 'Role Evolution: From Full-Stack Data Scientist to MLOps Specialization' - line: Yeah, it's funny that you mentioned this full-stack data scientist term. When I first gave this talk like two or three years ago, it was a thing because the role of an ML engineer was not yet that developed. It wasn't that common. And @@ -244,7 +244,7 @@ transcript: sec: 499 time: '8:19' who: Polina -- header: 'PhD Practice: Building End‑to‑End ML Pipelines During Doctoral Research' +- header: 'PhD Practice: Building End-to-End ML Pipelines During Doctoral Research' - line: Is it a common situation when a PhD student actually needs to do everything end-to-end? Because I think it is, right? That's kind of the point. Or is there usually help? @@ -421,7 +421,7 @@ transcript: sec: 1126 time: '18:46' who: Alexey -- header: 'Research‑Industry Bridge: Academic Conferences and Summer Schools' +- header: 'Research-Industry Bridge: Academic Conferences and Summer Schools' - line: Day-to-day, I think it was just a data science project. Just the data science work that you can imagine – regular calls with stakeholders. 
I think that's not that much different from what every data scientist who has business facing roles @@ -910,7 +910,7 @@ transcript: sec: 2841 time: '47:21' who: Polina -- header: 'Model Choices: Glass‑Box Models, Generalized Additive Models, Neural Additive +- header: 'Model Choices: Glass-Box Models, Generalized Additive Models, Neural Additive Models' - line: And then would random forest plus SHAP values be a glass box model or black box? @@ -1200,7 +1200,7 @@ transcript: sec: 3748 time: '1:02:28' who: Polina -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: Okay. I think we should be wrapping up. Thanks a lot, Polina, for joining us today, for sharing your experience with us, for telling us about your experience doing a PhD, and your work. And thanks, everyone, for joining us today too, and diff --git a/_podcast/building-healthcare-machine-learning-systems.md b/_podcast/building-healthcare-machine-learning-systems.md index 616d3627..7aa89a23 100644 --- a/_podcast/building-healthcare-machine-learning-systems.md +++ b/_podcast/building-healthcare-machine-learning-systems.md @@ -53,11 +53,11 @@ quotableClips: startOffset: 165 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=165 endOffset: 283 -- name: 'Philips Healthcare Projects: C‑arm imaging and pregnancy monitoring' +- name: 'Philips Healthcare Projects: C-arm imaging and pregnancy monitoring' startOffset: 283 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=283 endOffset: 408 -- name: 'Low‑Resource Pediatric Monitoring: Vital‑sign system design for Malawi' +- name: 'Low-Resource Pediatric Monitoring: Vital-sign system design for Malawi' startOffset: 408 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=408 endOffset: 454 @@ -74,7 +74,7 @@ quotableClips: startOffset: 663 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=663 endOffset: 793 -- name: '3D Reconstruction Work: multi‑view geometry from C‑arm images' +- name: '3D Reconstruction Work: multi-view 
geometry from C-arm images' startOffset: 793 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=793 endOffset: 943 @@ -87,7 +87,7 @@ quotableClips: startOffset: 1085 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1085 endOffset: 1168 -- name: 'Ballistography Signal Research: denoising and U‑Net for infant heart rate +- name: 'Ballistography Signal Research: denoising and U-Net for infant heart rate estimation' startOffset: 1168 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1168 @@ -97,7 +97,7 @@ quotableClips: startOffset: 1309 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1309 endOffset: 1483 -- name: 'Patient Acuity Scoring: vitals‑based scoring poster' +- name: 'Patient Acuity Scoring: vitals-based scoring poster' startOffset: 1483 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1483 endOffset: 1523 @@ -113,7 +113,7 @@ quotableClips: startOffset: 1870 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1870 endOffset: 2034 -- name: 'Healthcare vs E‑commerce Data: offline events, timestamps, and higher risk' +- name: 'Healthcare vs E-commerce Data: offline events, timestamps, and higher risk' startOffset: 2034 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2034 endOffset: 2145 @@ -134,7 +134,7 @@ quotableClips: startOffset: 2792 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2792 endOffset: 3050 -- name: 'ML Deployment Constraints: on‑device vs cloud for low‑resource settings' +- name: 'ML Deployment Constraints: on-device vs cloud for low-resource settings' startOffset: 3050 url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3050 endOffset: 3165 @@ -260,7 +260,7 @@ transcript: sec: 262 time: '4:22' who: Elena -- header: 'Philips Healthcare Projects: C‑arm imaging and pregnancy monitoring' +- header: 'Philips Healthcare Projects: C-arm imaging and pregnancy monitoring' - line: Nice. Well, I have never heard about Philips Healthcare. What I know about Philips is that they produce lamps. I have a few smart lamps – Philips Hue. 
I also know that they produce trimmers (for shaving) but that's pretty much the @@ -288,7 +288,7 @@ transcript: sec: 380 time: '6:20' who: Alexey -- header: 'Low‑Resource Pediatric Monitoring: Vital‑sign system design for Malawi' +- header: 'Low-Resource Pediatric Monitoring: Vital-sign system design for Malawi' - line: I actually left my current company in July. I'm actually now on sabbatical. I worked for them for almost two years, focusing more on designing things like, “What is the future of data science in the company and how can we develop data @@ -387,7 +387,7 @@ transcript: sec: 753 time: '12:33' who: Alexey -- header: '3D Reconstruction Work: multi‑view geometry from C‑arm images' +- header: '3D Reconstruction Work: multi-view geometry from C-arm images' - line: Yeah. And there, I worked on a topic that was not really related to data science. They have this C-arm, which is in the shape of a C, like that [Elena shows the shape of the letter C with her hand] and on the top of this arm, there are four @@ -493,7 +493,7 @@ transcript: sec: 1162 time: '19:22' who: Alexey -- header: 'Ballistography Signal Research: denoising and U‑Net for infant heart rate +- header: 'Ballistography Signal Research: denoising and U-Net for infant heart rate estimation' - line: During the period when I was working for this company where we were working on vital sign monitoring systems in Africa, I was working on… It is called ballistography @@ -570,7 +570,7 @@ transcript: sec: 1446 time: '24:06' who: Alexey -- header: 'Patient Acuity Scoring: vitals‑based scoring poster' +- header: 'Patient Acuity Scoring: vitals-based scoring poster' - line: This is my first publication. But then I also published a poster that is not available online. It was about calculating a patient’s score based on the vitals of the patient. 
The main idea behind this was to have an overall assessment of @@ -714,7 +714,7 @@ transcript: sec: 2018 time: '33:38' who: Alexey -- header: 'Healthcare vs E‑commerce Data: offline events, timestamps, and higher risk' +- header: 'Healthcare vs E-commerce Data: offline events, timestamps, and higher risk' - line: I see that there is a comment from Sylvia. “Thanks, Eleni, for sharing your experience. How advanced and trusted is data science in healthcare compared to other sectors?” For example, I worked in e-commerce, and I think in e-commerce, @@ -974,7 +974,7 @@ transcript: sec: 2996 time: '49:56' who: Alexey -- header: 'ML Deployment Constraints: on‑device vs cloud for low‑resource settings' +- header: 'ML Deployment Constraints: on-device vs cloud for low-resource settings' - line: This is indeed, for the data engineer to just take the model that the data scientists create, and then deploy that on the machine so that it also works and aligns with the rest of the software there. Also, they take into account the restrictions diff --git a/_podcast/building-ml-communities-diversity-and-career-growth.md b/_podcast/building-ml-communities-diversity-and-career-growth.md index acb7f218..26c06c45 100644 --- a/_podcast/building-ml-communities-diversity-and-career-growth.md +++ b/_podcast/building-ml-communities-diversity-and-career-growth.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=SRUwwvk_YCk description: Discover how to build and scale a data science community, boost diversity, deploy ML, and accelerate career growth with mentoring & hiring strategies -intro: 'How do you build and scale a data science community that actually advances diversity, supports machine learning deployment, and accelerates career growth? 
In this episode, Dânia Meira — AI Guild co‑founder, data scientist, teacher and speaker with a Master’s in Computer Science (AI) — walks through her journey from applied math and marketing analytics to founding a global data science community in Berlin.

We cover practical community building: turning women’s meetups into monthly dinners and an international membership, curating meetup content and the Datalift Summit, and policies like visibility-first speaker invites, codes of conduct, and misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) improves product fit and market reach, how to create psychological safety, and how to source and train diverse talent for regulated industries. She also outlines a vendor‑agnostic consulting model for machine learning deployment, community‑to‑client matching, and scaling from a freelance network to full‑time teams.' +intro: 'How do you build and scale a data science community that actually advances diversity, supports machine learning deployment, and accelerates career growth? In this episode, Dânia Meira — AI Guild co-founder, data scientist, teacher and speaker with a Master’s in Computer Science (AI) — walks through her journey from applied math and marketing analytics to founding a global data science community in Berlin.

We cover practical community building: turning women’s meetups into monthly dinners and an international membership, curating meetup content and the Datalift Summit, and policies like visibility-first speaker invites, codes of conduct, and misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) improves product fit and market reach, how to create psychological safety, and how to source and train diverse talent for regulated industries. She also outlines a vendor-agnostic consulting model for machine learning deployment, community-to-client matching, and scaling from a freelance network to full-time teams.' topics: - data science - machine learning @@ -32,7 +32,7 @@ quotableClips: startOffset: 0 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=0 endOffset: 94 -- name: 'Guest Introduction: Dania — AI Guild co‑founder, machine learning background' +- name: 'Guest Introduction: Dania — AI Guild co-founder, machine learning background' startOffset: 94 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=94 endOffset: 152 @@ -40,7 +40,7 @@ quotableClips: startOffset: 152 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=152 endOffset: 216 -- name: 'Move to Berlin: Startup roles and building end‑to‑end data skills' +- name: 'Move to Berlin: Startup roles and building end-to-end data skills' startOffset: 216 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=216 endOffset: 319 @@ -60,7 +60,7 @@ quotableClips: startOffset: 921 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=921 endOffset: 1005 -- name: 'Datalift Summit Origin: Organizing the first in‑person conference post‑COVID' +- name: 'Datalift Summit Origin: Organizing the first in-person conference post-COVID' startOffset: 1005 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1005 endOffset: 1191 @@ -85,7 +85,7 @@ quotableClips: startOffset: 1884 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1884 endOffset: 2023 -- name: 'Consulting Model: Vendor‑agnostic machine learning 
deployment support' +- name: 'Consulting Model: Vendor-agnostic machine learning deployment support' startOffset: 2023 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2023 endOffset: 2061 @@ -105,11 +105,11 @@ quotableClips: startOffset: 2736 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2736 endOffset: 2970 -- name: 'Responding to Misconduct: Reporting, case‑by‑case handling, and consequences' +- name: 'Responding to Misconduct: Reporting, case-by-case handling, and consequences' startOffset: 2970 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2970 endOffset: 3228 -- name: 'Community‑to‑Client Matching: Leveraging member expertise for projects' +- name: 'Community-to-Client Matching: Leveraging member expertise for projects' startOffset: 3228 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3228 endOffset: 3373 @@ -117,7 +117,7 @@ quotableClips: startOffset: 3373 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3373 endOffset: 3476 -- name: 'Scaling Strategy: Freelance network today, hiring full‑time as demand grows' +- name: 'Scaling Strategy: Freelance network today, hiring full-time as demand grows' startOffset: 3476 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3476 endOffset: 3551 @@ -129,14 +129,14 @@ quotableClips: startOffset: 3642 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3642 endOffset: 3678 -- name: Closing Remarks and Sign‑off +- name: Closing Remarks and Sign-off startOffset: 3678 url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3678 endOffset: 3584 transcript: - header: Podcast Introduction -- header: 'Guest Introduction: Dania — AI Guild co‑founder, machine learning background' +- header: 'Guest Introduction: Dania — AI Guild co-founder, machine learning background' - line: This week, we'll talk about diversity and leadership in data science and AI. We have a special guest today, Dania. Dania is a co-founder and director at the AI Guild, where she works with companies scaling data analytics and machine learning. 
@@ -181,7 +181,7 @@ transcript: sec: 214 time: '3:34' who: Alexey -- header: 'Move to Berlin: Startup roles and building end‑to‑end data skills' +- header: 'Move to Berlin: Startup roles and building end-to-end data skills' - line: Yeah. At this point, I was finishing my Master's, and I was thinking about working abroad. It happened that by chance, I got approached via LinkedIn for a job as a data scientist in Berlin. It was a perfect match. They were looking @@ -418,7 +418,7 @@ transcript: sec: 963 time: '16:03' who: Alexey -- header: 'Datalift Summit Origin: Organizing the first in‑person conference post‑COVID' +- header: 'Datalift Summit Origin: Organizing the first in-person conference post-COVID' - line: Yes. There's a lot going on. Yes. The monthly dinners were how we started. We always had the idea to get people together in a bigger group. We have the local dinners, but what about one big event? One where everyone travels to Berlin, which @@ -728,7 +728,7 @@ transcript: sec: 1918 time: '31:58' who: Dania -- header: 'Consulting Model: Vendor‑agnostic machine learning deployment support' +- header: 'Consulting Model: Vendor-agnostic machine learning deployment support' - line: You told us the story of how the AI Guild started. You had these meetups, these sessions, where you wanted to connect with other women in the field, and then eventually it grew to dinners and then the Guild itself. This was probably @@ -992,7 +992,7 @@ transcript: sec: 2930 time: '48:50' who: Alexey -- header: 'Responding to Misconduct: Reporting, case‑by‑case handling, and consequences' +- header: 'Responding to Misconduct: Reporting, case-by-case handling, and consequences' - line: This is a big challenge. We were talking about this diversity aspect in terms of culture or nationality. In different countries, you have different behavior that is accepted or not accepted. That's what I mean with the hardline. 
Some things @@ -1085,7 +1085,7 @@ transcript: sec: 3210 time: '53:30' who: Dania -- header: 'Community‑to‑Client Matching: Leveraging member expertise for projects' +- header: 'Community-to-Client Matching: Leveraging member expertise for projects' - line: I wanted to talk a bit more about the Guild. Right now, you're a for-profit organization and you offer consulting. So how does it work? Companies approach you saying, “Hey, we want to deploy some models.” Help us.” Something like that? @@ -1173,7 +1173,7 @@ transcript: sec: 3430 time: '57:10' who: Dania -- header: 'Scaling Strategy: Freelance network today, hiring full‑time as demand grows' +- header: 'Scaling Strategy: Freelance network today, hiring full-time as demand grows' - line: I see an interesting question from Azif. “What if you have too many customers and cannot cope with the numbers because there are just two of you?” How do you do this? Do you start finding somebody in the community to delegate work to? @@ -1282,7 +1282,7 @@ transcript: sec: 3677 time: '1:01:17' who: Dania -- header: Closing Remarks and Sign‑off +- header: Closing Remarks and Sign-off - line: '[laughs] Okay. Thanks a lot. Thanks, everyone, for joining us today. Today is Friday, so everyone – have a great weekend.' sec: 3678 diff --git a/_podcast/building-mlops-startup.md b/_podcast/building-mlops-startup.md index aecba523..0b1b0d76 100644 --- a/_podcast/building-mlops-startup.md +++ b/_podcast/building-mlops-startup.md @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7fwbqo5tDrtakuqWaIuEjc apple: https://podcasts.apple.com/us/podcast/i-want-to-build-a-machine-learning-startup-elena-samuylova/id1541710331?i=1000529106923 -description: 'Discover practical MLOps, model monitoring and founder‑fit tactics to build an ML startup: hire, fund, productize, and reach product‑market fit faster.' -intro: 'What does it take to build a successful ML startup—especially around MLOps, model monitoring, open source, and founder fit? 
Elena Samuylova, Co‑founder & CEO of Evidently AI, joins to answer that question drawing on her applied machine learning experience since 2014, including roles at Yandex Data Factory and an industrial AI startup.

This episode walks through practical founder decisions: sourcing problem‑first ideas, finding compatible co‑founders and establishing pre‑launch alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena explains what “AI‑first” positioning really means, how developer tools and open source shape go‑to‑market strategies (open core, cloud, monetization and cloning risks), and how Evidently validated model monitoring as a business. You’ll hear tactical guidance on customer discovery, persuading engineers to adopt your tool, data safety and on‑prem deployments, hiring and scaling tradeoffs, funding paths, productizing services for non‑technical founders, and normalizing failure and work–life tradeoffs.

Listen to gain actionable frameworks for building an ML startup—covering model monitoring, MLOps, open source strategy, founder‑market fit, and the concrete signals that indicate product–market fit.' +description: 'Discover practical MLOps, model monitoring and founder-fit tactics to build an ML startup: hire, fund, productize, and reach product-market fit faster.' +intro: 'What does it take to build a successful ML startup—especially around MLOps, model monitoring, open source, and founder fit? Elena Samuylova, Co-founder & CEO of Evidently AI, joins to answer that question drawing on her applied machine learning experience since 2014, including roles at Yandex Data Factory and an industrial AI startup.

This episode walks through practical founder decisions: sourcing problem-first ideas, finding compatible co-founders and establishing pre-launch alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena explains what “AI-first” positioning really means, how developer tools and open source shape go-to-market strategies (open core, cloud, monetization and cloning risks), and how Evidently validated model monitoring as a business. You’ll hear tactical guidance on customer discovery, persuading engineers to adopt your tool, data safety and on-prem deployments, hiring and scaling tradeoffs, funding paths, productizing services for non-technical founders, and normalizing failure and work–life tradeoffs.

Listen to gain actionable frameworks for building an ML startup—covering model monitoring, MLOps, open source strategy, founder-market fit, and the concrete signals that indicate product–market fit.' topics: - startup - machine learning @@ -69,7 +69,7 @@ quotableClips: startOffset: 1473 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1473 endOffset: 1581 -- name: 'Founder Skills: Self‑Starter Mindset and Learning Agility' +- name: 'Founder Skills: Self-Starter Mindset and Learning Agility' startOffset: 1581 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1581 endOffset: 1697 @@ -85,7 +85,7 @@ quotableClips: startOffset: 1967 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1967 endOffset: 2046 -- name: 'Part‑Time Startups: Weekend MVPs, Bootstrapping, and Grants' +- name: 'Part-Time Startups: Weekend MVPs, Bootstrapping, and Grants' startOffset: 2046 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2046 endOffset: 2147 @@ -93,7 +93,7 @@ quotableClips: startOffset: 2147 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2147 endOffset: 2288 -- name: 'Non‑Technical Founders: No-Code MVPs and Productizing Services' +- name: 'Non-Technical Founders: No-Code MVPs and Productizing Services' startOffset: 2288 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2288 endOffset: 2365 @@ -125,7 +125,7 @@ quotableClips: startOffset: 2969 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2969 endOffset: 3108 -- name: 'Bottom‑Up Adoption: Engineers First, Enterprise Later' +- name: 'Bottom-Up Adoption: Engineers First, Enterprise Later' startOffset: 3108 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3108 endOffset: 3189 @@ -137,7 +137,7 @@ quotableClips: startOffset: 3237 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3237 endOffset: 3377 -- name: 'Data Safety Options: On‑Premise Deployments with Open Source' +- name: 'Data Safety Options: On-Premise Deployments with Open Source' startOffset: 3377 url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3377 
endOffset: 3426 @@ -723,7 +723,7 @@ transcript: sec: 1571 time: '26:11' who: Elena -- header: 'Founder Skills: Self‑Starter Mindset and Learning Agility' +- header: 'Founder Skills: Self-Starter Mindset and Learning Agility' - line: What kind of skills do I need to start a startup? sec: 1581 time: '26:21' @@ -893,7 +893,7 @@ transcript: sec: 2008 time: '33:28' who: Elena -- header: 'Part‑Time Startups: Weekend MVPs, Bootstrapping, and Grants' +- header: 'Part-Time Startups: Weekend MVPs, Bootstrapping, and Grants' - line: We already started getting questions. You already mentioned that when you start a startup, you don't necessarily have to bootstrap yourself – meaning that you don’t have to live off of your savings. You can do a startup while still working. @@ -982,7 +982,7 @@ transcript: sec: 2276 time: '37:56' who: Elena -- header: 'Non‑Technical Founders: No-Code MVPs and Productizing Services' +- header: 'Non-Technical Founders: No-Code MVPs and Productizing Services' - line: We talked about a situation where “I'm a technical person, and I want to start a startup, this is what I do.” What if I'm not super-technical? I also don't have any resources to develop something, what do I do? @@ -1312,7 +1312,7 @@ transcript: sec: 3082 time: '51:22' who: Elena -- header: 'Bottom‑Up Adoption: Engineers First, Enterprise Later' +- header: 'Bottom-Up Adoption: Engineers First, Enterprise Later' - line: I guess in case of open source, what can happen is that the engineers and data scientists find your library, start using it, and then it reaches the management. The management sees it and then you sell the company your enterprise offer, right? 
@@ -1405,7 +1405,7 @@ transcript: sec: 3334 time: '55:34' who: Elena -- header: 'Data Safety Options: On‑Premise Deployments with Open Source' +- header: 'Data Safety Options: On-Premise Deployments with Open Source' - line: I think when Emely did a presentation a while ago at DataTalks.Club, one of the questions was “Hey, I'm a bit concerned about my data going to Russia.” What you answered was “Hey, it's open source. You don't have to be concerned. You just diff --git a/_podcast/building-open-source-data-product-for-identity-resolution.md b/_podcast/building-open-source-data-product-for-identity-resolution.md index ae943ac9..7cb333b5 100644 --- a/_podcast/building-open-source-data-product-for-identity-resolution.md +++ b/_podcast/building-open-source-data-product-for-identity-resolution.md @@ -138,7 +138,7 @@ quotableClips: startOffset: 3020 url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3020 endOffset: 3099 -- name: 'Impact Case Studies: Public-data donors, e‑commerce and classifieds' +- name: 'Impact Case Studies: Public-data donors, e-commerce and classifieds' startOffset: 3099 url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3099 endOffset: 3251 @@ -1092,7 +1092,7 @@ transcript: sec: 3098 time: '51:38' who: Sonal -- header: 'Impact Case Studies: Public-data donors, e‑commerce and classifieds' +- header: 'Impact Case Studies: Public-data donors, e-commerce and classifieds' - line: Okay. Another interesting question is about some success stories of implementing identity resolution in products. Maybe I can start with fraud detection. 
We didn't use Zingg for that at OLX, but there is a nice article at OLX’s tech blog (tech.OLX.com) diff --git a/_podcast/building-production-ml-platform-and-mlops-team.md b/_podcast/building-production-ml-platform-and-mlops-team.md index 8b7c93dd..242dabd3 100644 --- a/_podcast/building-production-ml-platform-and-mlops-team.md +++ b/_podcast/building-production-ml-platform-and-mlops-team.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=CB1YIsxQRtc description: Discover MLOps strategies to build an ML platform with experiment tracking, improved reproducibility, faster releases and compliance-ready model operations -intro: How do you design an ML platform that reliably deploys models, tracks experiments, and meets regulatory constraints? In this episode, Simon Stiebellehner — Lead MLOps Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining & Data Warehousing — walks through practical MLOps platform design grounded in real-world deployment challenges.

We cover a clear definition of MLOps as people, processes, and technology, and dig into core platform skills (cloud infrastructure, Kubernetes, Terraform), user‑centric design for notebooks and data science workflows, and software engineering fundamentals for production ML. Simon explains experiment tracking, model registry practices, deployment patterns (batch vs online), orchestration choices like Airflow, and stitching SaaS and open‑source tools into a coherent ML platform. The episode also addresses compliance and data governance — GDPR, fintech security constraints — plus metadata, lineage, API design, and monitoring. We close with build vs buy trade‑offs, staffing and on‑call considerations, and how emerging LLM needs affect platforms.

Listen to learn concrete guidance on model deployment, reproducibility, orchestration, and compliance to help you design a pragmatic, scalable ML platform +intro: How do you design an ML platform that reliably deploys models, tracks experiments, and meets regulatory constraints? In this episode, Simon Stiebellehner — Lead MLOps Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining & Data Warehousing — walks through practical MLOps platform design grounded in real-world deployment challenges.

We cover a clear definition of MLOps as people, processes, and technology, and dig into core platform skills (cloud infrastructure, Kubernetes, Terraform), user-centric design for notebooks and data science workflows, and software engineering fundamentals for production ML. Simon explains experiment tracking, model registry practices, deployment patterns (batch vs online), orchestration choices like Airflow, and stitching SaaS and open-source tools into a coherent ML platform. The episode also addresses compliance and data governance — GDPR, fintech security constraints — plus metadata, lineage, API design, and monitoring. We close with build vs buy trade-offs, staffing and on-call considerations, and how emerging LLM needs affect platforms.

Listen to learn concrete guidance on model deployment, reproducibility, orchestration, and compliance to help you design a pragmatic, scalable ML platform topics: - MLOps - machine learning @@ -59,7 +59,7 @@ quotableClips: startOffset: 830 url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=830 endOffset: 934 -- name: 'Team Size & On‑Call: Staffing and operational considerations' +- name: 'Team Size & On-Call: Staffing and operational considerations' startOffset: 934 url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=934 endOffset: 1012 @@ -79,11 +79,11 @@ quotableClips: startOffset: 1263 url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1263 endOffset: 1700 -- name: 'Self‑Service Compute: Notebooks, BigQuery, Databricks provisioning' +- name: 'Self-Service Compute: Notebooks, BigQuery, Databricks provisioning' startOffset: 1700 url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1700 endOffset: 1781 -- name: 'Experiment Tracking: Low‑hanging fruit for reproducibility and collaboration' +- name: 'Experiment Tracking: Low-hanging fruit for reproducibility and collaboration' startOffset: 1781 url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1781 endOffset: 1832 @@ -144,7 +144,7 @@ quotableClips: startOffset: 3452 url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3452 endOffset: 3579 -- name: Episode Wrap‑Up and Closing Remarks +- name: Episode Wrap-Up and Closing Remarks startOffset: 3579 url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3579 endOffset: 3522 @@ -448,7 +448,7 @@ transcript: sec: 902 time: '15:02' who: Simon -- header: 'Team Size & On‑Call: Staffing and operational considerations' +- header: 'Team Size & On-Call: Staffing and operational considerations' - line: How many people should there be? At least two? 
sec: 934 time: '15:34' @@ -664,7 +664,7 @@ transcript: sec: 1679 time: '27:59' who: Alexey -- header: 'Self‑Service Compute: Notebooks, BigQuery, Databricks provisioning' +- header: 'Self-Service Compute: Notebooks, BigQuery, Databricks provisioning' - line: It could be, let's say, GCP of BigQuery and then you have some Colab notebook and you authenticate to BigQuery, write your SQL query, and the notebook pulls in your data. That would be an exploratory setup. Of course, you want to have @@ -698,7 +698,7 @@ transcript: sec: 1774 time: '29:34' who: Simon -- header: 'Experiment Tracking: Low‑hanging fruit for reproducibility and collaboration' +- header: 'Experiment Tracking: Low-hanging fruit for reproducibility and collaboration' - line: Okay, so that's the data exploration part, where we pull the data, we explore, and we see what we can actually do with this data. The second step is, once we did the initial exploration, we train and evaluate models. Then you mentioned @@ -1265,7 +1265,7 @@ transcript: sec: 3469 time: '57:49' who: Simon -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: Yeah, thank you, Simon. Thanks a lot, everyone, for joining us today. Thanks, Simon, for joining us today too, and sharing all your expertise. That's all we have for now. Enjoy the rest of your day and the rest of the week. See you soon. 
diff --git a/_podcast/causal-inference-for-machine-learning.md b/_podcast/causal-inference-for-machine-learning.md index b59937be..cae678ba 100644 --- a/_podcast/causal-inference-for-machine-learning.md +++ b/_podcast/causal-inference-for-machine-learning.md @@ -61,7 +61,7 @@ quotableClips: startOffset: 1095 url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1095 endOffset: 1282 -- name: 'Meta-learners overview: T‑learner and counterfactual estimation' +- name: 'Meta-learners overview: T-learner and counterfactual estimation' startOffset: 1282 url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1282 endOffset: 1464 @@ -487,7 +487,7 @@ transcript: sec: 1244 time: '20:44' who: Alexey -- header: 'Meta-learners overview: T‑learner and counterfactual estimation' +- header: 'Meta-learners overview: T-learner and counterfactual estimation' - line: That's a great question. You are correct. Out of the box, supervised models do not have the capabilities to reason causally and there are many different types of causal models. 
But the one that I think is relatively the easiest to to grasp, diff --git a/_podcast/community-building-and-teaching-in-ai-tech.md b/_podcast/community-building-and-teaching-in-ai-tech.md index c25872f7..1559f5fa 100644 --- a/_podcast/community-building-and-teaching-in-ai-tech.md +++ b/_podcast/community-building-and-teaching-in-ai-tech.md @@ -52,7 +52,7 @@ quotableClips: startOffset: 303 url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=303 endOffset: 364 -- name: 'Omdena Projects: Global collaborators solving real‑world AI problems' +- name: 'Omdena Projects: Global collaborators solving real-world AI problems' startOffset: 364 url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=364 endOffset: 619 @@ -133,7 +133,7 @@ quotableClips: startOffset: 3203 url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3203 endOffset: 3289 -- name: 'Access & Scholarship Resources: Courses, GitHub projects, and women‑focused +- name: 'Access & Scholarship Resources: Courses, GitHub projects, and women-focused support' startOffset: 3289 url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3289 @@ -237,7 +237,7 @@ transcript: sec: 328 time: '5:28' who: Alexey -- header: 'Omdena Projects: Global collaborators solving real‑world AI problems' +- header: 'Omdena Projects: Global collaborators solving real-world AI problems' - line: Omdena is a global community that started in 2019. Every participant is called a "collaborator." We solve real-world challenges with AI. We started with a mental assistive application in 2019, then projects like finding anomalies on Mars. Our @@ -649,7 +649,7 @@ transcript: sec: 3269 time: '54:29' who: Alexey -- header: 'Access & Scholarship Resources: Courses, GitHub projects, and women‑focused +- header: 'Access & Scholarship Resources: Courses, GitHub projects, and women-focused support' - line: Join AI communities. There are courses available, and sometimes scholarships on platforms like Coursera. Learning through projects on GitHub is also valuable. 
@@ -689,14 +689,14 @@ transcript: time: '58:28' who: Alexey context: 'Context: This episode follows Erum Afzal and Omdena Academy’s evolution—how - global, project‑based AI collaborations and community organizing were systematized + global, project-based AI collaborations and community organizing were systematized into accessible, tiered courses and local chapters to teach practical, ethical AI skills. - Core theme: The unifying idea is that democratizing real‑world AI expertise requires - a community‑first, project‑to‑course approach—turning collaborative problem‑solving - into structured learning pathways, open instructor pipelines, regional sub‑communities, - and integrity‑focused practices so diverse learners can rapidly gain practical skills, + Core theme: The unifying idea is that democratizing real-world AI expertise requires + a community-first, project-to-course approach—turning collaborative problem-solving + into structured learning pathways, open instructor pipelines, regional sub-communities, + and integrity-focused practices so diverse learners can rapidly gain practical skills, leadership opportunities, and ethical career pathways in AI.' --- Links: diff --git a/_podcast/data-engineering-leadership-and-modern-data-platforms.md b/_podcast/data-engineering-leadership-and-modern-data-platforms.md index 4ae9bf24..a50fc5e1 100644 --- a/_podcast/data-engineering-leadership-and-modern-data-platforms.md +++ b/_podcast/data-engineering-leadership-and-modern-data-platforms.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=FljnbUQ796w description: Learn to scale ETL to ELT and build resilient data platforms—gain leadership skills, stakeholder management, data quality metrics and hiring tips -intro: 'How do you lead a data engineering team to scale ETL into ELT, build a robust data platform, and maintain data quality as you grow? 
In this episode, Rahul Jain — a data engineering manager at Siemens with 12+ years in data and three years in management — walks through that transition from ETL developer to IoT data platform lead and what leadership looks like in practice.

We cover practical topics like migrating ETL to ELT architectures, data lake and data lineage design, and end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul discusses stakeholder management, prioritization, hands-on technical credibility, balancing individual contributor work with people management, and onboarding strategies to build trust and delegate effectively. He shares approaches for measuring success (data culture, consumers served, data quality), detecting data reconciliation issues, GDPR tactics like dynamic data masking and role‑based access, and how to evaluate new tools (example: Prefect). Hiring, interview screening, and essential skills (SQL, Python, CI/CD, cloud) are also explored.

Listen to gain concrete leadership and technical guidance for scaling data platforms, improving throughput, and enabling your team to deliver reliable, compliant data products.' +intro: 'How do you lead a data engineering team to scale ETL into ELT, build a robust data platform, and maintain data quality as you grow? In this episode, Rahul Jain — a data engineering manager at Siemens with 12+ years in data and three years in management — walks through that transition from ETL developer to IoT data platform lead and what leadership looks like in practice.

We cover practical topics like migrating ETL to ELT architectures, data lake and data lineage design, and end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul discusses stakeholder management, prioritization, hands-on technical credibility, balancing individual contributor work with people management, and onboarding strategies to build trust and delegate effectively. He shares approaches for measuring success (data culture, consumers served, data quality), detecting data reconciliation issues, GDPR tactics like dynamic data masking and role-based access, and how to evaluate new tools (example: Prefect). Hiring, interview screening, and essential skills (SQL, Python, CI/CD, cloud) are also explored.

Listen to gain concrete leadership and technical guidance for scaling data platforms, improving throughput, and enabling your team to deliver reliable, compliant data products.' topics: - data engineering - career growth @@ -66,7 +66,7 @@ quotableClips: startOffset: 992 url: https://www.youtube.com/watch?v=FljnbUQ796w&t=992 endOffset: 1395 -- name: 'Expectation Framework: Non‑Negotiable Deliverables vs. Stretch (Aspirational) +- name: 'Expectation Framework: Non-Negotiable Deliverables vs. Stretch (Aspirational) Goals' startOffset: 1395 url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1395 @@ -531,7 +531,7 @@ transcript: sec: 1375 time: '22:55' who: Alexey -- header: 'Expectation Framework: Non‑Negotiable Deliverables vs. Stretch (Aspirational) +- header: 'Expectation Framework: Non-Negotiable Deliverables vs. Stretch (Aspirational) Goals' - line: Yeah, it took quite some time to build the framework to set this because the nature of businesses changes very dynamically and you will have the requirements diff --git a/_podcast/data-engineering-tools-modern-data-stack.md b/_podcast/data-engineering-tools-modern-data-stack.md index 3c729d91..cdde41b0 100644 --- a/_podcast/data-engineering-tools-modern-data-stack.md +++ b/_podcast/data-engineering-tools-modern-data-stack.md @@ -94,11 +94,11 @@ quotableClips: startOffset: 1859 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1859 endOffset: 1891 -- name: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' +- name: 'Airbyte’s Role in the Stack: Reliable E-L and DBT Integration' startOffset: 1891 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1891 endOffset: 2025 -- name: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' +- name: 'Modern Analytics Stack: Best-of-Breed Tools and Typical Components' startOffset: 2025 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2025 endOffset: 2142 @@ -106,7 +106,7 @@ quotableClips: startOffset: 2142 url: 
https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2142 endOffset: 2346 -- name: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing Them' +- name: 'Low-Code/No-Code Tools: Evolving Data Engineering Roles, Not Replacing Them' startOffset: 2346 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2346 endOffset: 2490 @@ -126,7 +126,7 @@ quotableClips: startOffset: 2759 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2759 endOffset: 2906 -- name: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' +- name: 'Open-Source Risks: Competition and Licensing (Elasticsearch Example)' startOffset: 2906 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2906 endOffset: 2938 @@ -138,7 +138,7 @@ quotableClips: startOffset: 2972 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2972 endOffset: 3642 -- name: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' +- name: 'Episode Wrap-Up: Final Thoughts, Hiring News, and Contact Information' startOffset: 3642 url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=3642 endOffset: 3595 @@ -976,7 +976,7 @@ transcript: sec: 1872 time: '31:12' who: Natalie -- header: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' +- header: 'Airbyte’s Role in the Stack: Reliable E-L and DBT Integration' - line: I think you mentioned at the beginning what Airbyte does – it's about transformation, right? It's about ingesting and then putting it into a data warehouse. Maybe now we can try to make sense from all these buzzwords. We know what the transformation @@ -984,7 +984,7 @@ transcript: about putting something into a data warehouse. Then a data warehouse is basically the database that we use for all these analytical purposes. So yeah, maybe you can tell us now what Airbyte does? 
-- header: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' +- header: 'Airbyte’s Role in the Stack: Reliable E-L and DBT Integration' - line: I think you mentioned at the beginning what Airbyte does – it's about transformation, right? It's about ingesting and then putting it into a data warehouse. Maybe now we can try to make sense from all these buzzwords. We know what the transformation @@ -1037,14 +1037,14 @@ transcript: sec: 2013 time: '33:33' who: Natalie -- header: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' +- header: 'Modern Analytics Stack: Best-of-Breed Tools and Typical Components' - line: Yeah. So speaking of this modern stack, I've heard this term many times and actually we have a talk about this quite soon. It's about this modern stack for analytics. Actually the talk we have is “modern data stack for analytics engineering.” I don't know if there are different stacks for analytics and for analytics engineering – probably they’re the same. So, what is it? Can you tell us a bit about it? Which tools are a part of this stack? Why do we even talk about it? Why is it a thing? -- header: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' +- header: 'Modern Analytics Stack: Best-of-Breed Tools and Typical Components' - line: Yeah. So speaking of this modern stack, I've heard this term many times and actually we have a talk about this quite soon. It's about this modern stack for analytics. 
Actually the talk we have is “modern data stack for analytics engineering.” @@ -1177,7 +1177,7 @@ transcript: sec: 2316 time: '38:36' who: Natalie -- header: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing +- header: 'Low-Code/No-Code Tools: Evolving Data Engineering Roles, Not Replacing Them' - line: 'To make sure I understood the whole picture: we have some of these tools like Google AdWords – all these systems, like Google AdWords, or Facebook Ads, @@ -1185,7 +1185,7 @@ transcript: our data warehouse or ingest. We import and then we do something and then we export back, right? Or using the terminology we just learned, we first extract, then do something, and then we do this reverse extract, and then put that back.' -- header: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing +- header: 'Low-Code/No-Code Tools: Evolving Data Engineering Roles, Not Replacing Them' - line: 'To make sure I understood the whole picture: we have some of these tools like Google AdWords – all these systems, like Google AdWords, or Facebook Ads, @@ -1411,13 +1411,13 @@ transcript: sec: 2846 time: '47:26' who: Alexey -- header: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' +- header: 'Open-Source Risks: Competition and Licensing (Elasticsearch Example)' - line: Yeah, exactly. It’s essentially a performance consideration. It also allows you to capture deleted rows. So that's another benefit as well. I think that we don't offer it on all of our data warehouse sources yet. But we are actively working on building out CDC capabilities for all the sources that essentially allow for that. -- header: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' +- header: 'Open-Source Risks: Competition and Licensing (Elasticsearch Example)' - line: Yeah, exactly. It’s essentially a performance consideration. It also allows you to capture deleted rows. So that's another benefit as well. 
I think that we don't offer it on all of our data warehouse sources yet. But we are actively working @@ -1702,7 +1702,7 @@ transcript: sec: 3636 time: '1:00:36' who: Alexey -- header: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' +- header: 'Episode Wrap-Up: Final Thoughts, Hiring News, and Contact Information' - line: It was such a pleasure to be on this, talking about these acronyms. I hope it helped some of your listeners get more clarity. Airbyte – check us out. We are also hiring on a lot of different fronts. Not just on the engineering front, @@ -1710,7 +1710,7 @@ transcript: gets listed on our company docs page – very public. If you want to contribute back or check us out, you can do that very easily. All the information is on our website. -- header: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' +- header: 'Episode Wrap-Up: Final Thoughts, Hiring News, and Contact Information' - line: It was such a pleasure to be on this, talking about these acronyms. I hope it helped some of your listeners get more clarity. Airbyte – check us out. We are also hiring on a lot of different fronts. 
Not just on the engineering front, diff --git a/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md index 252b1a6d..56fd9648 100644 --- a/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md +++ b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md @@ -682,9 +682,9 @@ context: 'Context: Through Dimitri’s journey and practical segments on job dat Core narrative: The unifying idea is that a sustainable, scalable freelance data career is built by starting with market demand—validate financial targets, specialize - around high‑impact problems, productize repeatable analytics offerings, leverage + around high-impact problems, productize repeatable analytics offerings, leverage productivity tools (including AI) to deliver efficiently, and adopt deliberate pricing - and client‑retention models (projects, subscriptions, or agency paths) so you can + and client-retention models (projects, subscriptions, or agency paths) so you can reliably land clients, capture value, and grow on your own terms.' --- Links: diff --git a/_podcast/data-privacy-engineering-gdpr-machine-learning.md b/_podcast/data-privacy-engineering-gdpr-machine-learning.md index 420c7a80..b289b6fd 100644 --- a/_podcast/data-privacy-engineering-gdpr-machine-learning.md +++ b/_podcast/data-privacy-engineering-gdpr-machine-learning.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=gbjoFfrm4iw description: Discover differential privacy, federated learning and PETs - privacy engineering, consent UX fixes and compliance to reduce re-identification risk -intro: 'How can teams build useful machine learning while respecting user privacy, compliance, and re‑identification risk? 
In this episode, Katharine Jarmul — privacy activist and Principal Data Scientist at ThoughtWorks Germany — walks through a practical Data Privacy Playbook focused on differential privacy, federated learning, privacy‑enhancing technologies (PETs) and consent UX.

Katharine draws on a career from data journalism and NLP to startup work at KI Protect and enterprise ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies for pseudonymisation, encrypted ML and federated architectures. We cover consent and opt‑out UX, legal vs technical definitions of privacy, profiling and fingerprinting risks, and privacy‑friendly personalization like session‑based intent and ephemeral inference.

You’ll get concrete takeaways: why differential privacy matters (formal definition, use cases, Tumult and other libraries), common anonymization pitfalls (hashing, k‑anonymity, Netflix lessons), how PETs fit into system design, and generative AI privacy considerations including retention and localized model deployment. Listeners leave with actionable guidance on privacy engineering, data minimization, consent design, and resources to continue learning.' +intro: 'How can teams build useful machine learning while respecting user privacy, compliance, and re-identification risk? In this episode, Katharine Jarmul — privacy activist and Principal Data Scientist at ThoughtWorks Germany — walks through a practical Data Privacy Playbook focused on differential privacy, federated learning, privacy-enhancing technologies (PETs) and consent UX.

Katharine draws on a career from data journalism and NLP to startup work at KI Protect and enterprise ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies for pseudonymisation, encrypted ML and federated architectures. We cover consent and opt-out UX, legal vs technical definitions of privacy, profiling and fingerprinting risks, and privacy-friendly personalization like session-based intent and ephemeral inference.

You’ll get concrete takeaways: why differential privacy matters (formal definition, use cases, Tumult and other libraries), common anonymization pitfalls (hashing, k-anonymity, Netflix lessons), how PETs fit into system design, and generative AI privacy considerations including retention and localized model deployment. Listeners leave with actionable guidance on privacy engineering, data minimization, consent design, and resources to continue learning.' topics: - data governance - data privacy @@ -48,7 +48,7 @@ quotableClips: startOffset: 693 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=693 endOffset: 875 -- name: 'Cookie Consent & Opt‑Out UX: one‑click rejects and user behavior' +- name: 'Cookie Consent & Opt-Out UX: one-click rejects and user behavior' startOffset: 875 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=875 endOffset: 984 @@ -64,12 +64,12 @@ quotableClips: startOffset: 1358 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1358 endOffset: 1512 -- name: 'User Profiling & Fingerprinting: browser history, apps, and re‑identification +- name: 'User Profiling & Fingerprinting: browser history, apps, and re-identification risks' startOffset: 1512 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1512 endOffset: 1815 -- name: 'Privacy‑Friendly Personalization: session‑based intent and ephemeral inference' +- name: 'Privacy-Friendly Personalization: session-based intent and ephemeral inference' startOffset: 1815 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1815 endOffset: 1988 @@ -86,7 +86,7 @@ quotableClips: startOffset: 2450 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2450 endOffset: 2708 -- name: 'Anonymization Pitfalls: hashing, k‑anonymity, Netflix de‑anonymization lessons' +- name: 'Anonymization Pitfalls: hashing, k-anonymity, Netflix de-anonymization lessons' startOffset: 2708 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2708 endOffset: 2820 @@ -99,7 +99,7 @@ quotableClips: startOffset: 3155 url: 
https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3155 endOffset: 3569 -- name: 'Deploying Localized Models: Azure localization, fine‑tuning, and ownership' +- name: 'Deploying Localized Models: Azure localization, fine-tuning, and ownership' startOffset: 3569 url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3569 endOffset: 3675 @@ -403,7 +403,7 @@ transcript: sec: 872 time: '14:32' who: Alexey -- header: 'Cookie Consent & Opt‑Out UX: one‑click rejects and user behavior' +- header: 'Cookie Consent & Opt-Out UX: one-click rejects and user behavior' - line: Because I think I've seen how the collected data gets shared and used in advertising optimization and I'm not a big fan of personalized advertising myself. I find it to be annoying. So I'd rather not participate if I can opt out. But I think @@ -649,7 +649,7 @@ transcript: sec: 1511 time: '25:11' who: Katharine -- header: 'User Profiling & Fingerprinting: browser history, apps, and re‑identification +- header: 'User Profiling & Fingerprinting: browser history, apps, and re-identification risks' - line: We talked about this website – about data engineering, right? [Katharine agrees] So maybe you can use the online advertisements as an example. I visit a website @@ -734,7 +734,7 @@ transcript: sec: 1787 time: '29:47' who: Alexey -- header: 'Privacy‑Friendly Personalization: session‑based intent and ephemeral inference' +- header: 'Privacy-Friendly Personalization: session-based intent and ephemeral inference' - line: Yeah. One of the things that I'm excited to see is that I think there's a lot more thinking through intent-based recommendation, so “What is the intent of the user? 
And how do we improve?” The first time I noticed it was, I think, @@ -1006,7 +1006,7 @@ transcript: sec: 2676 time: '44:36' who: Alexey -- header: 'Anonymization Pitfalls: hashing, k‑anonymity, Netflix de‑anonymization +- header: 'Anonymization Pitfalls: hashing, k-anonymity, Netflix de-anonymization lessons' - line: Those are kind of what I would call more “old school” methods of anonymization. People might have also heard about K-anonymity, which is another one of what I @@ -1296,7 +1296,7 @@ transcript: sec: 3525 time: '58:45' who: Alexey -- header: 'Deploying Localized Models: Azure localization, fine‑tuning, and ownership' +- header: 'Deploying Localized Models: Azure localization, fine-tuning, and ownership' - line: Yeah. But, again, I don't think the responsibility should ever lie on the user. You're doing exactly the way the product is designed to be used. It is not your fault that it's not thought through. How you could be like, “Oh, hey. For diff --git a/_podcast/data-quality-data-observability-data-reliability.md b/_podcast/data-quality-data-observability-data-reliability.md index 5632852c..833e3fab 100644 --- a/_podcast/data-quality-data-observability-data-reliability.md +++ b/_podcast/data-quality-data-observability-data-reliability.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/data-observability-barr-moses/id1541710331?i=1000518351217 description: Discover data observability, freshness, lineage and schema detection to prevent downtime, stop model drift and cut false positives in pipelines -intro: How do you prevent data downtime, drift, and false positives before they break analytics and models? In this episode, Barr Moses, CEO and co‑founder of Monte Carlo and former VP of Customer Operations at Gainsight, walks through a practical framework for data observability grounded in real-world incidents and DevOps principles.

Barr explains why batch data needs different approaches than app monitoring and outlines the Five Pillars of Data Observability—freshness, volume, distribution, schema, and lineage. You’ll hear a schema‑change case study, learn how silent failures and model drift occur, and how to move from monitoring to true observability for faster root cause analysis using correlation, logs, and lineage. The conversation covers accountability models (RACI), defining and automating data SLAs, operational runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria for end‑to‑end platforms versus point tools.

Listeners will get actionable guidance on reducing false positives, prioritizing pipeline fixes, implementing auto lineage, and applying anomaly detection with contextual alerts—practical steps to improve data quality, reliability, and observability across cloud‑agnostic environments +intro: How do you prevent data downtime, drift, and false positives before they break analytics and models? In this episode, Barr Moses, CEO and co-founder of Monte Carlo and former VP of Customer Operations at Gainsight, walks through a practical framework for data observability grounded in real-world incidents and DevOps principles.

Barr explains why batch data needs different approaches than app monitoring and outlines the Five Pillars of Data Observability—freshness, volume, distribution, schema, and lineage. You’ll hear a schema-change case study, learn how silent failures and model drift occur, and how to move from monitoring to true observability for faster root cause analysis using correlation, logs, and lineage. The conversation covers accountability models (RACI), defining and automating data SLAs, operational runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria for end-to-end platforms versus point tools.

Listeners will get actionable guidance on reducing false positives, prioritizing pipeline fixes, implementing auto lineage, and applying anomaly detection with contextual alerts—practical steps to improve data quality, reliability, and observability across cloud-agnostic environments topics: - MLOps - data observability diff --git a/_podcast/data-science-leadership-hiring-mlops.md b/_podcast/data-science-leadership-hiring-mlops.md index 4211485e..3b20b64b 100644 --- a/_podcast/data-science-leadership-hiring-mlops.md +++ b/_podcast/data-science-leadership-hiring-mlops.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/becoming-a-data-science-manager-mariano-semelman/id1541710331?i=1000547222296 description: Discover data science leadership, recommender systems & MLOps tactics—hire, mentor and deploy models faster with practical frameworks and tips -intro: How do you lead a data science team that prioritizes product impact while building recommender systems, real‑time bidding (RTB) solutions, and maintainable MLOps? In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 years of experience, walks through practical leadership decisions that bridge models and products.

Mariano describes his shift from software development to data science leadership, daily responsibilities (meetings, mentoring, planning), and how he structures teams of data scientists and ML engineers. Key topics include product‑first ML, search and recommender systems, advertising and RTB campaign optimization, CRISP‑DM in production, diagnosing overfitting and feature issues, and pragmatic deployment patterns like start simple, fail fast, and iterative experiments. He also shares onboarding tactics (30‑60‑90 plans), feedback techniques ("ask permission, care, offer options"), one‑on‑ones, handling departures, code reviews as a manager, delegation through senior engineers, and hiring/remediation practices.

Listen to learn concrete approaches for prioritizing modeling time, running experiments in production, improving MLOps and NLP practices, and mentoring engineers to deliver measurable product outcomes +intro: How do you lead a data science team that prioritizes product impact while building recommender systems, real-time bidding (RTB) solutions, and maintainable MLOps? In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 years of experience, walks through practical leadership decisions that bridge models and products.

Mariano describes his shift from software development to data science leadership, daily responsibilities (meetings, mentoring, planning), and how he structures teams of data scientists and ML engineers. Key topics include product-first ML, search and recommender systems, advertising and RTB campaign optimization, CRISP-DM in production, diagnosing overfitting and feature issues, and pragmatic deployment patterns like start simple, fail fast, and iterative experiments. He also shares onboarding tactics (30-60-90 plans), feedback techniques ("ask permission, care, offer options"), one-on-ones, handling departures, code reviews as a manager, delegation through senior engineers, and hiring/remediation practices.

Listen to learn concrete approaches for prioritizing modeling time, running experiments in production, improving MLOps and NLP practices, and mentoring engineers to deliver measurable product outcomes topics: - data science - machine learning @@ -62,7 +62,7 @@ quotableClips: startOffset: 651 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=651 endOffset: 772 -- name: '30‑60‑90 Plan: Onboarding, Listening, and Learning' +- name: '30-60-90 Plan: Onboarding, Listening, and Learning' startOffset: 772 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=772 endOffset: 916 @@ -78,7 +78,7 @@ quotableClips: startOffset: 1197 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1197 endOffset: 1279 -- name: 'Advertising Domain: Real‑Time Bidding and Campaign Optimization' +- name: 'Advertising Domain: Real-Time Bidding and Campaign Optimization' startOffset: 1279 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1279 endOffset: 1389 @@ -90,7 +90,7 @@ quotableClips: startOffset: 1576 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1576 endOffset: 1769 -- name: 'Product‑First Mindset: Prioritizing User Impact' +- name: 'Product-First Mindset: Prioritizing User Impact' startOffset: 1769 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1769 endOffset: 1806 @@ -102,7 +102,7 @@ quotableClips: startOffset: 2016 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2016 endOffset: 2172 -- name: CRISP‑DM Process and Deployment Realities +- name: CRISP-DM Process and Deployment Realities startOffset: 2172 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2172 endOffset: 2210 @@ -118,7 +118,7 @@ quotableClips: startOffset: 2657 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2657 endOffset: 2893 -- name: One‑on‑Ones and Creating a Safe Growth Environment +- name: One-on-Ones and Creating a Safe Growth Environment startOffset: 2893 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2893 endOffset: 3039 @@ -142,7 +142,7 @@ quotableClips: startOffset: 3697 url: 
https://www.youtube.com/watch?v=qOLR84-KHoY&t=3697 endOffset: 3947 -- name: Key Takeaways and Follow‑Up Opportunities +- name: Key Takeaways and Follow-Up Opportunities startOffset: 3947 url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3947 endOffset: 3991 @@ -325,7 +325,7 @@ transcript: sec: 758 time: '12:38' who: Alexey -- header: '30‑60‑90 Plan: Onboarding, Listening, and Learning' +- header: '30-60-90 Plan: Onboarding, Listening, and Learning' - line: I think I got to know about it a few months in advance. Until it's announced and doesn’t actually happen, you don't know if it will happen. As soon as I knew that they wanted to put me in the position, I had one month or so to prepare myself. @@ -456,7 +456,7 @@ transcript: sec: 1274 time: '21:14' who: Alexey -- header: 'Advertising Domain: Real‑Time Bidding and Campaign Optimization' +- header: 'Advertising Domain: Real-Time Bidding and Campaign Optimization' - line: Kind of. It actually confused me a lot because I didn't know this thing beforehand, at least not in detail. That was my first “Aha!” moment, because I assumed it was the team that we were using to do publicity for OLX – to send traffic to OLX. @@ -560,7 +560,7 @@ transcript: sec: 1576 time: '26:16' who: Mariano -- header: 'Product‑First Mindset: Prioritizing User Impact' +- header: 'Product-First Mindset: Prioritizing User Impact' - line: I think one thing you didn't mention is that you also help a lot – from what I see – is connecting product people (from product management) with data scientists and helping them by becoming a translator between them. That's something that @@ -660,7 +660,7 @@ transcript: sec: 2016 time: '33:36' who: Mariano -- header: CRISP‑DM Process and Deployment Realities +- header: CRISP-DM Process and Deployment Realities - line: I like CRISP-DM, but there is no book. It's just an article in Wikipedia. I think there is a book, actually. But anyway, I like this process. 
It's a very old process – it’s like 20 years old or something like that. Surprisingly, it's @@ -826,7 +826,7 @@ transcript: sec: 2890 time: '48:10' who: Alexey -- header: One‑on‑Ones and Creating a Safe Growth Environment +- header: One-on-Ones and Creating a Safe Growth Environment - line: Yes, one-on-ones. For me, as I mentioned, I believe they should happen at least once a week. Of course, with every single person, maybe 15 minutes to catch up once a week, that's fine. You may feel compelled to skip it if there is no @@ -1087,7 +1087,7 @@ transcript: sec: 3944 time: '1:05:44' who: Mariano -- header: Key Takeaways and Follow‑Up Opportunities +- header: Key Takeaways and Follow-Up Opportunities - line: No, but like we actually diverged and I think it turned out to be better than what I had in mind. So yeah, thanks a lot for joining us today. Thanks a lot for sharing your story with us, for sharing your experience. Also, this 30-60-90 thing, diff --git a/_podcast/data-science-manager-vs-expert-hiring-guide.md b/_podcast/data-science-manager-vs-expert-hiring-guide.md index 3b828ba1..29f1571c 100644 --- a/_podcast/data-science-manager-vs-expert-hiring-guide.md +++ b/_podcast/data-science-manager-vs-expert-hiring-guide.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/data-science-manager-vs-data-science-expert-barbara/id1541710331?i=1000542496818 description: Learn hiring strategies for Data Science Manager vs Data Science Expert—when to hire experts, build teams, assess ML needs, and boost business impact -intro: 'When should you hire a data science manager versus a deep technical expert, and how do you decide whether machine learning is actually the right solution? 
In this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, and manager by passion — walks through her career from GIS → SQL → BI to leading teams, and tackles hiring strategy, role design, and practical ML use cases like mental health monitoring and demand forecasting.

We cover common pitfalls (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager skill balance between technical literacy and soft skills, and what “hands-on” really means for managers: high‑level understanding, code review, and time allocation. Learn when to hire a data science expert for complex models or domain knowledge, and when a manager-plus-generalist approach or a startup “unicorn” makes sense. Barbara also discusses team building (learning plans, pairing), project prioritization, model monitoring, feasibility checks (data quality and baselines), and measuring impact with KPIs and client discovery.

Listen to gain practical hiring criteria, role profiles, and decision frameworks for when to use machine learning and how to build teams that deliver.' +intro: 'When should you hire a data science manager versus a deep technical expert, and how do you decide whether machine learning is actually the right solution? In this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, and manager by passion — walks through her career from GIS → SQL → BI to leading teams, and tackles hiring strategy, role design, and practical ML use cases like mental health monitoring and demand forecasting.

We cover common pitfalls (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager skill balance between technical literacy and soft skills, and what “hands-on” really means for managers: high-level understanding, code review, and time allocation. Learn when to hire a data science expert for complex models or domain knowledge, and when a manager-plus-generalist approach or a startup “unicorn” makes sense. Barbara also discusses team building (learning plans, pairing), project prioritization, model monitoring, feasibility checks (data quality and baselines), and measuring impact with KPIs and client discovery.

Listen to gain practical hiring criteria, role profiles, and decision frameworks for when to use machine learning and how to build teams that deliver.' topics: - data science - machine learning diff --git a/_podcast/data-science-team-structure-and-org-design.md b/_podcast/data-science-team-structure-and-org-design.md index 916ed1fd..c0413df0 100644 --- a/_podcast/data-science-team-structure-and-org-design.md +++ b/_podcast/data-science-team-structure-and-org-design.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=F_rJ4fg5ZEA description: 'Discover how to design high-impact data science orgs: centralized vs embedded models, staffing ratios and experimentation to speed decisions and scale impact.' -intro: 'How should you structure a data science organization to maximize product impact: centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director of Data Science at Twitter who leads 70 data scientists and previously led Azure Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation patterns for designing high‑impact data science orgs.

We cover centralized vs embedded models and what “embedding” really means for reporting lines and day‑to‑day integration with feature teams; Twitter’s hybrid per‑division approach for product and ads; staffing guidance (including an engineers‑to‑data‑scientist ratio reference); and rhythms for cross‑functional planning, OKRs, and dependency management. Lisa also discusses experimentation and experiment review, defining success metrics and ship criteria, knowledge sharing practices, differences between analytics and ML‑heavy data science, and how to partner with product, engineering, design, and research.

Listen to gain actionable guidance on choosing an org model, setting staffing expectations, establishing experiment and metrics practices, and aligning data pipelines, data quality, and OKRs to drive data‑driven product decisions.' +intro: 'How should you structure a data science organization to maximize product impact: centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director of Data Science at Twitter who leads 70 data scientists and previously led Azure Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation patterns for designing high-impact data science orgs.

We cover centralized vs embedded models and what “embedding” really means for reporting lines and day-to-day integration with feature teams; Twitter’s hybrid per-division approach for product and ads; staffing guidance (including an engineers-to-data-scientist ratio reference); and rhythms for cross-functional planning, OKRs, and dependency management. Lisa also discusses experimentation and experiment review, defining success metrics and ship criteria, knowledge sharing practices, differences between analytics and ML-heavy data science, and how to partner with product, engineering, design, and research.

Listen to gain actionable guidance on choosing an org model, setting staffing expectations, establishing experiment and metrics practices, and aligning data pipelines, data quality, and OKRs to drive data-driven product decisions.' topics: - data science - data teams @@ -39,7 +39,7 @@ quotableClips: startOffset: 387 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=387 endOffset: 514 -- name: 'Embedding Explained: Reporting lines vs day‑to‑day integration with feature +- name: 'Embedding Explained: Reporting lines vs day-to-day integration with feature teams' startOffset: 514 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=514 @@ -52,15 +52,15 @@ quotableClips: startOffset: 926 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=926 endOffset: 1123 -- name: 'Team Rhythms & Planning: Cross‑functional ceremonies and dependency management' +- name: 'Team Rhythms & Planning: Cross-functional ceremonies and dependency management' startOffset: 1123 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1123 endOffset: 1318 -- name: 'Cross‑Functional Alignment: OKRs and aligning goals across levels' +- name: 'Cross-Functional Alignment: OKRs and aligning goals across levels' startOffset: 1318 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1318 endOffset: 1493 -- name: 'Twitter’s Approach: Hybrid per‑division model for product and ads' +- name: 'Twitter’s Approach: Hybrid per-division model for product and ads' startOffset: 1493 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1493 endOffset: 1548 @@ -68,7 +68,7 @@ quotableClips: startOffset: 1548 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1548 endOffset: 1765 -- name: 'Centralized Model: Knowledge sharing, consistency, and context‑building challenges' +- name: 'Centralized Model: Knowledge sharing, consistency, and context-building challenges' startOffset: 1765 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1765 endOffset: 1852 @@ -81,7 +81,7 @@ quotableClips: startOffset: 1988 url: 
https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1988 endOffset: 2209 -- name: 'Staffing Guidance: Engineers‑to‑data‑scientist ratios and ML partnerships +- name: 'Staffing Guidance: Engineers-to-data-scientist ratios and ML partnerships (8:1 reference)' startOffset: 2209 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2209 @@ -91,7 +91,7 @@ quotableClips: startOffset: 2539 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2539 endOffset: 2769 -- name: 'Product Partnership: Co‑ownership with product, engineering, design, and +- name: 'Product Partnership: Co-ownership with product, engineering, design, and research' startOffset: 2769 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2769 @@ -101,7 +101,7 @@ quotableClips: startOffset: 2840 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2840 endOffset: 3044 -- name: 'Analytics vs Data Science: Analysts driving dashboards vs ML‑heavy DS work' +- name: 'Analytics vs Data Science: Analysts driving dashboards vs ML-heavy DS work' startOffset: 3044 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3044 endOffset: 3150 @@ -110,11 +110,11 @@ quotableClips: startOffset: 3150 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3150 endOffset: 3256 -- name: 'Resolving Conflicts: Data‑driven opportunity sizing for prioritization decisions' +- name: 'Resolving Conflicts: Data-driven opportunity sizing for prioritization decisions' startOffset: 3256 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3256 endOffset: 3348 -- name: 'Data‑Driven Product Innovation: Guiding roadmap decisions with trusted data' +- name: 'Data-Driven Product Innovation: Guiding roadmap decisions with trusted data' startOffset: 3348 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3348 endOffset: 3451 @@ -127,7 +127,7 @@ quotableClips: startOffset: 3578 url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3578 endOffset: 3603 -- name: Episode Wrap‑Up and Closing Remarks +- name: Episode Wrap-Up and Closing Remarks startOffset: 3603 url: 
https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3603 endOffset: 3535 @@ -266,7 +266,7 @@ transcript: sec: 498 time: '8:18' who: Alexey -- header: 'Embedding Explained: Reporting lines vs day‑to‑day integration with feature +- header: 'Embedding Explained: Reporting lines vs day-to-day integration with feature teams' - line: Yeah, we can go back maybe to the Azure example. So we have a whole organization, with the VP managing the managers across the various areas – all data science @@ -458,7 +458,7 @@ transcript: sec: 1101 time: '18:21' who: Lisa -- header: 'Team Rhythms & Planning: Cross‑functional ceremonies and dependency management' +- header: 'Team Rhythms & Planning: Cross-functional ceremonies and dependency management' - line: But I guess a team has to have some sort of rhythm – some sort of ceremonies and things like this. For example, you start with planning, then you work for two weeks, then you finish with a retrospective – some sort of process, right? @@ -517,7 +517,7 @@ transcript: sec: 1239 time: '20:39' who: Lisa -- header: 'Cross‑Functional Alignment: OKRs and aligning goals across levels' +- header: 'Cross-Functional Alignment: OKRs and aligning goals across levels' - line: But on the surface, it looks a bit complicated, right? Because you have a lot of functions that are not really connected, if you think about the hierarchical structures, but they still somehow work together and move towards the same direction @@ -567,7 +567,7 @@ transcript: sec: 1443 time: '24:03' who: Lisa -- header: 'Twitter’s Approach: Hybrid per‑division model for product and ads' +- header: 'Twitter’s Approach: Hybrid per-division model for product and ads' - line: I'm taking a lot of notes because I want to come back to this and talk about that. But I also wanted to take a step back and, again, come back to this “centralized vs decentralized”. 
I think we've talked about what we can call “centralized,” @@ -650,7 +650,7 @@ transcript: sec: 1729 time: '28:49' who: Lisa -- header: 'Centralized Model: Knowledge sharing, consistency, and context‑building +- header: 'Centralized Model: Knowledge sharing, consistency, and context-building challenges' - line: Okay. And what are the cons of the centralized approach? What are the disadvantages? sec: 1765 @@ -793,7 +793,7 @@ transcript: sec: 2186 time: '36:26' who: Lisa -- header: 'Staffing Guidance: Engineers‑to‑data‑scientist ratios and ML partnerships +- header: 'Staffing Guidance: Engineers-to-data-scientist ratios and ML partnerships (8:1 reference)' - line: We have a question, “How many data scientists will I need? How do I estimate this before starting a project?” @@ -981,7 +981,7 @@ transcript: sec: 2755 time: '45:55' who: Lisa -- header: 'Product Partnership: Co‑ownership with product, engineering, design, and +- header: 'Product Partnership: Co-ownership with product, engineering, design, and research' - line: But you still can work from home if you want to, right? Okay. There was something else that I wanted to talk about, which is – I took a look at your LinkedIn and @@ -1054,7 +1054,7 @@ transcript: sec: 2890 time: '48:10' who: Lisa -- header: 'Analytics vs Data Science: Analysts driving dashboards vs ML‑heavy DS work' +- header: 'Analytics vs Data Science: Analysts driving dashboards vs ML-heavy DS work' - line: Do you have product analysts? Or is it mostly data scientists who do analytics? 
sec: 3044 time: '50:44' @@ -1121,7 +1121,7 @@ transcript: sec: 3178 time: '52:58' who: Lisa -- header: 'Resolving Conflicts: Data‑driven opportunity sizing for prioritization +- header: 'Resolving Conflicts: Data-driven opportunity sizing for prioritization decisions' - line: How often does it happen – maybe not specifically at Twitter, but just in your experience – that in this kind of setup, different functions have conflicting @@ -1158,7 +1158,7 @@ transcript: sec: 3347 time: '55:47' who: Lisa -- header: 'Data‑Driven Product Innovation: Guiding roadmap decisions with trusted +- header: 'Data-Driven Product Innovation: Guiding roadmap decisions with trusted data' - line: Okay. [chuckles] Coming back to this sentence, “partnering closely with product management, engineering, design and research,” we covered that – “to pursue data-driven @@ -1255,7 +1255,7 @@ transcript: sec: 3595 time: '59:55' who: Alexey -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: This is great, yeah. I love the conversation. Thank you for driving through all the different topics I’m exploring here. Great to chat with you, as always. sec: 3603 diff --git a/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md index ec5d1feb..c12cc513 100644 --- a/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md +++ b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=KsV_SVXlTo8 description: 'Build indie-hacking products: launch crypto alerts & generative AI apps, validate ideas, choose tech, price effectively and monetize for sustainable growth.' -intro: 'How do you build, launch, and actually monetize indie-hacker products in crypto alerts and generative AI while keeping a day job? 
In this episode, Pauline Clavelloux — an IBM data science manager and consultant with eight years’ experience who also ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired selfie-to-art tool) — walks through the practical steps.

We cover Pauline’s career path and an ML production case study (money‑laundering detection), then move into indie-hacking essentials: bootstrapping, splitting time between a full‑time role and side projects, and validating ideas. You’ll hear how she productized projects (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API fine‑tuning vs self‑hosted GPUs), managed operating costs, and launched via Twitter and niche listings. The conversation also tackles customer acquisition, pricing constraints, marketing and content strategy, and skills gained across GCP, data engineering, web dev, and growth.

Listen for actionable guidance on product launch, monetization, and time management for indie hackers working on crypto alerts and generative AI—concrete steps to validate, build, and grow side products without external funding.' +intro: 'How do you build, launch, and actually monetize indie-hacker products in crypto alerts and generative AI while keeping a day job? In this episode, Pauline Clavelloux — an IBM data science manager and consultant with eight years’ experience who also ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired selfie-to-art tool) — walks through the practical steps.

We cover Pauline’s career path and an ML production case study (money-laundering detection), then move into indie-hacking essentials: bootstrapping, splitting time between a full-time role and side projects, and validating ideas. You’ll hear how she productized projects (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API fine-tuning vs self-hosted GPUs), managed operating costs, and launched via Twitter and niche listings. The conversation also tackles customer acquisition, pricing constraints, marketing and content strategy, and skills gained across GCP, data engineering, web dev, and growth.

Listen for actionable guidance on product launch, monetization, and time management for indie hackers working on crypto alerts and generative AI—concrete steps to validate, build, and grow side products without external funding.' topics: - indie hacking - bootstrapping diff --git a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md index 40123c87..9fb06e8e 100644 --- a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md +++ b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=jGbfeYdlCiQ description: 'Master actionable data strategy, DataOps & GPT: learn to pitch small AI use cases, set baselines, apply CI/CD and deliver measurable AI-powered products.' -intro: How do you turn AI ambitions into measurable, deliverable data products? In this episode Boyan Angelov — author of Elements of Data Strategy and leader of data strategy at Exxeta AG — walks through practical steps to make data strategy actionable for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused plan and explains the due diligence needed to align business goals with feasible use cases.

Topics covered include use case ideation, feasibility and prioritization, managing influence cascades and scope creep, impact assessment and portfolio management, and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for data — and clarify platform, AI and BI roles and the core skills required for strategists. Boyan also shows how GPT and ChatGPT can be used as a writing co‑pilot for outlines, pitches and technical guidance, and recommends starting small with budgeted use cases plus baseline and post-implementation metrics to measure success.

Listen to get concrete guidance on pitching, measuring and operationalizing a data strategy for AI-powered products — including practical DataOps and GPT workflows you can apply right away +intro: How do you turn AI ambitions into measurable, deliverable data products? In this episode Boyan Angelov — author of Elements of Data Strategy and leader of data strategy at Exxeta AG — walks through practical steps to make data strategy actionable for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused plan and explains the due diligence needed to align business goals with feasible use cases.

Topics covered include use case ideation, feasibility and prioritization, managing influence cascades and scope creep, impact assessment and portfolio management, and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for data — and clarify platform, AI and BI roles and the core skills required for strategists. Boyan also shows how GPT and ChatGPT can be used as a writing co-pilot for outlines, pitches and technical guidance, and recommends starting small with budgeted use cases plus baseline and post-implementation metrics to measure success.

Listen to get concrete guidance on pitching, measuring and operationalizing a data strategy for AI-powered products — including practical DataOps and GPT workflows you can apply right away topics: - data strategy - dataops @@ -91,7 +91,7 @@ quotableClips: startOffset: 2491 url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2491 endOffset: 2626 -- name: 'GPT as Writing Co‑Pilot: Sidebars, Editing & Ethical Considerations' +- name: 'GPT as Writing Co-Pilot: Sidebars, Editing & Ethical Considerations' startOffset: 2626 url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2626 endOffset: 2840 @@ -936,7 +936,7 @@ transcript: sec: 2548 time: '42:28' who: Boyan -- header: 'GPT as Writing Co‑Pilot: Sidebars, Editing & Ethical Considerations' +- header: 'GPT as Writing Co-Pilot: Sidebars, Editing & Ethical Considerations' - line: Interesting. Then there's something I really wanted to talk to you about. For your book, you used GPT, and that's really nice that you explicitly acknowledge that. diff --git a/_podcast/dataops-and-gitops-best-practices-for-data-teams.md b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md index 94b606b6..44e8214d 100644 --- a/_podcast/dataops-and-gitops-best-practices-for-data-teams.md +++ b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=lem7knxqNzg description: Master DataOps, GitOps and IaC best practices for reproducibility, onboarding and production reliability — actionable Git workflows, Terraform, Docker tips -intro: How do you make data work less fragile and easier to onboard while keeping production safe and reproducible? In this episode, Tomasz Hinc, a DataOps practitioner from Poznań with roots in econometrics, product analytics, data engineering and ML, walks through practical DataOps and GitOps patterns for data teams. We cover platform onboarding (requesting infra vs. 
merge requests), Infrastructure as Code with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis dry‑run and apply. Tomasz explains reproducibility strategies—fixed versions, Docker, dependency management—and common production pitfalls like silent failures and Airflow caveats. You’ll hear about reducing onboarding friction for data scientists, the minimal operational skills every data role benefits from (Git, CLI, IAM), and platform team responsibilities for review, enablement and proactive support. If you’re focused on Infrastructure as Code, GitOps, reproducible pipelines, or practical production best practices for batch workloads and CI migrations, this episode delivers hands‑on advice, learning paths and tooling choices to make your data work faster, safer and more maintainable +intro: How do you make data work less fragile and easier to onboard while keeping production safe and reproducible? In this episode, Tomasz Hinc, a DataOps practitioner from Poznań with roots in econometrics, product analytics, data engineering and ML, walks through practical DataOps and GitOps patterns for data teams. We cover platform onboarding (requesting infra vs. merge requests), Infrastructure as Code with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis dry-run and apply. Tomasz explains reproducibility strategies—fixed versions, Docker, dependency management—and common production pitfalls like silent failures and Airflow caveats. You’ll hear about reducing onboarding friction for data scientists, the minimal operational skills every data role benefits from (Git, CLI, IAM), and platform team responsibilities for review, enablement and proactive support. 
If you’re focused on Infrastructure as Code, GitOps, reproducible pipelines, or practical production best practices for batch workloads and CI migrations, this episode delivers hands-on advice, learning paths and tooling choices to make your data work faster, safer and more maintainable topics: - DataOps - GitOps @@ -43,7 +43,7 @@ quotableClips: startOffset: 271 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=271 endOffset: 320 -- name: 'ML Education: Multi‑Dimensional Analysis to Machine Learning' +- name: 'ML Education: Multi-Dimensional Analysis to Machine Learning' startOffset: 320 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=320 endOffset: 394 @@ -63,7 +63,7 @@ quotableClips: startOffset: 787 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=787 endOffset: 852 -- name: 'Motivation Shift: From Model‑Centric to Data‑Centric Work' +- name: 'Motivation Shift: From Model-Centric to Data-Centric Work' startOffset: 852 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=852 endOffset: 1139 @@ -91,7 +91,7 @@ quotableClips: startOffset: 1654 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1654 endOffset: 1774 -- name: 'Learning Path: Narrow Scope, Hands‑On Mentorship, Roadmap Advice' +- name: 'Learning Path: Narrow Scope, Hands-On Mentorship, Roadmap Advice' startOffset: 1774 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1774 endOffset: 2155 @@ -107,7 +107,7 @@ quotableClips: startOffset: 2444 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2444 endOffset: 2512 -- name: 'Proactive Support: Monitoring, Onboarding, and Cross‑Team Education' +- name: 'Proactive Support: Monitoring, Onboarding, and Cross-Team Education' startOffset: 2512 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2512 endOffset: 2663 @@ -119,7 +119,7 @@ quotableClips: startOffset: 2875 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2875 endOffset: 3277 -- name: 'Distinction from Management: Cross‑Team Enablement vs Team Leads' +- name: 'Distinction from Management: 
Cross-Team Enablement vs Team Leads' startOffset: 3277 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3277 endOffset: 3404 @@ -127,7 +127,7 @@ quotableClips: startOffset: 3404 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3404 endOffset: 3506 -- name: 'Company‑Scale Migration: Jenkins → GitLab CI and Broad Collaboration' +- name: 'Company-Scale Migration: Jenkins → GitLab CI and Broad Collaboration' startOffset: 3506 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3506 endOffset: 3687 @@ -135,7 +135,7 @@ quotableClips: startOffset: 3687 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3687 endOffset: 3748 -- name: 'Confidence in Data: Pragmatic Edge‑Case Checks & Airflow Caveats' +- name: 'Confidence in Data: Pragmatic Edge-Case Checks & Airflow Caveats' startOffset: 3748 url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3748 endOffset: 3941 @@ -207,7 +207,7 @@ transcript: sec: 303 time: '5:03' who: Alexey -- header: 'ML Education: Multi‑Dimensional Analysis to Machine Learning' +- header: 'ML Education: Multi-Dimensional Analysis to Machine Learning' - line: Sure. I mentioned “by accident” because I haven't actually searched for it specifically. Yeah, I definitely wanted to be into machine learning and stuff. I just received a link that said, “Hey, some company is hiring for a machine learning @@ -333,7 +333,7 @@ transcript: sec: 848 time: '14:08' who: Tomasz -- header: 'Motivation Shift: From Model‑Centric to Data‑Centric Work' +- header: 'Motivation Shift: From Model-Centric to Data-Centric Work' - line: So when did you realize that you actually enjoy doing this stuff more than your work as a data scientist? How did it happen? sec: 852 @@ -555,7 +555,7 @@ transcript: sec: 1743 time: '29:03' who: Tomasz -- header: 'Learning Path: Narrow Scope, Hands‑On Mentorship, Roadmap Advice' +- header: 'Learning Path: Narrow Scope, Hands-On Mentorship, Roadmap Advice' - line: Okay. 
One of the questions I wanted to ask you is – how did you actually learn this thing? How did you become a DataOps? But I think from what I understood is, you just simply had to do this and then you had a Zoom call with some sort of @@ -735,7 +735,7 @@ transcript: sec: 2505 time: '41:45' who: Alexey -- header: 'Proactive Support: Monitoring, Onboarding, and Cross‑Team Education' +- header: 'Proactive Support: Monitoring, Onboarding, and Cross-Team Education' - line: Exactly. Most often, honestly, live coding, designing some solutions. If you think about which domains DataOps touches, it's essentially past – meaning absorbing the technical debt. The present – meaning handling the users’ requests, like daily @@ -931,7 +931,7 @@ transcript: sec: 3269 time: '54:29' who: Tomasz -- header: 'Distinction from Management: Cross‑Team Enablement vs Team Leads' +- header: 'Distinction from Management: Cross-Team Enablement vs Team Leads' - line: While you were away, I was trying to keep people on the call entertained. One Adonis mentioned is that what we talked about largely sounded like a data management role – all these Zoom calls, all this support in Slack, all this trying @@ -987,7 +987,7 @@ transcript: sec: 3496 time: '58:16' who: Tomasz -- header: 'Company‑Scale Migration: Jenkins → GitLab CI and Broad Collaboration' +- header: 'Company-Scale Migration: Jenkins → GitLab CI and Broad Collaboration' - line: Okay, we have a few questions. One of the questions is, “What was your most interesting project and why?” sec: 3506 @@ -1046,7 +1046,7 @@ transcript: sec: 3747 time: '1:02:27' who: Tomasz -- header: 'Confidence in Data: Pragmatic Edge‑Case Checks & Airflow Caveats' +- header: 'Confidence in Data: Pragmatic Edge-Case Checks & Airflow Caveats' - line: Okay. Last question for today. At the beginning, you told us a story when you worked in analytics and somebody from management asked you how confident you were in the results. So how do you usually answer this question? 
diff --git a/_podcast/dataops-for-data-engineering.md b/_podcast/dataops-for-data-engineering.md index 21794875..b29e1ef8 100644 --- a/_podcast/dataops-for-data-engineering.md +++ b/_podcast/dataops-for-data-engineering.md @@ -22,12 +22,12 @@ intro: How do you transform fragile data pipelines and unreliable ML deployments CEO of DataKitchen and co-author of the DataOps Cookbook and DataOps Manifesto, walks through practical DataOps for data engineering—drawing on 25+ years across research, software engineering, and analytics.

We trace his career from - pre‑cloud SQL Server scaling challenges to early DevOps lessons, then dig into what + pre-cloud SQL Server scaling challenges to early DevOps lessons, then dig into what DataOps means for teams facing burnout, deployment fear, and inconsistent processes. Key topics include automation, observability, CI/CD pipelines, regression tests - and test data for analytics, model reliability and on‑call readiness, end‑to‑end + and test data for analytics, model reliability and on-call readiness, end-to-end deployment automation, data versioning, and the differences between containers and - serverless. The episode also clarifies MLOps and LLM buzzwords, explores day‑one/day‑two/day‑three + serverless. The episode also clarifies MLOps and LLM buzzwords, explores day-one/day-two/day-three operational lifecycle practices, and outlines concrete steps to reduce rework and cycle time.

If you’re a data engineer, data scientist, or engineering leader looking to improve analytics delivery, this conversation offers actionable guidance @@ -76,7 +76,7 @@ quotableClips: startOffset: 1436 url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1436 endOffset: 1573 -- name: Model Reliability and On‑call Readiness for Data Science +- name: Model Reliability and On-call Readiness for Data Science startOffset: 1573 url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1573 endOffset: 1855 @@ -314,7 +314,7 @@ transcript: sec: 1464 time: '24:24' who: Christopher -- header: Model Reliability and On‑call Readiness for Data Science +- header: Model Reliability and On-call Readiness for Data Science - line: Let's take a data scientist as an example. They pull data, do some transformations, and build a model. Day one is about getting that initial version ready. What happens on day two? @@ -475,8 +475,8 @@ context: 'DataOps is the episode’s unifying idea: treating data and ML work as production-ready products by applying software best practices—automation, CI/CD, testing and test data, immutable versioning, and observability—plus cultural change and leadership to remove fear, reduce rework and burnout, and shorten cycle time. - The through-line argues that operationalizing the full lifecycle (day‑one provisioning - through day‑two reliability and day‑three evolution) turns pockets of heroic, ad‑hoc + The through-line argues that operationalizing the full lifecycle (day-one provisioning + through day-two reliability and day-three evolution) turns pockets of heroic, ad-hoc data work into consistent, reliable delivery that enables safe, scalable use of AI and analytics.' 
--- diff --git a/_podcast/dataops-principles-and-scalable-data-platforms.md b/_podcast/dataops-principles-and-scalable-data-platforms.md index 5d9a9a94..58923da2 100644 --- a/_podcast/dataops-principles-and-scalable-data-platforms.md +++ b/_podcast/dataops-principles-and-scalable-data-platforms.md @@ -1,5 +1,5 @@ --- -title: 'DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self‑Service Lakehouse & Reproducibility' +title: 'DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self-Service Lakehouse & Reproducibility' short: DataOps 101 season: 2 episode: 11 @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/dataops-101-lars-albertsson/id1541710331?i=1000514542438 description: Discover DataOps strategies, immutable pipelines & a self-service lakehouse to boost reproducibility, scale data platforms, enable analysts and speed delivery -intro: How do you scale a data platform that supports self‑service analytics while keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, founder of Scling and former Google, Spotify and Schibsted engineer, walks through pragmatic DataOps principles for building scalable data platforms.

We dig into building self‑service at Spotify, orchestration with Luigi, and the core platform components—storage, compute and workflow engines—plus compute choices like Spark, Flink, containers and managed services. Lars explains immutable, functional pipeline design to solve reproducibility problems, contrasts data lakes and warehouses (raw dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, CDC and database versioning strategies. He also explores batch vs streaming trade‑offs, micro‑batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, and risks around data mesh and decentralization.

Listeners will come away with concrete architectural trade‑offs, patterns for immutable pipelines and self‑service lakehouse design, and recommended readings from the Scling list to deepen expertise in DataOps, lineage, versioning and practical data engineering +intro: How do you scale a data platform that supports self-service analytics while keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, founder of Scling and former Google, Spotify and Schibsted engineer, walks through pragmatic DataOps principles for building scalable data platforms.

We dig into building self-service at Spotify, orchestration with Luigi, and the core platform components—storage, compute and workflow engines—plus compute choices like Spark, Flink, containers and managed services. Lars explains immutable, functional pipeline design to solve reproducibility problems, contrasts data lakes and warehouses (raw dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, CDC and database versioning strategies. He also explores batch vs streaming trade-offs, micro-batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, and risks around data mesh and decentralization.

Listeners will come away with concrete architectural trade-offs, patterns for immutable pipelines and self-service lakehouse design, and recommended readings from the Scling list to deepen expertise in DataOps, lineage, versioning and practical data engineering topics: - DataOps - date engineering @@ -34,7 +34,7 @@ quotableClips: startOffset: 218 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=218 endOffset: 472 -- name: 'Scaling Data Teams: Building Self‑Service at Spotify' +- name: 'Scaling Data Teams: Building Self-Service at Spotify' startOffset: 472 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=472 endOffset: 648 @@ -62,7 +62,7 @@ quotableClips: startOffset: 1409 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1409 endOffset: 1702 -- name: 'Ingress & Egress: Offline Processing and Self‑Service SQL' +- name: 'Ingress & Egress: Offline Processing and Self-Service SQL' startOffset: 1702 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1702 endOffset: 1834 @@ -74,7 +74,7 @@ quotableClips: startOffset: 1878 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1878 endOffset: 2157 -- name: 'Cloud Trade‑offs: Prepackaged Platforms vs DIY Assembly' +- name: 'Cloud Trade-offs: Prepackaged Platforms vs DIY Assembly' startOffset: 2157 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2157 endOffset: 2397 @@ -86,19 +86,19 @@ quotableClips: startOffset: 2513 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2513 endOffset: 2711 -- name: 'Micro‑batching vs Streaming: Dependency Management & Predictability' +- name: 'Micro-batching vs Streaming: Dependency Management & Predictability' startOffset: 2711 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2711 endOffset: 2812 -- name: 'DataOps Maturity: Test‑Certified Practices, Quality & Schema Automation' +- name: 'DataOps Maturity: Test-Certified Practices, Quality & Schema Automation' startOffset: 2812 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2812 endOffset: 3013 -- name: 'Enabling Self‑Service 
Analytics: Embedding Engineers with Analysts' +- name: 'Enabling Self-Service Analytics: Embedding Engineers with Analysts' startOffset: 3013 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3013 endOffset: 3211 -- name: 'MLOps vs DataOps: Shared Principles and ML‑Specific Requirements' +- name: 'MLOps vs DataOps: Shared Principles and ML-Specific Requirements' startOffset: 3211 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3211 endOffset: 3466 @@ -110,7 +110,7 @@ quotableClips: startOffset: 3782 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3782 endOffset: 3858 -- name: 'Lineage & Versioning: Code‑Defined Pipelines vs Catalog Tools' +- name: 'Lineage & Versioning: Code-Defined Pipelines vs Catalog Tools' startOffset: 3858 url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3858 endOffset: 3961 @@ -202,7 +202,7 @@ transcript: sec: 468 time: '7:48' who: Lars -- header: 'Scaling Data Teams: Building Self‑Service at Spotify' +- header: 'Scaling Data Teams: Building Self-Service at Spotify' - line: You said, you started with DataOps, even before it became a thing — in 2013. How was it called there? How did you come up with this? sec: 472 @@ -460,7 +460,7 @@ transcript: sec: 1586 time: '26:26' who: Lars -- header: 'Ingress & Egress: Offline Processing and Self‑Service SQL' +- header: 'Ingress & Egress: Offline Processing and Self-Service SQL' - line: 'At Spotify, we had songs that have been played and then we joined with the user so we know what product they were or what country they''re in. Then these pipelines fan out, these popular data sets are used for many purposes: for reporting @@ -526,7 +526,7 @@ transcript: sec: 1878 time: '31:18' who: Lars -- header: 'Cloud Trade‑offs: Prepackaged Platforms vs DIY Assembly' +- header: 'Cloud Trade-offs: Prepackaged Platforms vs DIY Assembly' - line: Then you need compute. You need some way to perform these transformations. There are scalable things like Spark and Flink. 
For most companies, horizontal scalability is actually not necessary. You can get 12 terabyte memory machines @@ -661,7 +661,7 @@ transcript: sec: 2549 time: '42:29' who: Lars -- header: 'Micro‑batching vs Streaming: Dependency Management & Predictability' +- header: 'Micro-batching vs Streaming: Dependency Management & Predictability' - line: 'Then you have batch, where things can be really slow, like reporting, or you''re making analytics or business insights. You can wait for an hour and that''s fine. Then you have streaming. It takes care of the window in-between. Then the @@ -704,7 +704,7 @@ transcript: sec: 2719 time: '45:19' who: Lars -- header: 'DataOps Maturity: Test‑Certified Practices, Quality & Schema Automation' +- header: 'DataOps Maturity: Test-Certified Practices, Quality & Schema Automation' - line: Makes sense. Thank you. I also wanted to talk about maturity levels, and you briefly touched on them. What are the maturity levels of an organization? When an organization is ready for DataOps? And what are the different levels of readiness? @@ -719,7 +719,7 @@ transcript: sec: 2833 time: '47:13' who: Lars -- header: 'Enabling Self‑Service Analytics: Embedding Engineers with Analysts' +- header: 'Enabling Self-Service Analytics: Embedding Engineers with Analysts' - line: Regarding the maturity levels, I don't have a super great definition of maturity levels. There was an interesting development at Spotify. When I was at Google, if we traced back to that time, we had a maturity ladder in terms of DevOps — @@ -784,7 +784,7 @@ transcript: sec: 3155 time: '52:35' who: Lars -- header: 'MLOps vs DataOps: Shared Principles and ML‑Specific Requirements' +- header: 'MLOps vs DataOps: Shared Principles and ML-Specific Requirements' - line: This reminds me that at the beginning of our chat, we wanted to ask you about different “something-Ops”. We already talked about the difference between DevOps and DataOps. 
Correct me if I'm wrong, but in case of DataOps, you have the same @@ -931,7 +931,7 @@ transcript: sec: 3856 time: '1:04:16' who: Lars -- header: 'Lineage & Versioning: Code‑Defined Pipelines vs Catalog Tools' +- header: 'Lineage & Versioning: Code-Defined Pipelines vs Catalog Tools' - line: How do you keep track of all the transformations that have been undertaken between each newly created data set within the data platform? sec: 3858 diff --git a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md index 10a12cab..d5d2e79b 100644 --- a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md +++ b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md @@ -48,7 +48,7 @@ quotableClips: startOffset: 77 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=77 endOffset: 251 -- name: 'Transition: Full‑time on DataTalks.Club; engineering-heavy roles' +- name: 'Transition: Full-time on DataTalks.Club; engineering-heavy roles' startOffset: 251 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=251 endOffset: 355 @@ -64,7 +64,7 @@ quotableClips: startOffset: 442 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=442 endOffset: 670 -- name: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take‑home Tests' +- name: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take-home Tests' startOffset: 670 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=670 endOffset: 896 @@ -84,7 +84,7 @@ quotableClips: startOffset: 1895 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1895 endOffset: 2026 -- name: 'Course Model: Creating Zoomcamps Inspired by Community‑Driven Courses' +- name: 'Course Model: Creating Zoomcamps Inspired by Community-Driven Courses' startOffset: 2026 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2026 endOffset: 2131 @@ -100,7 +100,7 @@ quotableClips: startOffset: 2351 url: 
https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2351 endOffset: 2572 -- name: 'Content Strategy: Choosing Guests and Avoiding Hype‑Chasing' +- name: 'Content Strategy: Choosing Guests and Avoiding Hype-Chasing' startOffset: 2572 url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2572 endOffset: 2748 @@ -212,7 +212,7 @@ transcript: sec: 250 time: '4:10' who: Johanna -- header: 'Transition: Full‑time on DataTalks.Club; engineering-heavy roles' +- header: 'Transition: Full-time on DataTalks.Club; engineering-heavy roles' - line: Yeah. I was mostly doing the engineering stuff there, even though my title was “data scientist”. It's a fun fact. Now, since April, I'm fully focused on DataTalks.Club. @@ -333,7 +333,7 @@ transcript: sec: 605 time: '10:05' who: Johanna -- header: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take‑home Tests' +- header: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take-home Tests' - line: Let's actually move to the next question, which kind of touches on what we've just discussed. “How do you think the other jobs will change as different GPT-like services come into play and extend the skills of data professionals?” @@ -681,7 +681,7 @@ transcript: sec: 1981 time: '33:01' who: Alexey -- header: 'Course Model: Creating Zoomcamps Inspired by Community‑Driven Courses' +- header: 'Course Model: Creating Zoomcamps Inspired by Community-Driven Courses' - line: Yeah, yeah. sec: 2026 time: '33:46' @@ -839,7 +839,7 @@ transcript: sec: 2525 time: '42:05' who: Johanna -- header: 'Content Strategy: Choosing Guests and Avoiding Hype‑Chasing' +- header: 'Content Strategy: Choosing Guests and Avoiding Hype-Chasing' - line: What topics or trends in the data world are you most excited about exploring in upcoming club events or interviews? 
sec: 2572 diff --git a/_podcast/datatalksclub-scaling-and-free-courses.md b/_podcast/datatalksclub-scaling-and-free-courses.md index 9c1ece63..a0eeea96 100644 --- a/_podcast/datatalksclub-scaling-and-free-courses.md +++ b/_podcast/datatalksclub-scaling-and-free-courses.md @@ -1201,10 +1201,10 @@ context: 'Context: Born during COVID as a volunteer meetup, DataTalks.Club scale intro: How do you scale a volunteer-run learning community into a sustainable platform offering free data engineering, MLOps, and LLM courses? In this episode Alexey Grigorev, founder of DataTalks.Club, walks through the origin story of the project, the leap - to running it full‑time, and the practical tradeoffs of building free data engineering + to running it full-time, and the practical tradeoffs of building free data engineering courses at scale.

Alexey’s background as the founder guides discussions on course portfolio decisions (Machine Learning, Data Engineering, MLOps, LLMs, - Stock Analytics), organic growth strategies like Zoomcamp word‑of‑mouth, and technical + Stock Analytics), organic growth strategies like Zoomcamp word-of-mouth, and technical choices—building the course platform in Django. We cover community safety and moderation, revenue volatility from sponsorships, tax and cashflow considerations in Germany, and how staying technical through pet projects and LLM experiments informed their @@ -1212,5 +1212,5 @@ intro: How do you scale a volunteer-run learning community into a sustainable pl concrete takeaways on scaling online education, community-driven learning, course product work, and practical ways to help—mentoring, guesting, or joining projects and events. Useful for educators, course builders, and data practitioners wondering - how to create and sustain free, high‑quality data science and MLOps training. + how to create and sustain free, high-quality data science and MLOps training. --- diff --git a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md index 14926211..86aaf2ee 100644 --- a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md +++ b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=6dn6uZFkk04 description: 'Discover LLM deployment tactics: fine-tuning, retrieval and open-source vs API tradeoffs to cut latency, control costs, and ground production models.' -intro: 'How do you take large language models from experiment to reliable production—balancing fine-tuning, retrieval strategies, and the tradeoffs between open‑source models and API services? 
In this episode, Meryem Arik, a recovering physicist and co‑founder of TitanML, walks through practical choices for LLM deployment based on her pivot from computer vision to building tools that make models smaller, cheaper, and easier to run in production.

We cover model fundamentals and selection (classification vs generative tasks), open‑source model options like LLaMA, FLAN‑T5, Falcon and MPT, and the operational realities of serving: model size, compression, inference optimization, latency and cost tradeoffs. Meryem explains when to prototype with GPT‑3.5/4 APIs versus self‑hosting, the risks of API model drift, and why fine‑tuning or retrieval‑augmented generation often beats continuous retraining. You’ll also get a clear breakdown of retrieval patterns, vector databases for semantic search, dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing architectures, reducing costs, and grounding answers reliably with retrieval.' +intro: 'How do you take large language models from experiment to reliable production—balancing fine-tuning, retrieval strategies, and the tradeoffs between open-source models and API services? In this episode, Meryem Arik, a recovering physicist and co-founder of TitanML, walks through practical choices for LLM deployment based on her pivot from computer vision to building tools that make models smaller, cheaper, and easier to run in production.

We cover model fundamentals and selection (classification vs generative tasks), open-source model options like LLaMA, FLAN-T5, Falcon and MPT, and the operational realities of serving: model size, compression, inference optimization, latency and cost tradeoffs. Meryem explains when to prototype with GPT-3.5/4 APIs versus self-hosting, the risks of API model drift, and why fine-tuning or retrieval-augmented generation often beats continuous retraining. You’ll also get a clear breakdown of retrieval patterns, vector databases for semantic search, dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing architectures, reducing costs, and grounding answers reliably with retrieval.' dateadded: 2023-07-29 duration: PT00H59M31S @@ -42,7 +42,7 @@ quotableClips: startOffset: 289 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=289 endOffset: 402 -- name: 'Early LLM Interest: customer-driven pivot and GPT‑3 experience' +- name: 'Early LLM Interest: customer-driven pivot and GPT-3 experience' startOffset: 402 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=402 endOffset: 557 @@ -50,7 +50,7 @@ quotableClips: startOffset: 557 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=557 endOffset: 624 -- name: 'LLM Fundamentals: generative vs. non‑generative models and transformers' +- name: 'LLM Fundamentals: generative vs. 
non-generative models and transformers' startOffset: 624 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=624 endOffset: 704 @@ -58,7 +58,7 @@ quotableClips: startOffset: 704 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=704 endOffset: 825 -- name: 'Open‑source Model Landscape: LLaMA, FLAN‑T5, Falcon, MPT' +- name: 'Open-source Model Landscape: LLaMA, FLAN-T5, Falcon, MPT' startOffset: 825 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=825 endOffset: 885 @@ -66,7 +66,7 @@ quotableClips: startOffset: 885 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=885 endOffset: 1008 -- name: 'Open‑source vs API Models: control, privacy, and fine‑tuning benefits' +- name: 'Open-source vs API Models: control, privacy, and fine-tuning benefits' startOffset: 1008 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1008 endOffset: 1126 @@ -82,11 +82,11 @@ quotableClips: startOffset: 1526 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1526 endOffset: 1590 -- name: 'Fine‑tuning Purpose: specialization, domain adaptation, and tone' +- name: 'Fine-tuning Purpose: specialization, domain adaptation, and tone' startOffset: 1590 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1590 endOffset: 1898 -- name: 'Fine‑tuning Generative Models: data formats and end‑task considerations' +- name: 'Fine-tuning Generative Models: data formats and end-task considerations' startOffset: 1898 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1898 endOffset: 2038 @@ -98,7 +98,7 @@ quotableClips: startOffset: 2446 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2446 endOffset: 2522 -- name: 'Grounding Answers: indexing docs and retrieval‑augmented responses' +- name: 'Grounding Answers: indexing docs and retrieval-augmented responses' startOffset: 2522 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2522 endOffset: 2802 @@ -110,19 +110,19 @@ quotableClips: startOffset: 2881 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2881 endOffset: 2984 -- name: 'Prototyping vs Production: 
when to use GPT‑3.5/4 APIs vs open‑source LLMs' +- name: 'Prototyping vs Production: when to use GPT-3.5/4 APIs vs open-source LLMs' startOffset: 2984 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2984 endOffset: 3095 -- name: 'Latency & Cost Tradeoffs: self‑hosting performance and hardware choices' +- name: 'Latency & Cost Tradeoffs: self-hosting performance and hardware choices' startOffset: 3095 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3095 endOffset: 3214 -- name: 'Data Quality Metrics: gold‑standard examples and output‑driven evaluation' +- name: 'Data Quality Metrics: gold-standard examples and output-driven evaluation' startOffset: 3214 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3214 endOffset: 3332 -- name: 'Dataset Expansion: LLM‑assisted augmentation for training data' +- name: 'Dataset Expansion: LLM-assisted augmentation for training data' startOffset: 3332 url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3332 endOffset: 3399 @@ -274,7 +274,7 @@ transcript: sec: 386 time: '6:26' who: Meryem -- header: 'Early LLM Interest: customer-driven pivot and GPT‑3 experience' +- header: 'Early LLM Interest: customer-driven pivot and GPT-3 experience' - line: I know we're kind of late to the party in terms of speaking about the LLM because, as I mentioned at the beginning, this is actually our first event ever about LLMs (where we explicitly talk about them). LLMs are large language models, @@ -342,7 +342,7 @@ transcript: sec: 619 time: '10:19' who: Alexey -- header: 'LLM Fundamentals: generative vs. non‑generative models and transformers' +- header: 'LLM Fundamentals: generative vs. non-generative models and transformers' - line: Yeah. LLMs are large language models. I would kind of distinguish large language models into two things – quite often we conflate these ideas. Large language models, as we typically talk about them, are generative models. 
What these are, are models @@ -412,7 +412,7 @@ transcript: sec: 808 time: '13:28' who: Alexey -- header: 'Open‑source Model Landscape: LLaMA, FLAN‑T5, Falcon, MPT' +- header: 'Open-source Model Landscape: LLaMA, FLAN-T5, Falcon, MPT' - line: Yeah. There's a whole range and ecosystem of language models and they're good at different things. For example, there's the Google FLAN-T5 range, which is able to generate text. But what that's particularly good at is translation and summarization. @@ -485,7 +485,7 @@ transcript: sec: 979 time: '16:19' who: Alexey -- header: 'Open‑source vs API Models: control, privacy, and fine‑tuning benefits' +- header: 'Open-source vs API Models: control, privacy, and fine-tuning benefits' - line: Sure. There are a whole bunch of open source language models, and they're getting better and better month by month. I think only two days ago, Meta released LLaMA 2, which is a massively improved version from LLaMA 1, trained on 40% more @@ -658,7 +658,7 @@ transcript: sec: 1563 time: '26:03' who: Alexey -- header: 'Fine‑tuning Purpose: specialization, domain adaptation, and tone' +- header: 'Fine-tuning Purpose: specialization, domain adaptation, and tone' - line: Sure. When you take a model off the shelf, what it has and what it's very, very good at, is general language knowledge and understanding. Your model will speak English or speak whatever language it was trained in, and it'll have reasonably @@ -760,7 +760,7 @@ transcript: sec: 1877 time: '31:17' who: Alexey -- header: 'Fine‑tuning Generative Models: data formats and end‑task considerations' +- header: 'Fine-tuning Generative Models: data formats and end-task considerations' - line: Yeah. This kind of changes depending on the end task that you want it to get it to do. But in cases that we've done, you can literally just have strings of documents, you can just have raw text that you can fine-tune on. 
So you don't @@ -949,7 +949,7 @@ transcript: sec: 2521 time: '42:01' who: Alexey -- header: 'Grounding Answers: indexing docs and retrieval‑augmented responses' +- header: 'Grounding Answers: indexing docs and retrieval-augmented responses' - line: Yeah, exactly, a huge knowledge base. And I think most companies have those kinds of knowledge bases, whether in Confluence, or Notion, etc. What you can do is embed all of that documentation and reinvent it every single time it changes @@ -1082,7 +1082,7 @@ transcript: sec: 2970 time: '49:30' who: Meryem -- header: 'Prototyping vs Production: when to use GPT‑3.5/4 APIs vs open‑source LLMs' +- header: 'Prototyping vs Production: when to use GPT-3.5/4 APIs vs open-source LLMs' - line: For this task, do you know if we should go with an open source LLM or go with GPT-3.5 or 4? Are there any pros and cons? sec: 2984 @@ -1113,7 +1113,7 @@ transcript: sec: 3074 time: '51:14' who: Alexey -- header: 'Latency & Cost Tradeoffs: self‑hosting performance and hardware choices' +- header: 'Latency & Cost Tradeoffs: self-hosting performance and hardware choices' - line: I mean, they are really fast. They're really, really fast, because they're hosted on very expensive hardware. If you were to host your model on the same hardware, using good techniques – using something like the Titan Takeoff server @@ -1147,7 +1147,7 @@ transcript: sec: 3177 time: '52:57' who: Meryem -- header: 'Data Quality Metrics: gold‑standard examples and output‑driven evaluation' +- header: 'Data Quality Metrics: gold-standard examples and output-driven evaluation' - line: We have a few interesting questions from Tara. 
The first question he's asking is, “How can you measure if the data you feed into an LLM is good enough?” Do you even think about these things or are you just saying, “This is the data I @@ -1178,7 +1178,7 @@ transcript: sec: 3312 time: '55:12' who: Alexey -- header: 'Dataset Expansion: LLM‑assisted augmentation for training data' +- header: 'Dataset Expansion: LLM-assisted augmentation for training data' - line: Yeah, it's super similar. A very basic example is – if I have a dataset where one example is “the pig is pink,” I might get my LLM to say “the cat is black”. It just kind of switches words out, but it's semantically similar. Another way diff --git a/_podcast/developer-personal-brand-learn-in-public.md b/_podcast/developer-personal-brand-learn-in-public.md index bd137f16..50a723e1 100644 --- a/_podcast/developer-personal-brand-learn-in-public.md +++ b/_podcast/developer-personal-brand-learn-in-public.md @@ -39,7 +39,7 @@ quotableClips: startOffset: 144 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=144 endOffset: 376 -- name: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' +- name: 'Why Self-Marketing Matters: recognition, promotions, opportunities' startOffset: 376 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=376 endOffset: 513 @@ -51,7 +51,7 @@ quotableClips: startOffset: 626 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=626 endOffset: 756 -- name: 'Personal Brand for Non‑star Developers: find distinctiveness' +- name: 'Personal Brand for Non-star Developers: find distinctiveness' startOffset: 756 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=756 endOffset: 787 @@ -107,7 +107,7 @@ quotableClips: startOffset: 2540 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2540 endOffset: 2597 -- name: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' +- name: 'Work-safe Content Ideas: war stories, industry problems, and summaries' startOffset: 2597 url: 
https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2597 endOffset: 2743 @@ -115,7 +115,7 @@ quotableClips: startOffset: 2743 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2743 endOffset: 2834 -- name: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' +- name: 'Open Knowledge Projects: collaborative docs and cheat-sheets as visibility' startOffset: 2834 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2834 endOffset: 3070 @@ -123,7 +123,7 @@ quotableClips: startOffset: 3070 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3070 endOffset: 3256 -- name: 'Signature Initiative: company‑wide projects that build influence' +- name: 'Signature Initiative: company-wide projects that build influence' startOffset: 3256 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3256 endOffset: 3429 @@ -139,7 +139,7 @@ quotableClips: startOffset: 3717 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3717 endOffset: 3791 -- name: 'Final Takeaway: non‑technical skills dominate engineering ladders' +- name: 'Final Takeaway: non-technical skills dominate engineering ladders' startOffset: 3791 url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3791 endOffset: 3761 @@ -266,8 +266,8 @@ transcript: sec: 358 time: '5:58' who: Swyx -- header: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' -- header: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' +- header: 'Why Self-Marketing Matters: recognition, promotions, opportunities' +- header: 'Why Self-Marketing Matters: recognition, promotions, opportunities' - line: About marketing ourselves. You are one of the people I follow on Twitter for stuff like marketing ourselves and learning in public. So I wanted to ask you, why should we market ourselves? Why is it important for our careers? You have @@ -405,12 +405,12 @@ transcript: stuff and cover your bases. So, there are just so many details here, like we can go into any one of them. 
who: Swyx -- header: 'Personal Brand for Non‑star Developers: find distinctiveness' +- header: 'Personal Brand for Non-star Developers: find distinctiveness' - line: Let’s say I already have some skills. I’m not the top developer but I know how to code. Let’s say I know Python or JavaScript, or I’m good at data science. How do I find my personal brand as an average data scientist or as an average software engineer? -- header: 'Personal Brand for Non‑star Developers: find distinctiveness' +- header: 'Personal Brand for Non-star Developers: find distinctiveness' - line: Let’s say I already have some skills. I’m not the top developer but I know how to code. Let’s say I know Python or JavaScript, or I’m good at data science. How do I find my personal brand as an average data scientist or as an average @@ -1168,11 +1168,11 @@ transcript: it is just more about, do they like you or do they trust you that are it and everything else can be taught. who: Swyx -- header: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' +- header: 'Work-safe Content Ideas: war stories, industry problems, and summaries' - line: I imagine it takes a lot of time to build a spotify clone. Let’s say I work already. I am already experienced. I want to find a new job in the same domain. In this case what do I share? What do I write about? Stuff that I do at work? -- header: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' +- header: 'Work-safe Content Ideas: war stories, industry problems, and summaries' - line: I imagine it takes a lot of time to build a spotify clone. Let’s say I work already. I am already experienced. I want to find a new job in the same domain. In this case what do I share? What do I write about? Stuff that I do at work? 
@@ -1242,12 +1242,12 @@ transcript: sec: 2801 time: '46:41' who: Alexey -- header: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' +- header: 'Open Knowledge Projects: collaborative docs and cheat-sheets as visibility' - line: Yes. It’s nice, especially if you have your own highlights from a book or a blog post. Then you can go on Google and type your domain name and then that search word. You can use Google as your own personal search engine for your notes. It’s really helpful when you are trying to look up something to reference people. -- header: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' +- header: 'Open Knowledge Projects: collaborative docs and cheat-sheets as visibility' - line: Yes. It’s nice, especially if you have your own highlights from a book or a blog post. Then you can go on Google and type your domain name and then that search word. You can use Google as your own personal search engine for your notes. @@ -1393,12 +1393,12 @@ transcript: sec: 3240 time: '54:00' who: Alexey -- header: 'Signature Initiative: company‑wide projects that build influence' +- header: 'Signature Initiative: company-wide projects that build influence' - line: There is more than that. You can also do a signature initiative. This is a term that I picked up at AWS. It’s a big project that you hit on your own. That’s what you are known for. It gives you a chance to win outside of your team, to show individual accomplishment and leadership. -- header: 'Signature Initiative: company‑wide projects that build influence' +- header: 'Signature Initiative: company-wide projects that build influence' - line: There is more than that. You can also do a signature initiative. This is a term that I picked up at AWS. It’s a big project that you hit on your own. That’s what you are known for. 
It gives you a chance to win outside of your team, to @@ -1640,12 +1640,12 @@ transcript: sec: 3781 time: '1:03:01' who: Alexey -- header: 'Final Takeaway: non‑technical skills dominate engineering ladders' +- header: 'Final Takeaway: non-technical skills dominate engineering ladders' - line: I don’t get to talk about the marketing chapter enough. There is so much to career development. I always want to invite people to have a discussion about this. We don’t talk about it enough. We always talk about code. We should talk about the 75% of the engineering ladder criteria that is not technical. -- header: 'Final Takeaway: non‑technical skills dominate engineering ladders' +- header: 'Final Takeaway: non-technical skills dominate engineering ladders' - line: I don’t get to talk about the marketing chapter enough. There is so much to career development. I always want to invite people to have a discussion about this. We don’t talk about it enough. We always talk about code. We should talk diff --git a/_podcast/devrel-data-science-open-source-tools.md b/_podcast/devrel-data-science-open-source-tools.md index 31c858c4..dfecf9d3 100644 --- a/_podcast/devrel-data-science-open-source-tools.md +++ b/_podcast/devrel-data-science-open-source-tools.md @@ -56,7 +56,7 @@ quotableClips: startOffset: 902 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=902 endOffset: 1004 -- name: 'Release promotion: real‑time engagement on Hacker News, Reddit, and social +- name: 'Release promotion: real-time engagement on Hacker News, Reddit, and social media' startOffset: 1004 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1004 @@ -66,7 +66,7 @@ quotableClips: url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1074 endOffset: 1187 - name: 'DevRel job realities: content creation, community management, and support - trade‑offs' + trade-offs' startOffset: 1187 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1187 endOffset: 1431 @@ -74,7 +74,7 @@ quotableClips: startOffset: 1431 url: 
https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1431 endOffset: 1561 -- name: 'Community metrics: signals, analytics, and full‑time analysis potential' +- name: 'Community metrics: signals, analytics, and full-time analysis potential' startOffset: 1561 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1561 endOffset: 1624 @@ -114,7 +114,7 @@ quotableClips: startOffset: 2715 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2715 endOffset: 2886 -- name: 'Audience growth: metrics, growth‑hacking versus sustainable strategies' +- name: 'Audience growth: metrics, growth-hacking versus sustainable strategies' startOffset: 2886 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2886 endOffset: 3019 @@ -135,7 +135,7 @@ quotableClips: startOffset: 3399 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3399 endOffset: 3492 -- name: Episode Wrap‑up and Farewell +- name: Episode Wrap-up and Farewell startOffset: 3492 url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3492 endOffset: 3315 @@ -370,7 +370,7 @@ transcript: sec: 932 time: '15:32' who: Elle -- header: 'Release promotion: real‑time engagement on Hacker News, Reddit, and social +- header: 'Release promotion: real-time engagement on Hacker News, Reddit, and social media' - line: So it's more like a feeling. Plus, if there is a certain release, you know that you will need to prepare for this release – prepare some supporting material, @@ -424,7 +424,7 @@ transcript: time: '18:15' who: Elle - header: 'DevRel job realities: content creation, community management, and support - trade‑offs' + trade-offs' - line: Yeah, thank you. Since I was pretty curious about this role, I decided to just take a random job description of a developer advocate for a company and see what it says. 
I found this in some Slack, and it said in the responsibilities @@ -516,7 +516,7 @@ transcript: sec: 1431 time: '23:51' who: Elle -- header: 'Community metrics: signals, analytics, and full‑time analysis potential' +- header: 'Community metrics: signals, analytics, and full-time analysis potential' - line: Yeah, it makes sense. So basically, it's more a description of a full stack role, right? For example, in full stack data science, we have somebody who can talk to stakeholders, build data pipelines, train a model, roll the model out @@ -903,7 +903,7 @@ transcript: sec: 2772 time: '46:12' who: Elle -- header: 'Audience growth: metrics, growth‑hacking versus sustainable strategies' +- header: 'Audience growth: metrics, growth-hacking versus sustainable strategies' - line: You mentioned that people don't consider that it will involve a lot of growth hacking and things like that. But it actually does involve these things, right? sec: 2886 @@ -1085,7 +1085,7 @@ transcript: sec: 3407 time: '56:47' who: Elle -- header: Episode Wrap‑up and Farewell +- header: Episode Wrap-up and Farewell - line: Yes. Thanks a lot for being here today and sharing your experience. And thanks everyone else for being here as well. Let's see each other again next week. Thanks, Elle. Goodbye. 
diff --git a/_podcast/devrel-open-source-machine-learning.md b/_podcast/devrel-open-source-machine-learning.md index 61aa1f9d..7455eb0e 100644 --- a/_podcast/devrel-open-source-machine-learning.md +++ b/_podcast/devrel-open-source-machine-learning.md @@ -29,11 +29,11 @@ quotableClips: startOffset: 0 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=0 endOffset: 93 -- name: 'Guest Introduction: Hugo Bowne‑Anderson, Outerbounds & Metaflow' +- name: 'Guest Introduction: Hugo Bowne-Anderson, Outerbounds & Metaflow' startOffset: 93 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=93 endOffset: 134 -- name: Metaflow Sandbox Demo & Full‑Stack Machine Learning Spotlight +- name: Metaflow Sandbox Demo & Full-Stack Machine Learning Spotlight startOffset: 134 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=134 endOffset: 216 @@ -41,11 +41,11 @@ quotableClips: startOffset: 216 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=216 endOffset: 226 -- name: Building Courses, Open‑Source Collaboration & DataCamp Impact +- name: Building Courses, Open-Source Collaboration & DataCamp Impact startOffset: 226 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=226 endOffset: 647 -- name: 'Open‑Source Governance: Company Support for Projects (Dask, Metaflow)' +- name: 'Open-Source Governance: Company Support for Projects (Dask, Metaflow)' startOffset: 647 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=647 endOffset: 832 @@ -81,7 +81,7 @@ quotableClips: startOffset: 1901 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1901 endOffset: 2090 -- name: 'Role Trade‑offs: Content Work vs Internal Data Science' +- name: 'Role Trade-offs: Content Work vs Internal Data Science' startOffset: 2090 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2090 endOffset: 2187 @@ -93,7 +93,7 @@ quotableClips: startOffset: 2241 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2241 endOffset: 2417 -- name: 'AI‑Assisted Drafting: Whisper, ChatGPT & Productivity Tools' +- name: 'AI-Assisted 
Drafting: Whisper, ChatGPT & Productivity Tools' startOffset: 2417 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2417 endOffset: 2594 @@ -101,7 +101,7 @@ quotableClips: startOffset: 2594 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2594 endOffset: 2769 -- name: 'Content Goals: Awareness, Support & Open‑Source Strategy Decisions' +- name: 'Content Goals: Awareness, Support & Open-Source Strategy Decisions' startOffset: 2769 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2769 endOffset: 2923 @@ -117,7 +117,7 @@ quotableClips: startOffset: 3271 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3271 endOffset: 3372 -- name: 'Long‑Form Conversations: Vanishing Gradients Podcast Overview' +- name: 'Long-Form Conversations: Vanishing Gradients Podcast Overview' startOffset: 3372 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3372 endOffset: 3503 @@ -125,14 +125,14 @@ quotableClips: startOffset: 3503 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3503 endOffset: 3565 -- name: Closing Remarks & Episode Wrap‑Up +- name: Closing Remarks & Episode Wrap-Up startOffset: 3565 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3565 endOffset: 3472 transcript: - header: Podcast Introduction -- header: 'Guest Introduction: Hugo Bowne‑Anderson, Outerbounds & Metaflow' +- header: 'Guest Introduction: Hugo Bowne-Anderson, Outerbounds & Metaflow' - line: This week, we'll talk about developer advocacy. We have a special guest today – very special – Hugo. Hugo is the Head of Developer Relations at Outerbounds. He's also a co-host of the Vanishing Gradients podcast. He's a data scientist, @@ -144,7 +144,7 @@ transcript: sec: 93 time: '1:33' who: Alexey -- header: Metaflow Sandbox Demo & Full‑Stack Machine Learning Spotlight +- header: Metaflow Sandbox Demo & Full-Stack Machine Learning Spotlight - line: Thank you so much for having me here. It's a great honor to be here. 
For those who don't know, this will go live soon, but we also recorded an open-source demo of Metaflow and full-stack machine learning using the sandbox we've built recently, @@ -186,7 +186,7 @@ transcript: sec: 216 time: '3:36' who: Alexey -- header: Building Courses, Open‑Source Collaboration & DataCamp Impact +- header: Building Courses, Open-Source Collaboration & DataCamp Impact - line: Yeah, and feel free to stop me at any point. I'm not quite sure what level of granularity to go into. But I'll kind of give a bit of background that's relevant to what we're talking about today, as well. My background is in scientific research @@ -293,7 +293,7 @@ transcript: sec: 633 time: '10:33' who: Hugo -- header: 'Open‑Source Governance: Company Support for Projects (Dask, Metaflow)' +- header: 'Open-Source Governance: Company Support for Projects (Dask, Metaflow)' - line: I still don't know how exactly it works. Sometimes people can post links, sometimes they can't. Most of the time, they cannot. Only if the host can, which I guess makes sense because otherwise people might come and post spam. I'm really @@ -704,7 +704,7 @@ transcript: sec: 2087 time: '34:47' who: Alexey -- header: 'Role Trade‑offs: Content Work vs Internal Data Science' +- header: 'Role Trade-offs: Content Work vs Internal Data Science' - line: I was just gonna say, the other thing worth mentioning – and this is always a challenge for a lot of data scientists and machine learning engineers who are thinking of getting into DevRel. It's not clear in a lot of organizations. Once @@ -810,7 +810,7 @@ transcript: sec: 2264 time: '37:44' who: Hugo -- header: 'AI‑Assisted Drafting: Whisper, ChatGPT & Productivity Tools' +- header: 'AI-Assisted Drafting: Whisper, ChatGPT & Productivity Tools' - line: What helped me is having an editor who would point out, “Hey, look. This paragraph is completely not understandable. I tried to read it three times. I still don't get it. 
Let's work on this paragraph to really understand what you meant here @@ -921,7 +921,7 @@ transcript: sec: 2607 time: '43:27' who: Hugo -- header: 'Content Goals: Awareness, Support & Open‑Source Strategy Decisions' +- header: 'Content Goals: Awareness, Support & Open-Source Strategy Decisions' - line: How do you understand what kind of goal you have? Maybe somebody comes to you and says, “Hey, we don't have any posts in our blog. Let's create blog posts.” And then you're like “Okay, let's create.” Then you think, “Okay. What is actually @@ -1101,7 +1101,7 @@ transcript: sec: 3353 time: '55:53' who: Alexey -- header: 'Long‑Form Conversations: Vanishing Gradients Podcast Overview' +- header: 'Long-Form Conversations: Vanishing Gradients Podcast Overview' - line: Well, you also have a podcast. We still have 3 minutes. Maybe you can tell us about that podcast before we finish? sec: 3372 @@ -1171,7 +1171,7 @@ transcript: sec: 3556 time: '59:16' who: Hugo -- header: Closing Remarks & Episode Wrap‑Up +- header: Closing Remarks & Episode Wrap-Up - line: Well, have a nice weekend. And for those who are not in Australia, have a nice Friday, and then a great weekend. sec: 3565 diff --git a/_podcast/fairness-in-ai-ml-engineering.md b/_podcast/fairness-in-ai-ml-engineering.md index a9381f75..ce47c6de 100644 --- a/_podcast/fairness-in-ai-ml-engineering.md +++ b/_podcast/fairness-in-ai-ml-engineering.md @@ -26,13 +26,13 @@ intro: How do you reduce bias in credit scoring models without sacrificing expla debt and repossession.

Tamara explains Fairlearn’s group fairness tools, visualization and mitigation methods, and the tradeoffs between false positives, false negatives, and demographic parity. She discusses how to choose sensitive groups - in domain‑specific settings, the limits of automation, the need for human‑in‑the‑loop + in domain-specific settings, the limits of automation, the need for human-in-the-loop systems, and who in an organization should decide fairness tradeoffs. The episode also covers interpretability and explainable models — inspection tools, partial - dependence, and cross‑library integration with scikit‑learn and estimator APIs — + dependence, and cross-library integration with scikit-learn and estimator APIs — plus practical concerns like secure model serialization and community contribution paths.

Listen to learn actionable guidance on auditing and mitigating credit - scoring bias, building explainable models, and integrating Fairlearn into real‑world + scoring bias, building explainable models, and integrating Fairlearn into real-world ML workflows dateadded: 2025-02-24 duration: PT00H59M14S @@ -41,8 +41,8 @@ quotableClips: startOffset: 0 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=0 endOffset: 151 -- name: 'Guest Introduction: Tamara’s Open‑Source Roles (Fairlearn, scikit‑learn, - Skope‑Rules)' +- name: 'Guest Introduction: Tamara’s Open-Source Roles (Fairlearn, scikit-learn, + Skope-Rules)' startOffset: 151 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=151 endOffset: 198 @@ -78,7 +78,7 @@ quotableClips: startOffset: 824 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=824 endOffset: 892 -- name: 'Fairness in AI: Credit Scoring Use Case and Real‑World Impact' +- name: 'Fairness in AI: Credit Scoring Use Case and Real-World Impact' startOffset: 892 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=892 endOffset: 910 @@ -94,7 +94,7 @@ quotableClips: startOffset: 1291 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1291 endOffset: 1444 -- name: 'Sensitive Group Selection: Domain‑Specific Decisions in Credit Models' +- name: 'Sensitive Group Selection: Domain-Specific Decisions in Credit Models' startOffset: 1444 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1444 endOffset: 1581 @@ -114,15 +114,15 @@ quotableClips: startOffset: 1991 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1991 endOffset: 2123 -- name: 'Moderation Case Study: Cross‑Functional Teams and Domain Expertise' +- name: 'Moderation Case Study: Cross-Functional Teams and Domain Expertise' startOffset: 2123 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2123 endOffset: 2233 -- name: 'Human‑in‑the‑Loop: Essential Component for Fair AI Systems' +- name: 'Human-in-the-Loop: Essential Component for Fair AI Systems' startOffset: 2233 url: 
https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2233 endOffset: 2358 -- name: 'Joining Probable: From Open‑Source Contributions to a Role' +- name: 'Joining Probable: From Open-Source Contributions to a Role' startOffset: 2358 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2358 endOffset: 2457 @@ -134,7 +134,7 @@ quotableClips: startOffset: 2574 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2574 endOffset: 2694 -- name: 'Cross‑Library Compatibility: Fairlearn, scikit‑learn, and Estimator APIs' +- name: 'Cross-Library Compatibility: Fairlearn, scikit-learn, and Estimator APIs' startOffset: 2694 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2694 endOffset: 2780 @@ -150,7 +150,7 @@ quotableClips: startOffset: 3054 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3054 endOffset: 3130 -- name: 'Contributing to Fairlearn: Discord, Good‑First Issues, and Sprints' +- name: 'Contributing to Fairlearn: Discord, Good-First Issues, and Sprints' startOffset: 3130 url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3130 endOffset: 3341 @@ -184,8 +184,8 @@ transcript: sec: 0 time: 0:00 who: Alexey -- header: 'Guest Introduction: Tamara’s Open‑Source Roles (Fairlearn, scikit‑learn, - Skope‑Rules)' +- header: 'Guest Introduction: Tamara’s Open-Source Roles (Fairlearn, scikit-learn, + Skope-Rules)' - line: Yes, that’s correct. sec: 151 time: '2:31' @@ -407,7 +407,7 @@ transcript: sec: 834 time: '13:54' who: Tamara -- header: 'Fairness in AI: Credit Scoring Use Case and Real‑World Impact' +- header: 'Fairness in AI: Credit Scoring Use Case and Real-World Impact' - line: That sounds quite abstract. Can we narrow it down to AI? sec: 892 time: '14:52' @@ -518,7 +518,7 @@ transcript: sec: 1291 time: '21:31' who: Tamara -- header: 'Sensitive Group Selection: Domain‑Specific Decisions in Credit Models' +- header: 'Sensitive Group Selection: Domain-Specific Decisions in Credit Models' - line: Okay, so what exactly does Fairlearn, or similar tools, provide? 
Let’s take credit scoring as an example. Say we have a model, like a decision tree or logistic regression, that predicts loan decisions. Fairlearn then analyzes how the model @@ -652,7 +652,7 @@ transcript: sec: 1991 time: '33:11' who: Tamara -- header: 'Moderation Case Study: Cross‑Functional Teams and Domain Expertise' +- header: 'Moderation Case Study: Cross-Functional Teams and Domain Expertise' - line: I was reflecting on my personal experience with making these decisions. I worked on a moderation team for an online marketplace, deciding whether an item should go live for purchase or be blocked. We discussed factors like the model's @@ -670,7 +670,7 @@ transcript: sec: 2123 time: '35:23' who: Alexey -- header: 'Human‑in‑the‑Loop: Essential Component for Fair AI Systems' +- header: 'Human-in-the-Loop: Essential Component for Fair AI Systems' - line: 'Yes, and you mentioned something really important: the human in the loop. It’s a central component of all AI systems. If we want them to be fair, we need humans in the loop. Before any decision can have a real impact, there has to be @@ -721,7 +721,7 @@ transcript: sec: 2352 time: '39:12' who: Tamara -- header: 'Joining Probable: From Open‑Source Contributions to a Role' +- header: 'Joining Probable: From Open-Source Contributions to a Role' - line: So, how did you get involved in the project? Last time we spoke, Probable didn’t exist yet. How did it happen? You were doing LPiano back then, right? sec: 2358 @@ -805,7 +805,7 @@ transcript: sec: 2640 time: '44:00' who: Tamara -- header: 'Cross‑Library Compatibility: Fairlearn, scikit‑learn, and Estimator APIs' +- header: 'Cross-Library Compatibility: Fairlearn, scikit-learn, and Estimator APIs' - line: The most interesting part of my work has been ensuring cross-library compatibility. This means making all Fairlearn estimators compatible with Psyched Learn, and ensuring compatibility as Psyched Learn transitions to version 1.6. 
People should @@ -936,7 +936,7 @@ transcript: sec: 3054 time: '50:54' who: Tamara -- header: 'Contributing to Fairlearn: Discord, Good‑First Issues, and Sprints' +- header: 'Contributing to Fairlearn: Discord, Good-First Issues, and Sprints' - line: That’s interesting. You read my mind again. I was about to ask how someone can contribute to Fairlearn if they’re interested. If you live in Berlin, they can join the meetup, right? And there will be opportunities to contribute, as @@ -1072,18 +1072,18 @@ transcript: sec: 3554 time: '59:14' who: Alexey -context: 'Context: This episode follows Tamara’s journey from software and music‑tech - engineering into computational linguistics and open‑source stewardship, and uses - concrete case studies (credit‑scoring fairness, moderation systems) plus tool discussions +context: 'Context: This episode follows Tamara’s journey from software and music-tech + engineering into computational linguistics and open-source stewardship, and uses + concrete case studies (credit-scoring fairness, moderation systems) plus tool discussions (Fairlearn, interpretability packages, secure model serialization) to examine how technical choices, metrics, and developer practices translate into real societal outcomes. Recurring threads include tradeoffs in fairness metrics, the necessity - of domain expertise and human‑in‑the‑loop processes, the engineering challenges + of domain expertise and human-in-the-loop processes, the engineering challenges of interoperable, secure ML tooling, and the role of community and practitioner education in shaping responsible ML. 
Core: The unifying idea is that building fair, trustworthy AI is a sociotechnical - engineering task: it requires not just algorithms but pragmatic, community‑driven + engineering task: it requires not just algorithms but pragmatic, community-driven tools, secure software practices, clear interpretability, and organizational processes that embed human judgment and domain knowledge so technical models produce just, accountable outcomes in the real world.' diff --git a/_podcast/feature-engineering-model-monitoring-and-data-governance.md b/_podcast/feature-engineering-model-monitoring-and-data-governance.md index eeba2942..4883e648 100644 --- a/_podcast/feature-engineering-model-monitoring-and-data-governance.md +++ b/_podcast/feature-engineering-model-monitoring-and-data-governance.md @@ -99,7 +99,7 @@ quotableClips: startOffset: 2968 url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2968 endOffset: 3042 -- name: 'Relationship-building: informal check‑ins, lunch & beer networking' +- name: 'Relationship-building: informal check-ins, lunch & beer networking' startOffset: 3042 url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3042 endOffset: 3349 @@ -827,7 +827,7 @@ transcript: sec: 2968 time: '49:28' who: Alexey -- header: 'Relationship-building: informal check‑ins, lunch & beer networking' +- header: 'Relationship-building: informal check-ins, lunch & beer networking' - line: Then there’s educating why data science is important, which is something that you, as a data professional, should also be able to do. For instance saying, “Okay, you really need to be careful about this value. 
You really need to pay attention diff --git a/_podcast/freelance-data-engineering-pricing-and-clients.md b/_podcast/freelance-data-engineering-pricing-and-clients.md index 82b8b2f4..890c945e 100644 --- a/_podcast/freelance-data-engineering-pricing-and-clients.md +++ b/_podcast/freelance-data-engineering-pricing-and-clients.md @@ -92,7 +92,7 @@ quotableClips: startOffset: 2385 url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2385 endOffset: 2457 -- name: 'Work Location: Choosing Remote or On‑Site Projects' +- name: 'Work Location: Choosing Remote or On-Site Projects' startOffset: 2457 url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2457 endOffset: 2492 @@ -933,7 +933,7 @@ transcript: sec: 2389 time: '39:49' who: Adrian -- header: 'Work Location: Choosing Remote or On‑Site Projects' +- header: 'Work Location: Choosing Remote or On-Site Projects' - line: When you work with your clients, do you normally choose if you want to work remotely or on-site, or is it up to the client? sec: 2457 diff --git a/_podcast/freelancing-in-machine-learning.md b/_podcast/freelancing-in-machine-learning.md index 719355b1..f5624680 100644 --- a/_podcast/freelancing-in-machine-learning.md +++ b/_podcast/freelancing-in-machine-learning.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/freelancing-in-machine-learning-mikio-braun/id1541710331?i=1000532612872 description: 'Learn freelancing in machine learning: pricing, client acquisition, and proposals to win ML consulting gigs, scale sustainably, and secure steady income' -intro: 'How do you move from academic research or in‑house ML engineering to a sustainable freelance career in machine learning — getting clients, pricing your work, and delivering production systems? In this episode, Mikio Braun, who transitioned from TU Berlin into ML roles at Zalando and GetYourGuide and now consults on machine learning production, infrastructure, and teams, walks through that path step by step.

We cover the practical parts of freelancing in machine learning: launching first clients, sourcing leads through network and referrals, and demand generation with LinkedIn, talks, and podcasts; pre‑sales tactics like free intro calls, problem discovery, and clear proposals; pricing models and rate‑setting strategies; financial planning, capacity management, and avoiding burnout; plus specialization, productizing consulting, and scaling options (agency, product, or return to employment). The episode also addresses administrative essentials for freelancers in Germany (registration, VAT, payments), accounting choices, professional liability, and how to compete in a global remote market. Listen for concrete advice on client‑finding, scope discipline, and deliverables so you can evaluate whether freelancing in machine learning is the right next step and how to start with a safety net.' +intro: 'How do you move from academic research or in-house ML engineering to a sustainable freelance career in machine learning — getting clients, pricing your work, and delivering production systems? In this episode, Mikio Braun, who transitioned from TU Berlin into ML roles at Zalando and GetYourGuide and now consults on machine learning production, infrastructure, and teams, walks through that path step by step.

We cover the practical parts of freelancing in machine learning: launching first clients, sourcing leads through network and referrals, and demand generation with LinkedIn, talks, and podcasts; pre-sales tactics like free intro calls, problem discovery, and clear proposals; pricing models and rate-setting strategies; financial planning, capacity management, and avoiding burnout; plus specialization, productizing consulting, and scaling options (agency, product, or return to employment). The episode also addresses administrative essentials for freelancers in Germany (registration, VAT, payments), accounting choices, professional liability, and how to compete in a global remote market. Listen for concrete advice on client-finding, scope discipline, and deliverables so you can evaluate whether freelancing in machine learning is the right next step and how to start with a safety net.' topics: - freelance - consulting @@ -59,7 +59,7 @@ quotableClips: startOffset: 928 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=928 endOffset: 1149 -- name: Intro Calls & Pre‑sales — Free Meetings, Qualification, and Trust Building +- name: Intro Calls & Pre-sales — Free Meetings, Qualification, and Trust Building startOffset: 1149 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1149 endOffset: 1297 @@ -71,7 +71,7 @@ quotableClips: startOffset: 1338 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1338 endOffset: 1432 -- name: Pricing Models — Hourly, Fixed‑Price, and Value‑Based Tradeoffs +- name: Pricing Models — Hourly, Fixed-Price, and Value-Based Tradeoffs startOffset: 1432 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1432 endOffset: 1777 @@ -95,7 +95,7 @@ quotableClips: startOffset: 2326 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2326 endOffset: 2424 -- name: Income Comparison — Freelance Earnings vs. Full‑Time Salary +- name: Income Comparison — Freelance Earnings vs. 
Full-Time Salary startOffset: 2424 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2424 endOffset: 2479 @@ -119,7 +119,7 @@ quotableClips: startOffset: 3017 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3017 endOffset: 3165 -- name: Client‑Finding Lessons — Clarity in Writing and Scope Discipline +- name: Client-Finding Lessons — Clarity in Writing and Scope Discipline startOffset: 3165 url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3165 endOffset: 3210 @@ -477,7 +477,7 @@ transcript: sec: 1142 time: '19:02' who: Mikio -- header: Intro Calls & Pre‑sales — Free Meetings, Qualification, and Trust Building +- header: Intro Calls & Pre-sales — Free Meetings, Qualification, and Trust Building - line: The other thing you said, "Now you get a client. The interesting part is what happens next". First, you have this intro call. You figure out if you can help the client. Right? How does it look like? Let's say you get a LinkedIn message @@ -566,7 +566,7 @@ transcript: sec: 1420 time: '23:40' who: Mikio -- header: Pricing Models — Hourly, Fixed‑Price, and Value‑Based Tradeoffs +- header: Pricing Models — Hourly, Fixed-Price, and Value-Based Tradeoffs - line: We already have a question here. How to decide on this rate? The question is about the daily rate. We had some prior chats about this, you also mentioned something about a trade-off between pay per day or per hour or per project. Maybe @@ -873,7 +873,7 @@ transcript: sec: 2423 time: '40:23' who: Alexey -- header: Income Comparison — Freelance Earnings vs. Full‑Time Salary +- header: Income Comparison — Freelance Earnings vs. Full-Time Salary - line: Yeah. I do normal 10-to-6 days. But not all of that time is billed. The actual client hours make maybe up to half of it. And then there's other stuff like, like this [podcast], or working on talks, or just learning something. 
@@ -1129,7 +1129,7 @@ transcript: sec: 3156 time: '52:36' who: Mikio -- header: Client‑Finding Lessons — Clarity in Writing and Scope Discipline +- header: Client-Finding Lessons — Clarity in Writing and Scope Discipline - line: We have quite a few questions. This one is very interesting. What is your most relevant learning when finding clients? What did you learn from this process of finding clients? diff --git a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md index e395edb9..bb55f1d9 100644 --- a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md +++ b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md @@ -19,7 +19,7 @@ description: Build a data science portfolio with open-source computer vision & t hands-on projects, GitHub proof, and interview-ready ML skills. intro: How do you move from a biology background into machine learning and build a data science portfolio that actually gets noticed? In this episode, Isabella Bicalho - — a machine learning engineer and data scientist with three years of hands‑on AI + — a machine learning engineer and data scientist with three years of hands-on AI development and roots in computational research — walks through practical approaches for showcasing skills with open-source, computer vision, and transformer projects.

We cover how to translate domain knowledge from biology into ML problem @@ -1088,9 +1088,9 @@ context: 'Context: Isabella Bicalho’s episode traces a career arc from biology teaching, and community engagement—illustrating practical projects, networking, and pedagogical work as the vehicles for growth. - Core: The unifying idea is that continuous, community‑centered, project‑based learning—combining - hands‑on applied work, open‑source contribution, mentorship, clear communication, - and judicious use of AI tools—serves as the most effective pathway to build job‑ready + Core: The unifying idea is that continuous, community-centered, project-based learning—combining + hands-on applied work, open-source contribution, mentorship, clear communication, + and judicious use of AI tools—serves as the most effective pathway to build job-ready skills, bridge disciplines, and create real-world impact in data science and ML.' --- Links: diff --git a/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md index 63052d89..dab4bb1a 100644 --- a/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md +++ b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md @@ -1206,13 +1206,13 @@ context: 'Context — This episode moves from the guest’s finance-to-self-driv validation pipelines, staged releases, edge cases), system-level questions (reinforcement learning vs perception, multimodal LLMs), and practical career/project advice. 
- Core — Building trustworthy, real‑world AI is an engineering-driven cycle that tightly - couples pragmatic sensor and model choices, efficient on‑device inference, rigorous + Core — Building trustworthy, real-world AI is an engineering-driven cycle that tightly + couples pragmatic sensor and model choices, efficient on-device inference, rigorous data and validation pipelines, staged safe deployment, and ethical/social purpose: the episode’s unifying idea is that successful AI systems aren’t just about better algorithms but about integrating perception, hardware constraints, data practices, - testing, and human-centered impact into a continuous, safety‑first development process - that scales across domains from autonomous vehicles to assistive tech and public‑health + testing, and human-centered impact into a continuous, safety-first development process + that scales across domains from autonomous vehicles to assistive tech and public-health applications.' --- Links: diff --git a/_podcast/from-data-freelancer-to-startup-open-source-products.md b/_podcast/from-data-freelancer-to-startup-open-source-products.md index 79d54494..3e8dd879 100644 --- a/_podcast/from-data-freelancer-to-startup-open-source-products.md +++ b/_podcast/from-data-freelancer-to-startup-open-source-products.md @@ -16,19 +16,19 @@ links: youtube: https://www.youtube.com/watch?v=vOpEQiCsaLw description: 'Discover how to build an open-source data product for Python devs: bootstrap, ship DLT transforms, and drive bottom-up adoption to find PMF.' -intro: How do you move from freelancing to building an open‑source data company that - wins via bottom‑up adoption? In this episode Adrian Brudaru — an economics graduate +intro: How do you move from freelancing to building an open-source data company that + wins via bottom-up adoption? 
In this episode Adrian Brudaru — an economics graduate who pivoted to business analysis in Berlin, then spent years freelancing before - co‑founding a data startup — walks through that transition and the practical tradeoffs + co-founding a data startup — walks through that transition and the practical tradeoffs he encountered.

We cover lessons from freelancing and agency work, why they chose product over agency growth, and the recurring pain of stakeholder alignment versus technical setup. Adrian explains DLT — a declarative JSON→relational transformation - for data pipelines — and why the product targets Python users as a developer‑focused + for data pipelines — and why the product targets Python users as a developer-focused library. Hear how workshops, documentation, and live support doubled as product validation, how scrappy bootstrapping and consulting revenue funded early payroll, - and what signals indicate product–market fit for open‑source tooling.

If - you’re building open‑source data tools, developer tooling, or plotting a bottom‑up - go‑to‑market, this episode offers concrete tactics on iteration, docs-as-product, + and what signals indicate product–market fit for open-source tooling.

If + you’re building open-source data tools, developer tooling, or plotting a bottom-up + go-to-market, this episode offers concrete tactics on iteration, docs-as-product, ecosystem partnerships, and positioning against platforms like Airbyte/Fivetran — helping you prioritize engineering, adoption, and sustainable monetization. topics: @@ -47,7 +47,7 @@ quotableClips: startOffset: 0 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=0 endOffset: 113 -- name: 'Episode Overview: Building an Open‑Source Data Company' +- name: 'Episode Overview: Building an Open-Source Data Company' startOffset: 113 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=113 endOffset: 199 @@ -63,11 +63,11 @@ quotableClips: startOffset: 320 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=320 endOffset: 438 -- name: 'Freelancing Lifestyle: Flexibility and Long‑Term Boredom' +- name: 'Freelancing Lifestyle: Flexibility and Long-Term Boredom' startOffset: 438 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=438 endOffset: 526 -- name: 'Subcontracting Growth: Agency‑like Management Tradeoffs' +- name: 'Subcontracting Growth: Agency-like Management Tradeoffs' startOffset: 526 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=526 endOffset: 651 @@ -87,7 +87,7 @@ quotableClips: startOffset: 976 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=976 endOffset: 1071 -- name: 'Anti‑patterns: Dumping JSON into Data Warehouses' +- name: 'Anti-patterns: Dumping JSON into Data Warehouses' startOffset: 1071 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1071 endOffset: 1178 @@ -99,7 +99,7 @@ quotableClips: startOffset: 1410 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1410 endOffset: 1523 -- name: 'Team Formation: Meeting Co‑founders Through Projects' +- name: 'Team Formation: Meeting Co-founders Through Projects' startOffset: 1523 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1523 endOffset: 1659 @@ -123,7 +123,7 @@ quotableClips: startOffset: 2248 url: 
https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2248 endOffset: 2450 -- name: 'Product Identity: DLT as a Developer‑Focused Library' +- name: 'Product Identity: DLT as a Developer-Focused Library' startOffset: 2450 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2450 endOffset: 2483 @@ -135,7 +135,7 @@ quotableClips: startOffset: 2640 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2640 endOffset: 2876 -- name: 'Current Focus: Leading Go‑to‑Market and Bottom‑Up Strategy' +- name: 'Current Focus: Leading Go-to-Market and Bottom-Up Strategy' startOffset: 2876 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2876 endOffset: 3053 @@ -143,7 +143,7 @@ quotableClips: startOffset: 3053 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3053 endOffset: 3310 -- name: 'Roadmap: Paid Complement to the Open‑Source Library' +- name: 'Roadmap: Paid Complement to the Open-Source Library' startOffset: 3310 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3310 endOffset: 3430 @@ -151,7 +151,7 @@ quotableClips: startOffset: 3430 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3430 endOffset: 3491 -- name: 'Positioning vs Platforms: Library‑First vs Airbyte/Fivetran' +- name: 'Positioning vs Platforms: Library-First vs Airbyte/Fivetran' startOffset: 3491 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3491 endOffset: 3641 @@ -159,13 +159,13 @@ quotableClips: startOffset: 3641 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3641 endOffset: 3656 -- name: Episode Wrap‑Up and Next Steps +- name: Episode Wrap-Up and Next Steps startOffset: 3656 url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3656 endOffset: 3583 transcript: - header: Podcast Introduction -- header: 'Episode Overview: Building an Open‑Source Data Company' +- header: 'Episode Overview: Building an Open-Source Data Company' - line: This week, we'll talk about building an open source data company, and not just building but… It's not the first time we have our guest, Adrian, on this podcast. 
Before, we spoke about being a data freelancer, so we'll talk about building @@ -284,7 +284,7 @@ transcript: sec: 361 time: '6:01' who: Alexey -- header: 'Freelancing Lifestyle: Flexibility and Long‑Term Boredom' +- header: 'Freelancing Lifestyle: Flexibility and Long-Term Boredom' - line: Exactly. The customer typically already wants to know how much they're roughly going to pay for what they get. They don't actually care about your hourly rate, generally. They care about the final outcome and what it costs them. @@ -314,7 +314,7 @@ transcript: sec: 521 time: '8:41' who: Alexey -- header: 'Subcontracting Growth: Agency‑like Management Tradeoffs' +- header: 'Subcontracting Growth: Agency-like Management Tradeoffs' - line: Go fishing on a Wednesday, for example. You can decide every day what you're doing more or less. Of course, you need to be civilized and do it within the boundaries of other people working with you. But you do have a lot of autonomy and freedom. @@ -462,7 +462,7 @@ transcript: sec: 976 time: '16:16' who: Adrian -- header: 'Anti‑patterns: Dumping JSON into Data Warehouses' +- header: 'Anti-patterns: Dumping JSON into Data Warehouses' - line: From what I heard from you… I know a bit about the product (the tool) you’re working on. What I heard is –when you have a data warehouse, you don't just have it for the sake of having it, you need to put some data in it. Then you have a @@ -603,7 +603,7 @@ transcript: sec: 1509 time: '25:09' who: Alexey -- header: 'Team Formation: Meeting Co‑founders Through Projects' +- header: 'Team Formation: Meeting Co-founders Through Projects' - line: So it's a classic story – we met at work. On my last project, the guy that hired me had been working for this company for six years. He had previously founded some companies. 
And basically, I ended up working with him to build this Growth @@ -903,7 +903,7 @@ transcript: sec: 2448 time: '40:48' who: Alexey -- header: 'Product Identity: DLT as a Developer‑Focused Library' +- header: 'Product Identity: DLT as a Developer-Focused Library' - line: Yes. It's pretty simple. It's called Data Load Tool (DLT for short). I often like to tell people, “Don't think of it as a data loading tool, think of it as a pipeline building tool.” And the reason for this is because it's a developer @@ -1062,7 +1062,7 @@ transcript: sec: 2871 time: '47:51' who: Alexey -- header: 'Current Focus: Leading Go‑to‑Market and Bottom‑Up Strategy' +- header: 'Current Focus: Leading Go-to-Market and Bottom-Up Strategy' - line: Exactly. Basically, what this means is that you need to figure out what needs to happen next – figure out some kind of way to do it – and then try to get help to do more of it in a better way, if that pays off, kind of. So I'm doing a lot @@ -1185,7 +1185,7 @@ transcript: sec: 3302 time: '55:02' who: Alexey -- header: 'Roadmap: Paid Complement to the Open‑Source Library' +- header: 'Roadmap: Paid Complement to the Open-Source Library' - line: Yes, it's very hard to raise money for just research, right? Basically, we have got a product market fit with our library. Now we're working towards a paid solution. That paid solution would be something complimentary. It wouldn't limit @@ -1244,7 +1244,7 @@ transcript: sec: 3485 time: '58:05' who: Alexey -- header: 'Positioning vs Platforms: Library‑First vs Airbyte/Fivetran' +- header: 'Positioning vs Platforms: Library-First vs Airbyte/Fivetran' - line: We don't really want to go… Airbyte is a platform. We’ll never be a platform in that way. Even if we do offer some kind of orchestration, that is not our selling point. We don't want to be another Fivetran. 
Airbyte, currently, is kind of trying @@ -1290,7 +1290,7 @@ transcript: sec: 3643 time: '1:00:43' who: Adrian -- header: Episode Wrap‑Up and Next Steps +- header: Episode Wrap-Up and Next Steps - line: Okay. That's all we have time for today. We are a bit… We took three more minutes than we should have. Thanks a lot for joining us today and sharing your experience. I'm really curious. I think the last time we had an interview was @@ -1309,11 +1309,11 @@ transcript: sec: 3696 time: '1:01:36' who: Alexey -context: Turning hands‑on consulting and hard‑won data engineering experience into - a library‑first, open‑source company that solves a concrete pain—declarative JSON→relational +context: Turning hands-on consulting and hard-won data engineering experience into + a library-first, open-source company that solves a concrete pain—declarative JSON→relational transformations for Python users—by validating through workshops and docs, iterating - with real user feedback, and scaling via bottom‑up adoption, ecosystem integrations, - and paid complementary offerings rather than agency growth or platform lock‑in. + with real user feedback, and scaling via bottom-up adoption, ecosystem integrations, + and paid complementary offerings rather than agency growth or platform lock-in. --- Links: diff --git a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md index a9132ad9..c1203b7d 100644 --- a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md +++ b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md @@ -21,10 +21,10 @@ intro: How do you pivot from DevOps to data engineering without starting over? 
I this episode Agita Jaunzeme — a DevOps/DataOps engineer, manager, community builder and NGO founder — breaks down practical strategies for career transitions that center on automation, open source participation, and volunteering.

Agita draws - on experience across corporate, startup, open source and non‑governmental sectors + on experience across corporate, startup, open source and non-governmental sectors and shares how automation and DevOps practices translate to data engineering and DataOps. We discuss using open source projects to build credibility, volunteering - and community work to gain hands‑on experience and networks, and concrete approaches + and community work to gain hands-on experience and networks, and concrete approaches to getting promoted or making purposeful career pivots. Agita also talks about designing work that aligns with passion and purpose, including founding an NGO to support expats and locals in Porto.

Listeners will come away with actionable ideas diff --git a/_podcast/from-game-ai-to-modern-ai-agents.md b/_podcast/from-game-ai-to-modern-ai-agents.md index e6b05900..adf805c5 100644 --- a/_podcast/from-game-ai-to-modern-ai-agents.md +++ b/_podcast/from-game-ai-to-modern-ai-agents.md @@ -1048,16 +1048,16 @@ transcript: who: Michael context: 'Context: The episode follows a two-decade arc from game-AI research and evolutionary/RL methods through industry product leadership to present work on LLM-driven - multi‑agent assistants—covering technical deep dives (prompt engineering, orchestration + multi-agent assistants—covering technical deep dives (prompt engineering, orchestration vs flow, sequential thinking servers, code generation, procedural content), tooling and deployment challenges (local models, model specialization, monitoring), and career/publishing lessons. - Core narrative: The unifying idea is that practical, production‑ready AI agents - are built by applying game‑AI engineering principles—minimal, modular task decomposition; - evolutionary and learning‑based search; and clear orchestration patterns—to modern - LLMs and multi‑agent systems, balancing creative capabilities with efficiency, tooling, - and real‑world deployability.' + Core narrative: The unifying idea is that practical, production-ready AI agents + are built by applying game-AI engineering principles—minimal, modular task decomposition; + evolutionary and learning-based search; and clear orchestration patterns—to modern + LLMs and multi-agent systems, balancing creative capabilities with efficiency, tooling, + and real-world deployability.' 
--- Links: diff --git a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md index 5879b045..aebf3f69 100644 --- a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md +++ b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md @@ -21,10 +21,10 @@ intro: How do you move from collider physics to industry data science while keep rigorous research software engineering practices, succeeding in interviews, and giving or getting effective mentorship? In this episode Anastasia Karavdina — a particle physicist turned data scientist who worked on Large Hadron Collider experiments - and later built AI solutions at Blue Yonder and Kaufland e‑commerce — walks through + and later built AI solutions at Blue Yonder and Kaufland e-commerce — walks through that journey.

We start with collider basics (particle acceleration, detector imaging, event volumes, and roles in large collaborations) to show the data scale - and statistical thinking that map to industry. Anastasia explains dual hardware‑and‑analysis + and statistical thinking that map to industry. Anastasia explains dual hardware-and-analysis roles, how multivariate analysis translates to machine learning, and concrete research software engineering practices like version control and CI/CD. She also covers interview prep (technical fit, behavioral stories, cultural fit in Germany) and evolving hiring diff --git a/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md index 52160a62..5659bd38 100644 --- a/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md +++ b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md @@ -84,7 +84,7 @@ quotableClips: startOffset: 1392 url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1392 endOffset: 1491 -- name: 'Infrastructure Choices: Self‑Hosted Tooling vs DBT Cloud' +- name: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' startOffset: 1491 url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1491 endOffset: 1506 @@ -105,7 +105,7 @@ quotableClips: startOffset: 2026 url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2026 endOffset: 2130 -- name: 'Nontraditional Background: Classics to Data — Just‑In‑Time Learning and Udemy +- name: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and Udemy SQL' startOffset: 2130 url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2130 @@ -135,7 +135,7 @@ quotableClips: startOffset: 3130 url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3130 endOffset: 3226 -- name: 'Contact & Wrap‑Up: Finding Nikola on LinkedIn and Episode Close' +- name: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' startOffset: 3226 url: 
https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3226 endOffset: 3274 @@ -566,7 +566,7 @@ transcript: sec: 1487 time: '24:47' who: Nikola -- header: 'Infrastructure Choices: Self‑Hosted Tooling vs DBT Cloud' +- header: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' - line: Do you host all these things yourself? For example, when it comes to DBT, do you use their cloud? sec: 1491 @@ -756,7 +756,7 @@ transcript: sec: 2127 time: '35:27' who: Alexey -- header: 'Nontraditional Background: Classics to Data — Just‑In‑Time Learning and +- header: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and Udemy SQL' - line: No, I studied classics, which are Latin and ancient Greek. [laughs] sec: 2130 @@ -1066,7 +1066,7 @@ transcript: sec: 3221 time: '53:41' who: Nikola -- header: 'Contact & Wrap‑Up: Finding Nikola on LinkedIn and Episode Close' +- header: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' - line: Profoundly Optimistic is also a good name. [both laugh] If somebody has questions for you, how can they find you? Is it LinkedIn or are there some other ways to contact you? diff --git a/_podcast/from-math-graduate-to-data-analytics.md b/_podcast/from-math-graduate-to-data-analytics.md index edfecb9d..8c6c08fa 100644 --- a/_podcast/from-math-graduate-to-data-analytics.md +++ b/_podcast/from-math-graduate-to-data-analytics.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=qh6-HDhw2xY description: 'Discover data analytics: build a portfolio, master SQL & networking, interview prep, cold outreach and project READMEs to land job offers faster.' -intro: 'How do you actually break into data analytics — and what combination of networking, portfolio work, SQL skills, and interview prep gets you hired? 
In this episode, Juan Pablo Murillo, an AI and data professional now at Google with prior roles as an Amazon Business Intelligence Engineer and data scientist at T‑Mobile, walks through a practical path from math grad to analytics roles.

We cover the full playbook: where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present projects for hiring managers. Juan addresses bootcamp trade‑offs, networking wins from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, finding contract or pro bono work, and resume/STAR interview prep. He also discusses role realities for BI and analytics engineering and employer branding to build credibility.

Listen for actionable steps and specific tactics—how to structure three portfolio projects, message templates for outreach, and interview preparation tips—to help you break into data analytics, improve SQL interview readiness, and turn public work into job opportunities.' +intro: 'How do you actually break into data analytics — and what combination of networking, portfolio work, SQL skills, and interview prep gets you hired? In this episode, Juan Pablo Murillo, an AI and data professional now at Google with prior roles as an Amazon Business Intelligence Engineer and data scientist at T-Mobile, walks through a practical path from math grad to analytics roles.

We cover the full playbook: where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present projects for hiring managers. Juan addresses bootcamp trade-offs, networking wins from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, finding contract or pro bono work, and resume/STAR interview prep. He also discusses role realities for BI and analytics engineering and employer branding to build credibility.

Listen for actionable steps and specific tactics—how to structure three portfolio projects, message templates for outreach, and interview preparation tips—to help you break into data analytics, improve SQL interview readiness, and turn public work into job opportunities.' topics: - career transition - data analytics @@ -62,7 +62,7 @@ quotableClips: startOffset: 1110 url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1110 endOffset: 1217 -- name: 'Uncrowded Doors: Alternative Job‑Hunting Strategies' +- name: 'Uncrowded Doors: Alternative Job-Hunting Strategies' startOffset: 1217 url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1217 endOffset: 1286 @@ -443,7 +443,7 @@ transcript: sec: 1201 time: '20:01' who: Alexey -- header: 'Uncrowded Doors: Alternative Job‑Hunting Strategies' +- header: 'Uncrowded Doors: Alternative Job-Hunting Strategies' - line: The market is tough for people without experience. If you're in that group, you have to hustle. You have to look for alternative ways to get in front of a hiring manager. You have to think outside the box. You have to look for the uncrowded diff --git a/_podcast/from-physics-to-computer-vision-career-transition.md b/_podcast/from-physics-to-computer-vision-career-transition.md index cce3d2a6..fbf41f2f 100644 --- a/_podcast/from-physics-to-computer-vision-career-transition.md +++ b/_podcast/from-physics-to-computer-vision-career-transition.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/from-physics-to-machine-learning-tatiana-gabruseva/id1541710331?i=1000521740775 description: 'Master computer vision & deep learning with a clear roadmap: Kaggle projects, mentorship strategies and interview prep to land roles and build deployed models.' -intro: How do you switch into computer vision and deep learning from a non‑industry background — and build a portfolio that lands interviews? 
In this episode, Tatiana Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master now working as a Senior ML Engineer at Cork University Hospital, maps a practical career-change roadmap. Drawing on her move from a physics PhD during maternity leave, Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), hands‑on projects (Kaggle competitions, internships, Omdena‑style collaborations, end‑to‑end pet projects with data collection, labeling, deployment and Docker), and where to start Kaggle with minimal Python.

You’ll hear tactical advice on mentorship — finding and nurturing long‑term mentors — plus networking, team building for competitions and papers, and overcoming impostor syndrome with mock interviews and LeetCode practice. She also shares prioritization strategies (Pareto, outsourcing), mental rehearsal techniques, boundary setting, and self‑care to avoid burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, and connect with the data science community to accelerate a switch into computer vision and deep learning +intro: How do you switch into computer vision and deep learning from a non-industry background — and build a portfolio that lands interviews? In this episode, Tatiana Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master now working as a Senior ML Engineer at Cork University Hospital, maps a practical career-change roadmap. Drawing on her move from a physics PhD during maternity leave, Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), hands-on projects (Kaggle competitions, internships, Omdena-style collaborations, end-to-end pet projects with data collection, labeling, deployment and Docker), and where to start Kaggle with minimal Python.

You’ll hear tactical advice on mentorship — finding and nurturing long-term mentors — plus networking, team building for competitions and papers, and overcoming impostor syndrome with mock interviews and LeetCode practice. She also shares prioritization strategies (Pareto, outsourcing), mental rehearsal techniques, boundary setting, and self-care to avoid burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, and connect with the data science community to accelerate a switch into computer vision and deep learning topics: - career transition - physics diff --git a/_podcast/from-semiconductor-data-to-applied-machine-learning.md b/_podcast/from-semiconductor-data-to-applied-machine-learning.md index b5e2d919..1789b6d6 100644 --- a/_podcast/from-semiconductor-data-to-applied-machine-learning.md +++ b/_podcast/from-semiconductor-data-to-applied-machine-learning.md @@ -1160,10 +1160,10 @@ transcript: sec: 4388 time: '1:13:08' who: Dashel -context: 'A single through-line: the episode is about a hands‑on, end‑to‑end journey - into applied machine learning — a multidisciplinary career pivot powered by self‑education - and cohort/community support that takes messy, high‑frequency industrial data through - pragmatic tool‑building, model development, explainability tradeoffs, and MLOps +context: 'A single through-line: the episode is about a hands-on, end-to-end journey + into applied machine learning — a multidisciplinary career pivot powered by self-education + and cohort/community support that takes messy, high-frequency industrial data through + pragmatic tool-building, model development, explainability tradeoffs, and MLOps (APIs, containers, Terraform, ONNX) into real production impact, with a commitment to teaching and scaling that practice to others.' 
--- diff --git a/_podcast/from-software-engineering-to-leading-data-science-teams.md b/_podcast/from-software-engineering-to-leading-data-science-teams.md index 20086eaf..2dbafc14 100644 --- a/_podcast/from-software-engineering-to-leading-data-science-teams.md +++ b/_podcast/from-software-engineering-to-leading-data-science-teams.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=xyTfqIWeKf8 description: 'Learn to transition into a Data Science Manager: master search engineering, machine learning and leadership to hire, scale teams and measure business impact.' -intro: How do you move from hands-on software engineering into leading data science teams while staying effective on search and machine learning projects? In this episode Sadat Anwar — a people‑centric Data Science Manager and former software engineer fluent in Java, Scala and Python — maps his path from an electronics and informatics background to research in computer vision at Fraunhofer and production search work at OLX.

We cover practical search engineering topics (Solr autoscaling, decoupling search from a monolith, Kotlin services with Python ML satellites), early ML projects and experimentation strategies (master’s thesis on neural nets, 20% time wins, “act before you think”), and engineering safety nets like feature flags, backups and monitoring. Sadat also walks through the promotion/hiring process, documenting leadership evidence, people management challenges (conflict resolution, hiring, motivation loss when stepping away from code), and transitioning into data science management with NLP, trust & safety and fraud detection responsibilities.

Listen for concrete advice on measuring managerial impact, leveraging EM experience to lead data science teams, and tactical steps for engineers aiming to become data science managers in search and ML domains +intro: How do you move from hands-on software engineering into leading data science teams while staying effective on search and machine learning projects? In this episode Sadat Anwar — a people-centric Data Science Manager and former software engineer fluent in Java, Scala and Python — maps his path from an electronics and informatics background to research in computer vision at Fraunhofer and production search work at OLX.

We cover practical search engineering topics (Solr autoscaling, decoupling search from a monolith, Kotlin services with Python ML satellites), early ML projects and experimentation strategies (master’s thesis on neural nets, 20% time wins, “act before you think”), and engineering safety nets like feature flags, backups and monitoring. Sadat also walks through the promotion/hiring process, documenting leadership evidence, people management challenges (conflict resolution, hiring, motivation loss when stepping away from code), and transitioning into data science management with NLP, trust & safety and fraud detection responsibilities.

Listen for concrete advice on measuring managerial impact, leveraging EM experience to lead data science teams, and tactical steps for engineers aiming to become data science managers in search and ML domains topics: - career transition - software engineering @@ -97,7 +97,7 @@ quotableClips: startOffset: 2026 url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2026 endOffset: 2176 -- name: 'Transition Pain Points: Dopamine Loss, Dropping Hands‑On Coding, and Withdrawal' +- name: 'Transition Pain Points: Dopamine Loss, Dropping Hands-On Coding, and Withdrawal' startOffset: 2176 url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2176 endOffset: 2448 @@ -941,7 +941,7 @@ transcript: sec: 2174 time: '36:14' who: Alexey -- header: 'Transition Pain Points: Dopamine Loss, Dropping Hands‑On Coding, and Withdrawal' +- header: 'Transition Pain Points: Dopamine Loss, Dropping Hands-On Coding, and Withdrawal' - line: When you’re coding, right. You create a merge request – bam, that's dopamine. You get an approval – another shot of dopamine. You hit the merge button – dopamine. Deploy – dopamine. A/B test started – dopamine. 
There’s dopamine throughout the diff --git a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md index edefe978..c1bad849 100644 --- a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md +++ b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md @@ -42,7 +42,7 @@ quotableClips: startOffset: 13 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=13 endOffset: 64 -- name: 'Career Pivot: Transition from full‑stack engineering to data science' +- name: 'Career Pivot: Transition from full-stack engineering to data science' startOffset: 64 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=64 endOffset: 161 @@ -54,11 +54,11 @@ quotableClips: startOffset: 287 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=287 endOffset: 413 -- name: 'Leadership Learning: Trial‑and‑error development of soft skills' +- name: 'Leadership Learning: Trial-and-error development of soft skills' startOffset: 413 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=413 endOffset: 541 -- name: 'Problem Framing: Technical context and product‑level understanding' +- name: 'Problem Framing: Technical context and product-level understanding' startOffset: 541 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=541 endOffset: 693 @@ -70,7 +70,7 @@ quotableClips: startOffset: 925 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=925 endOffset: 1042 -- name: 'ML Project Complexity: Resource needs and cross‑functional buy‑in' +- name: 'ML Project Complexity: Resource needs and cross-functional buy-in' startOffset: 1042 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1042 endOffset: 1248 @@ -78,11 +78,11 @@ quotableClips: startOffset: 1248 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1248 endOffset: 1398 -- name: 'Demo Design: Visualizations and user‑centric proof‑of‑concepts' +- name: 'Demo Design: 
Visualizations and user-centric proof-of-concepts' startOffset: 1398 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1398 endOffset: 1575 -- name: 'Risk Communication: Explaining model trade‑offs without raw accuracy' +- name: 'Risk Communication: Explaining model trade-offs without raw accuracy' startOffset: 1575 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1575 endOffset: 1697 @@ -114,7 +114,7 @@ quotableClips: startOffset: 2437 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2437 endOffset: 2675 -- name: 'Full‑Stack ML: Importance of software engineering for production ML' +- name: 'Full-Stack ML: Importance of software engineering for production ML' startOffset: 2675 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2675 endOffset: 2878 @@ -126,7 +126,7 @@ quotableClips: startOffset: 3037 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3037 endOffset: 3182 -- name: Episode Wrap‑Up and Final Remarks +- name: Episode Wrap-Up and Final Remarks startOffset: 3182 url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3182 endOffset: 3203 @@ -152,7 +152,7 @@ transcript: sec: 56 time: 0:56 who: Alexey -- header: 'Career Pivot: Transition from full‑stack engineering to data science' +- header: 'Career Pivot: Transition from full-stack engineering to data science' - line: Before we go into our main topic of these unwritten rules, let's start with your background. Can you tell us about your career journeys so far? sec: 64 @@ -291,7 +291,7 @@ transcript: sec: 377 time: '6:17' who: Jack -- header: 'Leadership Learning: Trial‑and‑error development of soft skills' +- header: 'Leadership Learning: Trial-and-error development of soft skills' - line: There’s no school for VPs of data science, right? [Jack agrees] So you just have to… How do you actually learn these skills? 
sec: 413 @@ -343,7 +343,7 @@ transcript: sec: 509 time: '8:29' who: Alexey -- header: 'Problem Framing: Technical context and product‑level understanding' +- header: 'Problem Framing: Technical context and product-level understanding' - line: Yeah, I would say it's a mix of trial and error and it's also a mix of observing what works for others. Another thing that is not really taught very often in any kind of school environment is the importance of technical problem framing and @@ -489,7 +489,7 @@ transcript: sec: 1004 time: '16:44' who: Jack -- header: 'ML Project Complexity: Resource needs and cross‑functional buy‑in' +- header: 'ML Project Complexity: Resource needs and cross-functional buy-in' - line: Okay. We'll see how relevant it is – I hope it is relevant to the actual discussion we plan to have today, which is about the rules for success in machine learning. Probably it is related. I just want to summarize. If you want to be a technical @@ -593,7 +593,7 @@ transcript: sec: 1302 time: '21:42' who: Jack -- header: 'Demo Design: Visualizations and user‑centric proof‑of‑concepts' +- header: 'Demo Design: Visualizations and user-centric proof-of-concepts' - line: You said visuals are important to them – what do you mean by that? Is having a demo with a user interface where they can play around important, or did you mean something else? Or did you mean planting a picture in their head or something @@ -655,7 +655,7 @@ transcript: sec: 1526 time: '25:26' who: Jack -- header: 'Risk Communication: Explaining model trade‑offs without raw accuracy' +- header: 'Risk Communication: Explaining model trade-offs without raw accuracy' - line: If you start talking about accuracy – you say, “Okay, this model is 70% accurate,” which may or may not be a good number, depending on the model, but to the stakeholders, it might sound scary like, “Ooh, 30% error rate. 
30% of the time, it will make @@ -1064,7 +1064,7 @@ transcript: sec: 2672 time: '44:32' who: Jack -- header: 'Full‑Stack ML: Importance of software engineering for production ML' +- header: 'Full-Stack ML: Importance of software engineering for production ML' - line: Well, I guess we have time for one or two more rules. I guess you have a bunch of them, right? What's the third one? sec: 2675 @@ -1235,7 +1235,7 @@ transcript: sec: 3040 time: '50:40' who: Jack -- header: Episode Wrap‑Up and Final Remarks +- header: Episode Wrap-Up and Final Remarks - line: We will all subscribe – follow you on LinkedIn – and we will see all the updates about your new endeavor. I don't like saying good luck because you probably don't need luck – you need something like perseverance, more – but luck is also important. @@ -1251,15 +1251,15 @@ transcript: time: '53:36' who: Jack context: 'Context — A career arc from software engineer to VP of ML frames concrete - stories about promotion, informal leadership, stakeholder selling, demo-driven buy‑in, - rapid prototyping, baseline-first experiments, domain immersion, and building full‑stack + stories about promotion, informal leadership, stakeholder selling, demo-driven buy-in, + rapid prototyping, baseline-first experiments, domain immersion, and building full-stack production capabilities. 
Core narrative — Success in applied machine learning is not primarily about squeezing marginal accuracy from models but about bridging technical craft and business impact: - become a product‑focused, full‑stack practitioner and leader who rapidly validates + become a product-focused, full-stack practitioner and leader who rapidly validates hypotheses with simple baselines and demos, speaks the language of stakeholders, - builds trust and reputation, communicates trade‑offs clearly, and embeds ML into + builds trust and reputation, communicates trade-offs clearly, and embeds ML into real user workflows so technical work directly drives measurable outcomes.' --- Links: diff --git a/_podcast/get-data-scientist-job.md b/_podcast/get-data-scientist-job.md index 362053ce..83343494 100644 --- a/_podcast/get-data-scientist-job.md +++ b/_podcast/get-data-scientist-job.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/standing-out-as-a-data-scientist-luke-whipps/id1541710331?i=1000502844994 description: Master data scientist resumes, portfolios & interviews—insider recruiter workflow, CV tips, portfolio impact, negotiation and outreach to land roles faster -intro: How do you actually land a data scientist role — from a resume that passes screening to a portfolio that wins interviews and an offer that closes? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast with 8+ years recruiting experience, walks through the recruiter workflow and practical steps data scientists can use to improve hiring outcomes.

We cover Luke’s six‑stage recruitment process (role definition to close), how to define data scientist roles across companies, and recruiter expectations for CV design, information hierarchy, and industry/use‑case alignment. Learn how to structure portfolios to link tech stack to concrete projects, craft a clear career narrative that demonstrates business impact, and prepare for interviews and negotiations. Junior candidates will get guidance on choosing an industry and showing purpose; academics learn how to productize research for industry. You’ll also hear tactical advice on tailored applications, LinkedIn outreach, candidate funnel sizes, salary signals, job‑title alignment, and acceptable tenure patterns.

Listen to gain actionable tips for resumes, portfolios, interviews, and working effectively with recruiters to increase your chances of landing a data scientist role +intro: How do you actually land a data scientist role — from a resume that passes screening to a portfolio that wins interviews and an offer that closes? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast with 8+ years recruiting experience, walks through the recruiter workflow and practical steps data scientists can use to improve hiring outcomes.

We cover Luke’s six-stage recruitment process (role definition to close), how to define data scientist roles across companies, and recruiter expectations for CV design, information hierarchy, and industry/use-case alignment. Learn how to structure portfolios to link tech stack to concrete projects, craft a clear career narrative that demonstrates business impact, and prepare for interviews and negotiations. Junior candidates will get guidance on choosing an industry and showing purpose; academics learn how to productize research for industry. You’ll also hear tactical advice on tailored applications, LinkedIn outreach, candidate funnel sizes, salary signals, job-title alignment, and acceptable tenure patterns.

Listen to gain actionable tips for resumes, portfolios, interviews, and working effectively with recruiters to increase your chances of landing a data scientist role topics: - data science - career growth @@ -38,11 +38,11 @@ quotableClips: startOffset: 177 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=177 endOffset: 223 -- name: 'Neural AI origin: founding principles and non‑transactional recruiting' +- name: 'Neural AI origin: founding principles and non-transactional recruiting' startOffset: 223 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=223 endOffset: 316 -- name: 'Community focus: podcasts, events and value‑driven talent work' +- name: 'Community focus: podcasts, events and value-driven talent work' startOffset: 316 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=316 endOffset: 422 @@ -50,7 +50,7 @@ quotableClips: startOffset: 422 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=422 endOffset: 455 -- name: 'Recruitment workflow: six‑stage process from definition to close' +- name: 'Recruitment workflow: six-stage process from definition to close' startOffset: 455 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=455 endOffset: 495 @@ -70,7 +70,7 @@ quotableClips: startOffset: 847 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=847 endOffset: 975 -- name: Industry and use‑case alignment on resumes for better matches +- name: Industry and use-case alignment on resumes for better matches startOffset: 975 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=975 endOffset: 1190 @@ -90,7 +90,7 @@ quotableClips: startOffset: 1639 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1639 endOffset: 1810 -- name: 'Job‑hopping: red flags, ideal tenure and acceptable exceptions' +- name: 'Job-hopping: red flags, ideal tenure and acceptable exceptions' startOffset: 1810 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1810 endOffset: 1942 @@ -122,7 +122,7 @@ quotableClips: startOffset: 3142 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3142 
endOffset: 3407 -- name: 'CV formats & length: country differences and the two‑page guideline' +- name: 'CV formats & length: country differences and the two-page guideline' startOffset: 3407 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3407 endOffset: 3531 @@ -138,7 +138,7 @@ quotableClips: startOffset: 3727 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3727 endOffset: 4057 -- name: 'Episode summary: purpose‑driven candidates and standing out as a data scientist' +- name: 'Episode summary: purpose-driven candidates and standing out as a data scientist' startOffset: 4057 url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=4057 endOffset: 4127 @@ -182,7 +182,7 @@ transcript: sec: 177 time: '2:57' who: Luke -- header: 'Neural AI origin: founding principles and non‑transactional recruiting' +- header: 'Neural AI origin: founding principles and non-transactional recruiting' - line: Neural came around from a couple of different perspectives. One of those perspectives was… I was just about to turn 30. I had two choices that I could have made. I could either continue working for businesses that I have to become a part of and @@ -206,7 +206,7 @@ transcript: sec: 256 time: '4:16' who: Luke -- header: 'Community focus: podcasts, events and value‑driven talent work' +- header: 'Community focus: podcasts, events and value-driven talent work' - line: For me, as I say Neural was built on the back of wanting to create something different that is value and community driven, rather than just, “Hey, do you want to work on a new business?” So yeah, we started in early 2020. So probably not @@ -249,7 +249,7 @@ transcript: sec: 422 time: '7:02' who: Luke -- header: 'Recruitment workflow: six‑stage process from definition to close' +- header: 'Recruitment workflow: six-stage process from definition to close' - line: One of the big problems – or the big challenges of the work that we do is – that every single company is different. 
That in itself is a challenge, because you need to first understand the problems and the different challenges that those @@ -396,7 +396,7 @@ transcript: sec: 940 time: '15:40' who: Luke -- header: Industry and use‑case alignment on resumes for better matches +- header: Industry and use-case alignment on resumes for better matches - line: Number one is, I’ll instantly look for the crossover between the business and the role that they're applying for with the company that they're currently working in. The reason I say that is not because it's a name game, or… It's not @@ -616,7 +616,7 @@ transcript: sec: 1803 time: '30:03' who: Luke -- header: 'Job‑hopping: red flags, ideal tenure and acceptable exceptions' +- header: 'Job-hopping: red flags, ideal tenure and acceptable exceptions' - line: We have a question. How often is too often when changing jobs? What is currently the average time that the person stays in a job? What would be a red flag for you? @@ -999,7 +999,7 @@ transcript: sec: 3371 time: '56:11' who: Alexey -- header: 'CV formats & length: country differences and the two‑page guideline' +- header: 'CV formats & length: country differences and the two-page guideline' - line: I think the ideal length is two pages. I don't think you can get enough content on one page to make it sing. Three pages is borderline too much. But two pages is ideal. That's the balance between having enough deep content that will give @@ -1146,7 +1146,7 @@ transcript: sec: 4037 time: '1:07:17' who: Alexey -- header: 'Episode summary: purpose‑driven candidates and standing out as a data scientist' +- header: 'Episode summary: purpose-driven candidates and standing out as a data scientist' - line: I thought about this for a while. One thing that really makes candidates stand out to me — it's understanding their purpose, and doubling down on that. Once you do that, everything else becomes easy. 
If you don't have that, and you're diff --git a/_podcast/get-junior-data-job-and-transferable-skills.md b/_podcast/get-junior-data-job-and-transferable-skills.md index 7fcc6390..5ed42e14 100644 --- a/_podcast/get-junior-data-job-and-transferable-skills.md +++ b/_podcast/get-junior-data-job-and-transferable-skills.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=_U8GrYJvmJM description: 'Master landing junior data jobs: craft achievement-based CVs, highlight transferable skills, ace interviews and beat imposter syndrome with coach tips.' -intro: Struggling to land a junior data job—how do you turn non‑linear experience into a recruiter‑ready CV, prepare for interviews, and push past imposter syndrome? In this episode, Lindsay McQuade, a transformational coach with 20+ years across management consulting, higher education and tech and former Senior Career & Development Coach at SPICED Academy, guides listeners through practical steps for junior data roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED training rated 94% “very good/excellent”) to explain CV writing for data roles, achievement‑based resumes, interview prep and negotiation. Topics include reframing past experience into evidence, identifying transferable skills for data analyst/scientist/engineer roles, tailoring applications by industry, the ikigai framework for career focus, and Berlin’s junior data market trends. We also cover impostor syndrome—its triggers, objective feedback strategies, and structured learning and T‑shaped skills to build confidence. Tune in for clear job search strategy (balanced volume and targeted applications), how to choose a career coach, and practical LinkedIn networking tips to convert applications into interviews +intro: Struggling to land a junior data job—how do you turn non-linear experience into a recruiter-ready CV, prepare for interviews, and push past imposter syndrome? 
In this episode, Lindsay McQuade, a transformational coach with 20+ years across management consulting, higher education and tech and former Senior Career & Development Coach at SPICED Academy, guides listeners through practical steps for junior data roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED training rated 94% “very good/excellent”) to explain CV writing for data roles, achievement-based resumes, interview prep and negotiation. Topics include reframing past experience into evidence, identifying transferable skills for data analyst/scientist/engineer roles, tailoring applications by industry, the ikigai framework for career focus, and Berlin’s junior data market trends. We also cover impostor syndrome—its triggers, objective feedback strategies, and structured learning and T-shaped skills to build confidence. Tune in for clear job search strategy (balanced volume and targeted applications), how to choose a career coach, and practical LinkedIn networking tips to convert applications into interviews topics: - career growth dateadded: 2022-02-12 @@ -28,7 +28,7 @@ quotableClips: startOffset: 68 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=68 endOffset: 302 -- name: Spiced Academy Programs Overview (Full‑Stack & Data Science) +- name: Spiced Academy Programs Overview (Full-Stack & Data Science) startOffset: 302 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=302 endOffset: 360 @@ -40,7 +40,7 @@ quotableClips: startOffset: 560 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=560 endOffset: 711 -- name: Reframing Past Experience into Recruiter‑Friendly Evidence +- name: Reframing Past Experience into Recruiter-Friendly Evidence startOffset: 711 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=711 endOffset: 782 @@ -48,7 +48,7 @@ quotableClips: startOffset: 782 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=782 endOffset: 906 -- name: Achievement‑Based CV Writing vs Responsibility Lists +- name: Achievement-Based 
CV Writing vs Responsibility Lists startOffset: 906 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=906 endOffset: 1014 @@ -88,7 +88,7 @@ quotableClips: startOffset: 2091 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2091 endOffset: 2225 -- name: Objective Feedback vs Distorted Self‑Perception +- name: Objective Feedback vs Distorted Self-Perception startOffset: 2225 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2225 endOffset: 2517 @@ -100,7 +100,7 @@ quotableClips: startOffset: 2695 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2695 endOffset: 2828 -- name: 'Building Confidence: Structured Learning Paths & T‑Shaped Skills' +- name: 'Building Confidence: Structured Learning Paths & T-Shaped Skills' startOffset: 2828 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2828 endOffset: 3070 @@ -120,7 +120,7 @@ quotableClips: startOffset: 3510 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3510 endOffset: 3564 -- name: Episode Wrap‑Up & Final Career Coaching Takeaways +- name: Episode Wrap-Up & Final Career Coaching Takeaways startOffset: 3564 url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3564 endOffset: 3519 @@ -197,7 +197,7 @@ transcript: sec: 106 time: '1:46' who: Lindsay -- header: Spiced Academy Programs Overview (Full‑Stack & Data Science) +- header: Spiced Academy Programs Overview (Full-Stack & Data Science) - line: Interesting. Can you tell us a few words about the school – Spiced Academy? What do you do there? sec: 302 @@ -324,7 +324,7 @@ transcript: sec: 648 time: '10:48' who: Alexey -- header: Reframing Past Experience into Recruiter‑Friendly Evidence +- header: Reframing Past Experience into Recruiter-Friendly Evidence - line: Well, I think the first thing is to have a very honest and open conversation about what they think they failed at. So you give them the space to let all of this come out and then encourage them to think about what their successes were. 
@@ -382,7 +382,7 @@ transcript: sec: 883 time: '14:43' who: Alexey -- header: Achievement‑Based CV Writing vs Responsibility Lists +- header: Achievement-Based CV Writing vs Responsibility Lists - line: Yeah. I'm sure there will have been some very complex analytical things that they've had to work out. But we don't need to know the details of this and they shouldn’t use law terminology that we don't understand – we just want to extract @@ -784,7 +784,7 @@ transcript: sec: 2221 time: '37:01' who: Alexey -- header: Objective Feedback vs Distorted Self‑Perception +- header: Objective Feedback vs Distorted Self-Perception - line: So imagine this person's got someone to do this offset coding challenge. How far are they gonna get with this? Right? I would imagine there's a good chance they're gonna get found out at the technical interview stage. But let's imagine @@ -959,7 +959,7 @@ transcript: sec: 2781 time: '46:21' who: Alexey -- header: 'Building Confidence: Structured Learning Paths & T‑Shaped Skills' +- header: 'Building Confidence: Structured Learning Paths & T-Shaped Skills' - line: Yes, you're right. Yeah, what do you do? I would say this – when we're changing careers, this is a particularly fragile time for the imposter syndrome. As you said, in the workplace it can be even more challenging. Part of the reason for @@ -1198,7 +1198,7 @@ transcript: sec: 3524 time: '58:44' who: Lindsay -- header: Episode Wrap‑Up & Final Career Coaching Takeaways +- header: Episode Wrap-Up & Final Career Coaching Takeaways - line: Okay, yeah. Thanks a lot. Thanks for your time. We should be wrapping up. Thanks a lot for sharing all this advice with us. And thanks, everyone, especially Michael, who needed to wake up at 6am to watch this. 
[laughs] I hope it was worth diff --git a/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md index 89cc0b80..be964fcd 100644 --- a/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md +++ b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md @@ -106,7 +106,7 @@ quotableClips: startOffset: 3163 url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3163 endOffset: 3251 -- name: 'Support mechanisms: regular check-ins, rubber‑duck channels, async help' +- name: 'Support mechanisms: regular check-ins, rubber-duck channels, async help' startOffset: 3251 url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3251 endOffset: 3380 @@ -1153,7 +1153,7 @@ transcript: sec: 3189 time: '53:09' who: Katie -- header: 'Support mechanisms: regular check-ins, rubber‑duck channels, async help' +- header: 'Support mechanisms: regular check-ins, rubber-duck channels, async help' - line: One of the concerns I heard from juniors is that they are afraid of interrupting other people (seniors). The seniors are busy, “This is a very senior person, they have a lot of stuff to work on.” They don't feel that they should be interrupting diff --git a/_podcast/how-to-break-into-data-science.md b/_podcast/how-to-break-into-data-science.md index de46f0b2..2344ce50 100644 --- a/_podcast/how-to-break-into-data-science.md +++ b/_podcast/how-to-break-into-data-science.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=oUycqtMoYr8 description: 'Master data science job hunt and portfolio tactics: actionable projects, recruiter tips, DALL·E 2 basics and FOMO coping strategies to land interviews faster.' -intro: How do you actually break into data science, build a portfolio that gets interviews, and stay sane while every new AI model vies for your attention? 
In this episode Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI (founder of “So you want to be a data scientist?”) — walks through a practical career playbook for job hunting, portfolio building, and coping with FOMO and imposter syndrome.

We cover Mısra’s career path from big data engineering to developer advocacy, what a data scientist’s day‑to‑day looks like, and the typical deliverables hiring managers expect (models, pipelines, reports, presentations). She explains role variants (consultant, in‑house, freelance), tradeoffs between generalist and specialist tracks, and when a master’s or PhD matters. You’ll get concrete job‑hunt tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why real‑world datasets (like NYC Open Data) matter. The episode also includes a clear, high‑level overview of DALL·E 2 and diffusion models, plus strategies for staying current (conferences vs social media) and knowing when a new framework is “good enough.”

Listen to learn actionable steps to refine your portfolio, present data science value to stakeholders, and manage FOMO while advancing your career +intro: How do you actually break into data science, build a portfolio that gets interviews, and stay sane while every new AI model vies for your attention? In this episode Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI (founder of “So you want to be a data scientist?”) — walks through a practical career playbook for job hunting, portfolio building, and coping with FOMO and imposter syndrome.

We cover Mısra’s career path from big data engineering to developer advocacy, what a data scientist’s day-to-day looks like, and the typical deliverables hiring managers expect (models, pipelines, reports, presentations). She explains role variants (consultant, in-house, freelance), tradeoffs between generalist and specialist tracks, and when a master’s or PhD matters. You’ll get concrete job-hunt tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why real-world datasets (like NYC Open Data) matter. The episode also includes a clear, high-level overview of DALL·E 2 and diffusion models, plus strategies for staying current (conferences vs social media) and knowing when a new framework is “good enough.”

Listen to learn actionable steps to refine your portfolio, present data science value to stakeholders, and manage FOMO while advancing your career topics: - data science - career growth @@ -38,7 +38,7 @@ quotableClips: startOffset: 251 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=251 endOffset: 389 -- name: 'Data Scientist Day‑to‑Day: Explaining the Role to Non‑Tech Audiences' +- name: 'Data Scientist Day-to-Day: Explaining the Role to Non-Tech Audiences' startOffset: 389 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=389 endOffset: 541 @@ -46,7 +46,7 @@ quotableClips: startOffset: 541 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=541 endOffset: 658 -- name: 'Role Variants: Consultant, In‑House, and Freelance Responsibilities' +- name: 'Role Variants: Consultant, In-House, and Freelance Responsibilities' startOffset: 658 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=658 endOffset: 849 @@ -58,11 +58,11 @@ quotableClips: startOffset: 943 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=943 endOffset: 1221 -- name: 'DALL·E 2 Overview: Text‑to‑Image Capabilities' +- name: 'DALL·E 2 Overview: Text-to-Image Capabilities' startOffset: 1221 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1221 endOffset: 1301 -- name: 'Diffusion Models: High‑Level Explanation' +- name: 'Diffusion Models: High-Level Explanation' startOffset: 1301 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1301 endOffset: 1659 @@ -82,7 +82,7 @@ quotableClips: startOffset: 2412 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2412 endOffset: 2567 -- name: 'Preferred Setup: Advantages of In‑House Data Science Roles' +- name: 'Preferred Setup: Advantages of In-House Data Science Roles' startOffset: 2567 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2567 endOffset: 2853 @@ -90,7 +90,7 @@ quotableClips: startOffset: 2853 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2853 endOffset: 3032 -- name: 'Breaking In: Job‑Hunting Strategies for Entry‑Level Data Scientists' +- 
name: 'Breaking In: Job-Hunting Strategies for Entry-Level Data Scientists' startOffset: 3032 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3032 endOffset: 3271 @@ -102,7 +102,7 @@ quotableClips: startOffset: 3429 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3429 endOffset: 3494 -- name: 'Real‑World Datasets: Using NYC Open Data and Dirty Data Examples' +- name: 'Real-World Datasets: Using NYC Open Data and Dirty Data Examples' startOffset: 3494 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3494 endOffset: 3702 @@ -237,7 +237,7 @@ transcript: sec: 375 time: '6:15' who: Misra -- header: 'Data Scientist Day‑to‑Day: Explaining the Role to Non‑Tech Audiences' +- header: 'Data Scientist Day-to-Day: Explaining the Role to Non-Tech Audiences' - line: Well, coming back to our main topic – data scientists at work. Maybe there are some parallels to data developer advocates, but you were a data scientist quite recently. Coming back to this topic, imagine you're a data scientist, not @@ -330,7 +330,7 @@ transcript: sec: 541 time: '9:01' who: Misra -- header: 'Role Variants: Consultant, In‑House, and Freelance Responsibilities' +- header: 'Role Variants: Consultant, In-House, and Freelance Responsibilities' - line: Was this something that you did at IBM, or is this something that maybe consultants tend to do more often? Like creating presentations and reports? sec: 658 @@ -515,7 +515,7 @@ transcript: sec: 1139 time: '18:59' who: Misra -- header: 'DALL·E 2 Overview: Text‑to‑Image Capabilities' +- header: 'DALL·E 2 Overview: Text-to-Image Capabilities' - line: How does DALL·E work? Maybe you can tell us in a few sentences for those who have the fear of missing out. Because I do. I see these awesome pictures. 
I think the way it works, as a black box, you give it some prompt like a piece of text, @@ -541,7 +541,7 @@ transcript: sec: 1241 time: '20:41' who: Misra -- header: 'Diffusion Models: High‑Level Explanation' +- header: 'Diffusion Models: High-Level Explanation' - line: Yeah. Well, like it cannot imagine [audio cuts out] how it works. Like all these formulas that are there – they're just scary. I cannot imagine what it looks like for things like DALL·E like must be insane. @@ -941,7 +941,7 @@ transcript: sec: 2563 time: '42:43' who: Misra -- header: 'Preferred Setup: Advantages of In‑House Data Science Roles' +- header: 'Preferred Setup: Advantages of In-House Data Science Roles' - line: Okay, I see that we have quite a lot of questions. I also prepared questions for you, but I think it's better to go through the questions. The first question is about the types of data scientists and this is something we talked about at @@ -1110,7 +1110,7 @@ transcript: sec: 3029 time: '50:29' who: Misra -- header: 'Breaking In: Job‑Hunting Strategies for Entry‑Level Data Scientists' +- header: 'Breaking In: Job-Hunting Strategies for Entry-Level Data Scientists' - line: Okay. For newer data scientists – from courses like yours or boot camps – how would you suggest they break into the oversaturated market for entry-level data scientists? @@ -1255,7 +1255,7 @@ transcript: sec: 3429 time: '57:09' who: Alexey -- header: 'Real‑World Datasets: Using NYC Open Data and Dirty Data Examples' +- header: 'Real-World Datasets: Using NYC Open Data and Dirty Data Examples' - line: Yeah, I think that's a good point. Obviously, people are doing the same projects. But I think with these projects, what you're trying to show is not how great of a model that you're building. 
The model you build might suck and that's fine, diff --git a/_podcast/how-to-grow-your-ml-engineering-career.md b/_podcast/how-to-grow-your-ml-engineering-career.md index f8e00b4a..064728de 100644 --- a/_podcast/how-to-grow-your-ml-engineering-career.md +++ b/_podcast/how-to-grow-your-ml-engineering-career.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=cUxZBXQgZaU description: Discover career transitions into ML, prompt engineering and LLMs—practical debugging tips, transferable skills, hiring insights, and real platform lessons -intro: How do you move from web and game development into building machine learning platforms and working with LLMs—and what practical skills carry over? In this episode Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, gaming and online retail, and currently an ML Platform engineer and internal consultant at Zalando, answers that question through concrete examples and career lessons.

We trace Krzysztof’s path from HTML5, Objective‑C, Swift and Unity to Python, ML platform work (the zflow library and pipeline architecture), and hands‑on experiments with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions between stacks and roles, platform consulting—training, onboarding and user support—prompt engineering tips, debugging strategies (rubber ducking, divide‑and‑conquer), and a real Postgres optimization troubleshooting case. He also discusses transferable skills like SQL, Git and shell, T‑shaped expertise, hiring dynamics, and how to get unstuck with ChatGPT and problem decomposition.

Listen to gain practical guidance on ML platforms, prompt engineering, debugging techniques, and career strategy for transitioning into ML and LLM work—plus actionable resources and prioritization tactics you can apply immediately +intro: How do you move from web and game development into building machine learning platforms and working with LLMs—and what practical skills carry over? In this episode Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, gaming and online retail, and currently an ML Platform engineer and internal consultant at Zalando, answers that question through concrete examples and career lessons.

We trace Krzysztof’s path from HTML5, Objective-C, Swift and Unity to Python, ML platform work (the zflow library and pipeline architecture), and hands-on experiments with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions between stacks and roles, platform consulting—training, onboarding and user support—prompt engineering tips, debugging strategies (rubber ducking, divide-and-conquer), and a real Postgres optimization troubleshooting case. He also discusses transferable skills like SQL, Git and shell, T-shaped expertise, hiring dynamics, and how to get unstuck with ChatGPT and problem decomposition.

Listen to gain practical guidance on ML platforms, prompt engineering, debugging techniques, and career strategy for transitioning into ML and LLM work—plus actionable resources and prioritization tactics you can apply immediately topics: - machine learning - career transitions @@ -36,7 +36,7 @@ quotableClips: startOffset: 132 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=132 endOffset: 384 -- name: 'Mobile & Game Development: HTML5, Objective‑C, Swift, and Unity' +- name: 'Mobile & Game Development: HTML5, Objective-C, Swift, and Unity' startOffset: 384 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=384 endOffset: 425 @@ -56,7 +56,7 @@ quotableClips: startOffset: 959 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=959 endOffset: 1068 -- name: 'From Engineer to Consultant: Reduced Hands‑on Coding' +- name: 'From Engineer to Consultant: Reduced Hands-on Coding' startOffset: 1068 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1068 endOffset: 1106 @@ -84,11 +84,11 @@ quotableClips: startOffset: 2014 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2014 endOffset: 2123 -- name: 'T‑Shaped Expertise: Depth, Breadth, and Career Strategy' +- name: 'T-Shaped Expertise: Depth, Breadth, and Career Strategy' startOffset: 2123 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2123 endOffset: 2257 -- name: 'Debugging as a Strength: Rubber Duck, Divide‑and‑Conquer, and Mentoring' +- name: 'Debugging as a Strength: Rubber Duck, Divide-and-Conquer, and Mentoring' startOffset: 2257 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2257 endOffset: 2692 @@ -108,7 +108,7 @@ quotableClips: startOffset: 3263 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3263 endOffset: 3382 -- name: 'Prioritization Techniques: To‑Do Lists, Deadlines, and Focus' +- name: 'Prioritization Techniques: To-Do Lists, Deadlines, and Focus' startOffset: 3382 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3382 endOffset: 3491 @@ -116,7 +116,7 @@ quotableClips: startOffset: 3491 url: 
https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3491 endOffset: 3632 -- name: Episode Wrap‑up and Key Takeaways +- name: Episode Wrap-up and Key Takeaways startOffset: 3632 url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3632 endOffset: 3582 @@ -272,7 +272,7 @@ transcript: sec: 374 time: '6:14' who: Alexey -- header: 'Mobile & Game Development: HTML5, Objective‑C, Swift, and Unity' +- header: 'Mobile & Game Development: HTML5, Objective-C, Swift, and Unity' - line: I didn't use anything myself. When I was there, it was a very exciting time because Wooga was trying different technology. We started with HTML5, which was a big thing at the time, around 2012. We built and shipped again, but we decided @@ -500,7 +500,7 @@ transcript: sec: 1057 time: '17:37' who: Alexey -- header: 'From Engineer to Consultant: Reduced Hands‑on Coding' +- header: 'From Engineer to Consultant: Reduced Hands-on Coding' - line: Not so much, I must say. I think this is a bit of a disadvantage that I don't code, especially when it comes to more complex problems – I can't solve them anymore as a consultant, compared to my previous role as a software engineer. I still @@ -854,7 +854,7 @@ transcript: sec: 2114 time: '35:14' who: Alexey -- header: 'T‑Shaped Expertise: Depth, Breadth, and Career Strategy' +- header: 'T-Shaped Expertise: Depth, Breadth, and Career Strategy' - line: I was also thinking about how it affects recruiting and finding a job. Very often I saw that companies look for experts. Sometimes it could be a bit scary, like you see a job ad and you see, “We want five years of experience with large @@ -895,7 +895,7 @@ transcript: sec: 2252 time: '37:32' who: Alexey -- header: 'Debugging as a Strength: Rubber Duck, Divide‑and‑Conquer, and Mentoring' +- header: 'Debugging as a Strength: Rubber Duck, Divide-and-Conquer, and Mentoring' - line: I’m not sure which letter it would be, Maybe an M. For example, I got pretty good at using Git. 
Very often, I help my colleagues to resolve problems with history, with rebasing, and things like that. I really also like Shell scripting and learning @@ -1227,7 +1227,7 @@ transcript: sec: 3375 time: '56:15' who: Alexey -- header: 'Prioritization Techniques: To‑Do Lists, Deadlines, and Focus' +- header: 'Prioritization Techniques: To-Do Lists, Deadlines, and Focus' - line: What I do is – I'm a huge fan of to-do lists. If you just write down everything that is on your mind – just try to capture it. And then you can prioritize. Then you can also just drop some things off the list. @@ -1314,7 +1314,7 @@ transcript: sec: 3624 time: '1:00:24' who: Alexey -- header: Episode Wrap‑up and Key Takeaways +- header: Episode Wrap-up and Key Takeaways - line: Sure. sec: 3691 time: '1:01:31' diff --git a/_podcast/how-to-switch-to-ml-tech-without-experience.md b/_podcast/how-to-switch-to-ml-tech-without-experience.md index 109cee76..811cbe35 100644 --- a/_podcast/how-to-switch-to-ml-tech-without-experience.md +++ b/_podcast/how-to-switch-to-ml-tech-without-experience.md @@ -75,7 +75,7 @@ quotableClips: startOffset: 1661 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1661 endOffset: 1932 -- name: 'System Skills: Terminal, Dual‑Boot Linux & Hands‑on Troubleshooting' +- name: 'System Skills: Terminal, Dual-Boot Linux & Hands-on Troubleshooting' startOffset: 1932 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1932 endOffset: 2054 @@ -83,11 +83,11 @@ quotableClips: startOffset: 2054 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2054 endOffset: 2172 -- name: 'Open Source Hack Evenings: Mentorship with scikit‑learn & Gene.ai' +- name: 'Open Source Hack Evenings: Mentorship with scikit-learn & Gene.ai' startOffset: 2172 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2172 endOffset: 2283 -- name: 'Hybrid Events & Outreach: Remote Reach vs. In‑Person Help' +- name: 'Hybrid Events & Outreach: Remote Reach vs. 
In-Person Help' startOffset: 2283 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2283 endOffset: 2451 @@ -107,7 +107,7 @@ quotableClips: startOffset: 3032 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3032 endOffset: 3336 -- name: 'Ecosia Overview: Green Search Engine, Tree‑Planting Mission & Backend (Go)' +- name: 'Ecosia Overview: Green Search Engine, Tree-Planting Mission & Backend (Go)' startOffset: 3336 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3336 endOffset: 3490 @@ -119,7 +119,7 @@ quotableClips: startOffset: 3593 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3593 endOffset: 3628 -- name: Episode Wrap‑Up and Closing Remarks +- name: Episode Wrap-Up and Closing Remarks startOffset: 3628 url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3628 endOffset: 3572 @@ -689,7 +689,7 @@ transcript: sec: 1929 time: '32:09' who: Alexey -- header: 'System Skills: Terminal, Dual‑Boot Linux & Hands‑on Troubleshooting' +- header: 'System Skills: Terminal, Dual-Boot Linux & Hands-on Troubleshooting' - line: Yeah, I did some courses on this, for sure. I think the only thing with doing it in a browser-based tool is you're not actually on your machine. I remember during the Rails Girls Summer of Code, I wanted to dual-loop my machine into Linux, @@ -769,7 +769,7 @@ transcript: sec: 2169 time: '36:09' who: Alexey -- header: 'Open Source Hack Evenings: Mentorship with scikit‑learn & Gene.ai' +- header: 'Open Source Hack Evenings: Mentorship with scikit-learn & Gene.ai' - line: Yeah [chuckles]. Well, actually, we ran one in January and I think we had at least three folks from that event then go on to speak at PyCon.de, which is really awesome. Right now, we're trying to experiment a lot more with being much @@ -802,7 +802,7 @@ transcript: sec: 2267 time: '37:47' who: Alexey -- header: 'Hybrid Events & Outreach: Remote Reach vs. In‑Person Help' +- header: 'Hybrid Events & Outreach: Remote Reach vs. In-Person Help' - line: I mean, yeah, that's true. 
[cross-talk] It's interesting. I think there's different overhead with online. But what we would love to do is hybrid. We found being remote allowed us to reach more people, because we could also do collaborations @@ -1126,7 +1126,7 @@ transcript: sec: 3329 time: '55:29' who: Jessica -- header: 'Ecosia Overview: Green Search Engine, Tree‑Planting Mission & Backend (Go)' +- header: 'Ecosia Overview: Green Search Engine, Tree-Planting Mission & Backend (Go)' - line: I see that we don't have a lot of time left and I really wanted to talk to you about the company where you work – Ecosia. I hope I'm pronouncing it correctly. I know that you're doing some amazing stuff there. So can you tell us more about @@ -1209,7 +1209,7 @@ transcript: sec: 3606 time: '1:00:06' who: Jessica -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: Okay, thanks. Thanks a lot for joining us today, for sharing your story, for telling us about how you did that. And thanks, everyone, for joining us today as well, for asking questions, for being here today. I think that's all for today. diff --git a/_podcast/human-centered-ai-automatic-speech-recognition.md b/_podcast/human-centered-ai-automatic-speech-recognition.md index ba956c31..01b9671b 100644 --- a/_podcast/human-centered-ai-automatic-speech-recognition.md +++ b/_podcast/human-centered-ai-automatic-speech-recognition.md @@ -19,16 +19,16 @@ description: Discover ASR solutions for disordered speech and accents—boost re intro: How can automatic speech recognition (ASR) better serve people with disordered speech and diverse accents? In this episode Katarzyna Foremniak, a computational linguist with over 10 years in NLP who has built language models for Audi and Porsche - and teaches at the University of Warsaw, examines human‑centered ASR for atypical + and teaches at the University of Warsaw, examines human-centered ASR for atypical and accented speech. 
We trace her move from linguistics to computational approaches - and cover core phonetics and morpho‑syntax concepts that matter for speech recognition. + and cover core phonetics and morpho-syntax concepts that matter for speech recognition.

Key topics include distinctions between accents and speech disorders, limitations of standard ASR datasets, strategies for disordered speech recognition such as specialized - datasets, data augmentation and synthetic variations, multimodal ASR with lip‑reading, - and transfer learning for fine‑tuning with limited data. We also discuss data collection + datasets, data augmentation and synthetic variations, multimodal ASR with lip-reading, + and transfer learning for fine-tuning with limited data. We also discuss data collection challenges (GDPR, clinical data), bilingualism effects, stammering and fluency, pronunciation issues like Polish consonant clusters, and practical workflows including - Amazon Transcribe plus LLM post‑processing. Deployment tradeoffs—model size, on‑device + Amazon Transcribe plus LLM post-processing. Deployment tradeoffs—model size, on-device setups, automotive voice use cases—and assistive applications round out the conversation.

Listeners interested in speech recognition, disordered speech, accents, and ethical data practices will gain practical technical strategies and a clearer @@ -36,7 +36,7 @@ intro: How can automatic speech recognition (ASR) better serve people with disor dateadded: 2024-10-10 duration: PT00H57M19S quotableClips: -- name: 'Episode Introduction: Human‑Centered AI for Disordered Speech' +- name: 'Episode Introduction: Human-Centered AI for Disordered Speech' startOffset: 0 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=0 endOffset: 486 @@ -48,11 +48,11 @@ quotableClips: startOffset: 546 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=546 endOffset: 802 -- name: 'Linguistics Meets Computer Science: Data‑driven Approaches' +- name: 'Linguistics Meets Computer Science: Data-driven Approaches' startOffset: 802 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=802 endOffset: 925 -- name: 'Phonetics & Morpho‑syntax Explained: Core Concepts for ASR' +- name: 'Phonetics & Morpho-syntax Explained: Core Concepts for ASR' startOffset: 925 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=925 endOffset: 1233 @@ -84,11 +84,11 @@ quotableClips: startOffset: 2227 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2227 endOffset: 2253 -- name: 'Multimodal ASR: Integrating Lip‑reading and Visual Cues' +- name: 'Multimodal ASR: Integrating Lip-reading and Visual Cues' startOffset: 2253 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2253 endOffset: 2417 -- name: 'Transfer Learning for ASR: Fine‑tuning with Limited Data' +- name: 'Transfer Learning for ASR: Fine-tuning with Limited Data' startOffset: 2417 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2417 endOffset: 2470 @@ -108,7 +108,7 @@ quotableClips: startOffset: 2716 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2716 endOffset: 2777 -- name: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post‑processing' +- name: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post-processing' 
startOffset: 2777 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2777 endOffset: 2848 @@ -120,7 +120,7 @@ quotableClips: startOffset: 3087 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3087 endOffset: 3245 -- name: 'Personalized ASR: User Adaptation, Fine‑tuning, and On‑device Setup' +- name: 'Personalized ASR: User Adaptation, Fine-tuning, and On-device Setup' startOffset: 3245 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3245 endOffset: 3480 @@ -132,7 +132,7 @@ quotableClips: startOffset: 3602 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3602 endOffset: 3713 -- name: 'In‑Car Voice Recognition: Automotive Use Cases and Limitations' +- name: 'In-Car Voice Recognition: Automotive Use Cases and Limitations' startOffset: 3713 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3713 endOffset: 3807 @@ -140,16 +140,16 @@ quotableClips: startOffset: 3807 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3807 endOffset: 3853 -- name: 'Closing Reflections: Human‑Centered AI Priorities & Further Reading' +- name: 'Closing Reflections: Human-Centered AI Priorities & Further Reading' startOffset: 3853 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3853 endOffset: 3925 -- name: Episode Sign‑off and Guest Thanks +- name: Episode Sign-off and Guest Thanks startOffset: 3925 url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3925 endOffset: 3439 transcript: -- header: 'Episode Introduction: Human‑Centered AI for Disordered Speech' +- header: 'Episode Introduction: Human-Centered AI for Disordered Speech' - header: Guest Introduction & Career Highlights (Katarzyna Foremniak) - line: This week, we'll talk about human-centered AI for disordered speech recognition. 
We have a special guest today—Katarzyna Foremniak is a computational linguist @@ -243,7 +243,7 @@ transcript: sec: 745 time: '12:25' who: Katarzyna -- header: 'Linguistics Meets Computer Science: Data‑driven Approaches' +- header: 'Linguistics Meets Computer Science: Data-driven Approaches' - line: You’ve touched on how important the use of data is. Is it safe to say that computational linguistics merges linguistics and computer science? sec: 802 @@ -280,7 +280,7 @@ transcript: sec: 904 time: '15:04' who: Katarzyna -- header: 'Phonetics & Morpho‑syntax Explained: Core Concepts for ASR' +- header: 'Phonetics & Morpho-syntax Explained: Core Concepts for ASR' - line: In your biography, summarized by GPT, it mentions that you specialize in phonetics, morpho-syntax, and sentiment analysis. I’m familiar with sentiment analysis, but could you explain what phonetics and morpho-syntax are? @@ -514,13 +514,13 @@ transcript: sec: 2251 time: '37:31' who: Alexey -- header: 'Multimodal ASR: Integrating Lip‑reading and Visual Cues' +- header: 'Multimodal ASR: Integrating Lip-reading and Visual Cues' - line: Another strategy is using multimodal outputs. While we learn from audio, adding visual data—such as lip reading or gesture recognition— sec: 2253 time: '37:33' who: Katarzyna -- header: 'Transfer Learning for ASR: Fine‑tuning with Limited Data' +- header: 'Transfer Learning for ASR: Fine-tuning with Limited Data' - line: Yeah, not yet, of course. But I've worked with images, and in a typical situation, you have an ImageNet neural network trained on ImageNet. Then you have your own data, which could be tractors or anything else not included in ImageNet. 
You might @@ -612,7 +612,7 @@ transcript: sec: 2751 time: '45:51' who: Katarzyna -- header: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post‑processing' +- header: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post-processing' - line: By the way, I use automatic speech recognition for podcast episodes after recording. I utilize Amazon Transcribe, which is supposed to recognize English. sec: 2777 @@ -684,7 +684,7 @@ transcript: sec: 3151 time: '52:31' who: Alexey -- header: 'Personalized ASR: User Adaptation, Fine‑tuning, and On‑device Setup' +- header: 'Personalized ASR: User Adaptation, Fine-tuning, and On-device Setup' - line: I guess with personalization, the way it works is I first need to train it as a user. It asks me, "Hey, can you pronounce this sentence?" I record myself saying the sentence, and then it asks me to pronounce something else. I do this @@ -845,7 +845,7 @@ transcript: sec: 3708 time: '1:01:48' who: Alexey -- header: 'In‑Car Voice Recognition: Automotive Use Cases and Limitations' +- header: 'In-Car Voice Recognition: Automotive Use Cases and Limitations' - line: And it's parking, and it's parking! Everything you need and what is planned by the producers and car designers includes opening the windows, air conditioning, seat heating, steering wheel heating, radio, calling, etc. That’s also an interesting @@ -887,7 +887,7 @@ transcript: sec: 3850 time: '1:04:10' who: Katarzyna -- header: 'Closing Reflections: Human‑Centered AI Priorities & Further Reading' +- header: 'Closing Reflections: Human-Centered AI Priorities & Further Reading' - line: I think we covered only three questions out of—I don’t know how many we prepared, but it was... sec: 3853 @@ -916,7 +916,7 @@ transcript: sec: 3913 time: '1:05:13' who: Alexey -- header: Episode Sign‑off and Guest Thanks +- header: Episode Sign-off and Guest Thanks - line: Thank you. 
Thank you for the invitation, and really congratulations on the great series of podcasts, but also for the fantastic platform that you created. I feel really impressed, and as I said at the beginning, I feel honored to be @@ -926,14 +926,14 @@ transcript: who: Katarzyna context: 'Context: The episode surveys how linguistics and computational methods intersect to address limitations of mainstream ASR for people with disordered, accented, or - atypical speech — covering phonetics and morpho‑syntax foundations, distinctions + atypical speech — covering phonetics and morpho-syntax foundations, distinctions between accent and disorder, modern ASR advances and failure modes, data collection and GDPR constraints, targeted datasets and augmentation, multimodal and transfer - approaches, personalization and on‑device deployment, and the ethical/assistive + approaches, personalization and on-device deployment, and the ethical/assistive implications. - Core: Build ASR systems that are human‑centered and linguistically informed—prioritizing - inclusive data practices, phonetics‑aware modeling, adaptive techniques (augmentation, + Core: Build ASR systems that are human-centered and linguistically informed—prioritizing + inclusive data practices, phonetics-aware modeling, adaptive techniques (augmentation, transfer learning, multimodal cues, personalization), and ethical deployment—so speech technology recognizes and respects the communicative diversity and needs of people with disordered or atypical speech.' 
diff --git a/_podcast/human-centered-mlops-and-model-monitoring.md b/_podcast/human-centered-mlops-and-model-monitoring.md index 74bc2fee..6cf4292a 100644 --- a/_podcast/human-centered-mlops-and-model-monitoring.md +++ b/_podcast/human-centered-mlops-and-model-monitoring.md @@ -48,7 +48,7 @@ quotableClips: startOffset: 626 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=626 endOffset: 742 -- name: 'Stakeholder Engagement: pairing, availability, and buy‑in' +- name: 'Stakeholder Engagement: pairing, availability, and buy-in' startOffset: 742 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=742 endOffset: 827 @@ -72,7 +72,7 @@ quotableClips: startOffset: 1474 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1474 endOffset: 1634 -- name: 'ML Incident Response: post‑mortems and ML‑specific recovery steps' +- name: 'ML Incident Response: post-mortems and ML-specific recovery steps' startOffset: 1634 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1634 endOffset: 1763 @@ -80,7 +80,7 @@ quotableClips: startOffset: 1763 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1763 endOffset: 1931 -- name: 'Root‑Cause Debugging: applying Five Whys to ML product issues' +- name: 'Root-Cause Debugging: applying Five Whys to ML product issues' startOffset: 1931 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1931 endOffset: 2201 @@ -96,11 +96,11 @@ quotableClips: startOffset: 2300 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2300 endOffset: 2366 -- name: 'Post‑Mortem Evidence: facts, blameless analysis, and investigation steps' +- name: 'Post-Mortem Evidence: facts, blameless analysis, and investigation steps' startOffset: 2366 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2366 endOffset: 2523 -- name: 'Action Items: turning post‑mortems into tickets and process changes' +- name: 'Action Items: turning post-mortems into tickets and process changes' startOffset: 2523 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2523 endOffset: 2651 @@ -120,7 +120,7 @@ 
quotableClips: startOffset: 2968 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2968 endOffset: 3030 -- name: 'End‑User Research: mystery shopping and direct user testing' +- name: 'End-User Research: mystery shopping and direct user testing' startOffset: 3030 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3030 endOffset: 3159 @@ -137,7 +137,7 @@ quotableClips: startOffset: 3388 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3388 endOffset: 3566 -- name: 'Wrap‑Up & Contact: where to find Lina and episode closing' +- name: 'Wrap-Up & Contact: where to find Lina and episode closing' startOffset: 3566 url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3566 endOffset: 3499 @@ -340,7 +340,7 @@ transcript: sec: 729 time: '12:09' who: Lina -- header: 'Stakeholder Engagement: pairing, availability, and buy‑in' +- header: 'Stakeholder Engagement: pairing, availability, and buy-in' - line: This is before you even start doing anything, right? You have an idea about something cool, you sit down, and you spend some time in front of a Google document or Word document, or maybe just a notepad. You try to write everything down, you @@ -596,7 +596,7 @@ transcript: sec: 1615 time: '26:55' who: Lina -- header: 'ML Incident Response: post‑mortems and ML‑specific recovery steps' +- header: 'ML Incident Response: post-mortems and ML-specific recovery steps' - line: Let's say we agreed with everyone on this, and we say, “Okay, the system should be responsive within one hour. If something happens for 10 minutes, nothing bad happens, but it would come back in one hour.” So you will define all these service @@ -665,7 +665,7 @@ transcript: sec: 1928 time: '32:08' who: Alexey -- header: 'Root‑Cause Debugging: applying Five Whys to ML product issues' +- header: 'Root-Cause Debugging: applying Five Whys to ML product issues' - line: I thought that he must have. So let's use the post mortem format to debug this Okay. 
It's the ‘last seen’ box – some of my colleagues spend some time debugging the problem, not noticing it's not recommendation box. First thing, apply the @@ -794,7 +794,7 @@ transcript: sec: 2354 time: '39:14' who: Lina -- header: 'Post‑Mortem Evidence: facts, blameless analysis, and investigation steps' +- header: 'Post-Mortem Evidence: facts, blameless analysis, and investigation steps' - line: I wanted to ask you a bit about this ‘post mortem’ format. We also have a question in chat. What does the format look like? I think one thing that you mentioned is that you need to ask the “five why's” – you don't jump to conclusions immediately. @@ -831,7 +831,7 @@ transcript: sec: 2508 time: '41:48' who: Alexey -- header: 'Action Items: turning post‑mortems into tickets and process changes' +- header: 'Action Items: turning post-mortems into tickets and process changes' - line: First you get the facts. If it's a backend service, it's likely “The service was down from that time to that time.” As in our women’s bag example, it might be a screenshot, or it might be return values. We put all the factual information @@ -997,7 +997,7 @@ transcript: sec: 3019 time: '50:19' who: Alexey -- header: 'End‑User Research: mystery shopping and direct user testing' +- header: 'End-User Research: mystery shopping and direct user testing' - line: It depends on what project I'm working on. I do talk to end users in some cases. I also do mystery shopping. Mystery shopping is basically when you go through the process yourself. I was optimizing the credit process application in my current @@ -1185,7 +1185,7 @@ transcript: sec: 3557 time: '59:17' who: Alexey -- header: 'Wrap‑Up & Contact: where to find Lina and episode closing' +- header: 'Wrap-Up & Contact: where to find Lina and episode closing' - line: Thank you for having me. And if anyone wants to connect more – I'm hanging out in the MLOps channel sometimes. Also on LinkedIn. 
Or if anyone wants to write a blog post together or just generally share? Yeah. Look me up. diff --git a/_podcast/interpretable-machine-learning.md b/_podcast/interpretable-machine-learning.md index 1b6c0109..d2b992d7 100644 --- a/_podcast/interpretable-machine-learning.md +++ b/_podcast/interpretable-machine-learning.md @@ -24,7 +24,7 @@ intro: How can you reliably trust a machine learning model’s predictions in re topics include a SHAP deep dive with practical Python examples for attributing predictions, conformal prediction for calibrated uncertainty and creating prediction sets, and the difference between explainable AI and interpretable machine learning. He also - discusses using interpretability to debug models, maintain hands‑on skills through + discusses using interpretability to debug models, maintain hands-on skills through competitions, and document experiments for reproducible insights.

If you want concrete tools to evaluate model trust—how to quantify uncertainty, interpret feature effects with SHAP, and produce reliable prediction sets with conformal methods—this @@ -46,7 +46,7 @@ quotableClips: startOffset: 92 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=92 endOffset: 225 -- name: Becoming a Full‑Time Technical Writer +- name: Becoming a Full-Time Technical Writer startOffset: 225 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=225 endOffset: 397 @@ -70,11 +70,11 @@ quotableClips: startOffset: 837 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=837 endOffset: 955 -- name: 'Publishing in Public: Chapter‑by‑Chapter Workflow' +- name: 'Publishing in Public: Chapter-by-Chapter Workflow' startOffset: 955 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=955 endOffset: 1027 -- name: 'Self‑Publishing vs Publishers: Control, Editors, Royalties' +- name: 'Self-Publishing vs Publishers: Control, Editors, Royalties' startOffset: 1027 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1027 endOffset: 1138 @@ -95,11 +95,11 @@ quotableClips: startOffset: 1577 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1577 endOffset: 1800 -- name: 'Work Style: Solo Writing, Collaboration, and Co‑authoring' +- name: 'Work Style: Solo Writing, Collaboration, and Co-authoring' startOffset: 1800 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1800 endOffset: 1987 -- name: 'Staying Hands‑On: Competitions to Maintain Practical Skills' +- name: 'Staying Hands-On: Competitions to Maintain Practical Skills' startOffset: 1987 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1987 endOffset: 2181 @@ -119,11 +119,11 @@ quotableClips: startOffset: 2916 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2916 endOffset: 3000 -- name: 'Becoming a Full‑Time Author: Timeframe, Income, and Workload' +- name: 'Becoming a Full-Time Author: Timeframe, Income, and Workload' startOffset: 3000 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3000 endOffset: 3229 -- name: 
'Publishing Logistics: Leanpub, Amazon KDP, and Print‑on‑Demand' +- name: 'Publishing Logistics: Leanpub, Amazon KDP, and Print-on-Demand' startOffset: 3229 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3229 endOffset: 3376 @@ -131,7 +131,7 @@ quotableClips: startOffset: 3376 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3376 endOffset: 3413 -- name: Closing Remarks and Episode Wrap‑Up +- name: Closing Remarks and Episode Wrap-Up startOffset: 3413 url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3413 endOffset: 3380 @@ -212,7 +212,7 @@ transcript: sec: 208 time: '3:28' who: Alexey -- header: Becoming a Full‑Time Technical Writer +- header: Becoming a Full-Time Technical Writer - line: '[chuckles] Yeah. And I also didn''t feel so free when writing the Master''s thesis. But when I started the book, I wrote it in the open, got feedback, and could just write how I felt. I [could] put in some jokes and not hide behind math @@ -432,7 +432,7 @@ transcript: sec: 933 time: '15:33' who: Alexey -- header: 'Publishing in Public: Chapter‑by‑Chapter Workflow' +- header: 'Publishing in Public: Chapter-by-Chapter Workflow' - line: Yeah, good question. For me, I already did some blogging before, but I always quit after a few months. But I already had a little bit of experience with writing more freely. With the book, it wasn't like, “Hide in my room for two years, and @@ -447,7 +447,7 @@ transcript: sec: 955 time: '15:55' who: Christoph -- header: 'Self‑Publishing vs Publishers: Control, Editors, Royalties' +- header: 'Self-Publishing vs Publishers: Control, Editors, Royalties' - line: But also, as we spoke briefly at the beginning, you don't have a publisher – you published on your own, right? [Christoph agrees] This requires a lot of self-discipline. 
If you don't publish a chapter, in the case with a publisher, @@ -703,7 +703,7 @@ transcript: sec: 1743 time: '29:03' who: Christoph -- header: 'Work Style: Solo Writing, Collaboration, and Co‑authoring' +- header: 'Work Style: Solo Writing, Collaboration, and Co-authoring' - line: Okay. Now you’ve been working as a technical book writer for a year, right? Do you feel lonely when you just write by yourself? Do you miss colleagues? sec: 1800 @@ -754,7 +754,7 @@ transcript: sec: 1901 time: '31:41' who: Christoph -- header: 'Staying Hands‑On: Competitions to Maintain Practical Skills' +- header: 'Staying Hands-On: Competitions to Maintain Practical Skills' - line: So you said you would write it faster because your co-author has other commitments. Then you also don't invest as much time as you could potentially because you probably… What I'm trying to say is that you probably still have time [left over]. So are @@ -1129,7 +1129,7 @@ transcript: sec: 2964 time: '49:24' who: Christoph -- header: 'Becoming a Full‑Time Author: Timeframe, Income, and Workload' +- header: 'Becoming a Full-Time Author: Timeframe, Income, and Workload' - line: Maybe also the question is, “How can I be a full-time technical author? What are my steps?” sec: 3000 @@ -1214,7 +1214,7 @@ transcript: sec: 3202 time: '53:22' who: Christoph -- header: 'Publishing Logistics: Leanpub, Amazon KDP, and Print‑on‑Demand' +- header: 'Publishing Logistics: Leanpub, Amazon KDP, and Print-on-Demand' - line: What do you use for publishing? Because you also have physical copies. I know there are websites where you can sell digital products – PDFs, videos, whatever. But you have physical books. @@ -1310,7 +1310,7 @@ transcript: sec: 3405 time: '56:45' who: Christoph -- header: Closing Remarks and Episode Wrap‑Up +- header: Closing Remarks and Episode Wrap-Up - line: Yeah, thanks. Indeed, it was nice. Unfortunately, this is all the time we have for today. 
sec: 3413 @@ -1327,18 +1327,18 @@ transcript: time: '57:02' who: Alexey context: 'Context: Christoph Molnar’s journey from statistician and Kaggle competitor - to full‑time technical author frames a consistent practice: hands‑on modeling, careful + to full-time technical author frames a consistent practice: hands-on modeling, careful documentation, and public, iterative teaching about interpretable machine learning techniques (SHAP, conformal prediction, etc.), plus the practical mechanics of publishing and staying current. Core narrative: At the episode’s center is the idea that trustworthy, useful machine learning emerges not from opaque accuracy chasing but from a disciplined loop of - hands‑on experimentation, clear interpretation, and open communication — using interpretable + hands-on experimentation, clear interpretation, and open communication — using interpretable methods and calibrated uncertainty to debug and understand models, keeping meticulous logs and competitions to stay sharp, and publishing incrementally (with feedback and transparency) to teach others while refining your own understanding. This unified - through‑line ties together the technical tools, the writing and publishing choices, + through-line ties together the technical tools, the writing and publishing choices, and the everyday workflows that make complex ML accessible, reproducible, and actionable.' --- Links: diff --git a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md index c9a88e47..908a1f4c 100644 --- a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md +++ b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md @@ -20,7 +20,7 @@ intro: How can knowledge graphs and large language models (LLMs) be combined to automotive R&D — from crash simulation insights to reproducible reports? 
In this episode Anahita Pakiman, a data scientist-engineer who moved from mechanical engineering and finite element analysis (FEA) into applied AI and now works as Senior Knowledge - Graph-Data Scientist Consultant at brox IT‑Solutions, walks through practical strategies + Graph-Data Scientist Consultant at brox IT-Solutions, walks through practical strategies and tradeoffs.

We cover FEA vs machine learning, optimization and topology in crash simulations, and why teams adopt Neo4j for semantic reporting and load-path detection. Anahita explains graph vs tabular representations, moving from knowledge diff --git a/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md index ba4bad81..a6254ca2 100644 --- a/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md +++ b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/conquering-the-last-mile-in-data-caitlin-moorman/id1541710331?i=1000539421886 description: Learn last-mile data delivery, build data products for the modern data stack, boost adoption, embed analytics in decisions, and prove measurable ROI -intro: 'How do you turn a powerful modern data stack into analytics people actually use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove Recommerce and former data lead in crowdfunding and self-publishing, walks through the last-mile data delivery challenges that block adoption and offers practical approaches to build data products that drive decisions.

We define the “last mile” in data delivery and contrast modern data stack capabilities with last‑mile execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), treating data as a product, user research to diagnose poor adoption, and simplifying A/B testing reporting for decision‑makers. Caitlin outlines a product‑design mindset—outcome‑first projects, persona-driven abstractions, low‑fidelity prototyping, and embedding metrics in meetings—to prove impact and build advocacy. She also covers cultural barriers, measuring hard‑to‑track work with proxies, scoping narrow slices, recruiting advocates, and using growth marketing as an early use case.

Listen to learn actionable frameworks and experiments you can use to improve data adoption, design usable data products, and measure tangible wins that create momentum in your organization.' +intro: 'How do you turn a powerful modern data stack into analytics people actually use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove Recommerce and former data lead in crowdfunding and self-publishing, walks through the last-mile data delivery challenges that block adoption and offers practical approaches to build data products that drive decisions.

We define the “last mile” in data delivery and contrast modern data stack capabilities with last-mile execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), treating data as a product, user research to diagnose poor adoption, and simplifying A/B testing reporting for decision-makers. Caitlin outlines a product-design mindset—outcome-first projects, persona-driven abstractions, low-fidelity prototyping, and embedding metrics in meetings—to prove impact and build advocacy. She also covers cultural barriers, measuring hard-to-track work with proxies, scoping narrow slices, recruiting advocates, and using growth marketing as an early use case.

Listen to learn actionable frameworks and experiments you can use to improve data adoption, design usable data products, and measure tangible wins that create momentum in your organization.' topics: - data analytics - tools @@ -115,7 +115,7 @@ quotableClips: startOffset: 3491 url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3491 endOffset: 3690 -- name: 'Advice for aspiring analysts: curiosity, business impact, and on‑the‑job +- name: 'Advice for aspiring analysts: curiosity, business impact, and on-the-job learning' startOffset: 3690 url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3690 @@ -1007,7 +1007,7 @@ transcript: you do step two, you can do step three. And then there are circular projects, where you don't know what you don't know. And a lot of data projects fall into this category. -- header: 'Advice for aspiring analysts: curiosity, business impact, and on‑the‑job +- header: 'Advice for aspiring analysts: curiosity, business impact, and on-the-job learning' - line: I'll share more about this, but the very high-level overview is first just to set expectations. Acknowledge ahead of time that it is a circular project. 
diff --git a/_podcast/lean-mlops-for-startups.md b/_podcast/lean-mlops-for-startups.md index 326b3895..da50702a 100644 --- a/_podcast/lean-mlops-for-startups.md +++ b/_podcast/lean-mlops-for-startups.md @@ -54,7 +54,7 @@ quotableClips: startOffset: 714 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=714 endOffset: 774 -- name: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock‑in' +- name: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock-in' startOffset: 774 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=774 endOffset: 906 @@ -66,11 +66,11 @@ quotableClips: startOffset: 1058 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1058 endOffset: 1159 -- name: 'Portability vs Managed Services: Avoiding Vendor Lock‑In (Vertex AI, SageMaker)' +- name: 'Portability vs Managed Services: Avoiding Vendor Lock-In (Vertex AI, SageMaker)' startOffset: 1159 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1159 endOffset: 1295 -- name: 'Low‑Code Trade-offs: Speed vs Future Flexibility' +- name: 'Low-Code Trade-offs: Speed vs Future Flexibility' startOffset: 1295 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1295 endOffset: 1342 @@ -78,7 +78,7 @@ quotableClips: startOffset: 1342 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1342 endOffset: 1650 -- name: 'End‑to‑End Ownership: Multidisciplinary Work in Startups' +- name: 'End-to-End Ownership: Multidisciplinary Work in Startups' startOffset: 1650 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1650 endOffset: 1777 @@ -98,7 +98,7 @@ quotableClips: startOffset: 2148 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2148 endOffset: 2274 -- name: 'AI‑Assisted Coding: Productivity Gains and Technical Debt Risks' +- name: 'AI-Assisted Coding: Productivity Gains and Technical Debt Risks' startOffset: 2274 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2274 endOffset: 2401 @@ -106,7 +106,7 @@ quotableClips: startOffset: 2401 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2401 
endOffset: 2592 -- name: 'Early‑Career Advice: Mentorship, Pairing, and Role Selection' +- name: 'Early-Career Advice: Mentorship, Pairing, and Role Selection' startOffset: 2592 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2592 endOffset: 2650 @@ -134,11 +134,11 @@ quotableClips: startOffset: 3343 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3343 endOffset: 3429 -- name: 'On‑Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' +- name: 'On-Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' startOffset: 3429 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3429 endOffset: 3609 -- name: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade‑offs' +- name: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade-offs' startOffset: 3609 url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3609 endOffset: 3701 @@ -313,7 +313,7 @@ transcript: sec: 714 time: '11:54' who: Nemanja -- header: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock‑in' +- header: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock-in' - line: Going on-premise is hard for a startup unless it makes a lot of sense. I think it’s a no-brainer for startups to go for the cloud. However, there needs to be a clever decision because migrating from one cloud to another can be slow and @@ -411,7 +411,7 @@ transcript: sec: 1109 time: '18:29' who: Nemanja -- header: 'Portability vs Managed Services: Avoiding Vendor Lock‑In (Vertex AI, SageMaker)' +- header: 'Portability vs Managed Services: Avoiding Vendor Lock-In (Vertex AI, SageMaker)' - line: I tried Kubeflow, and it was a huge pain because of all the YAML files and Kubernetes complexity. Maybe it makes sense in the long run, but at the beginning, you might just need Flask or something simpler. 
@@ -448,7 +448,7 @@ transcript: sec: 1274 time: '21:14' who: Alexey -- header: 'Low‑Code Trade-offs: Speed vs Future Flexibility' +- header: 'Low-Code Trade-offs: Speed vs Future Flexibility' - line: Yes, that makes you more portable. Some startups might want to start as fast as possible using low-code solutions. If you can only hire a data scientist and not a proper software or systems engineer, you might go with a low-code platform. @@ -545,7 +545,7 @@ transcript: sec: 1631 time: '27:11' who: Nemanja -- header: 'End‑to‑End Ownership: Multidisciplinary Work in Startups' +- header: 'End-to-End Ownership: Multidisciplinary Work in Startups' - line: Startups also pivot frequently. A small, young startup might shift directions completely based on client demands. One client might leave, and another might request something entirely different. This kind of abrupt change keeps things @@ -760,7 +760,7 @@ transcript: sec: 2267 time: '37:47' who: Nemanja -- header: 'AI‑Assisted Coding: Productivity Gains and Technical Debt Risks' +- header: 'AI-Assisted Coding: Productivity Gains and Technical Debt Risks' - line: But you know what I see as a risk now with LLMs and AI-assisted coding? sec: 2274 time: '37:54' @@ -908,7 +908,7 @@ transcript: sec: 2586 time: '43:06' who: Alexey -- header: 'Early‑Career Advice: Mentorship, Pairing, and Role Selection' +- header: 'Early-Career Advice: Mentorship, Pairing, and Role Selection' - line: For juniors, is it better to join a corporation or a more established company? sec: 2592 time: '43:12' @@ -1260,7 +1260,7 @@ transcript: sec: 3412 time: '56:52' who: Nemanja -- header: 'On‑Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' +- header: 'On-Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' - line: 'Right. So maybe let’s take one last question. You mentioned you have experience with on-premise systems. Most corporations you’ve worked with have preferred on-premise over cloud solutions. 
Luka is asking: Do you think on-premise will be the future @@ -1328,7 +1328,7 @@ transcript: sec: 3604 time: '1:00:04' who: Nemanja -- header: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade‑offs' +- header: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade-offs' - line: Dask is a mature tool, and I know it works in a distributed manner like Spark. However, I haven’t seen it widely used in the industry. Companies usually default to Spark for distributed processing. My limited success with Dask doesn’t mean @@ -1360,16 +1360,16 @@ transcript: sec: 3726 time: '1:02:06' who: Nemanja -context: 'The episode’s single unifying idea is pragmatic trade‑offs: how to move +context: 'The episode’s single unifying idea is pragmatic trade-offs: how to move fast and deliver value in ML-driven products and careers while deliberately managing - the risks that speed introduces—technical debt, vendor lock‑in, operational overhead, + the risks that speed introduces—technical debt, vendor lock-in, operational overhead, and team burnout. Every segment circles back to the same decision framework: choose lean, observable, portable primitives and SaaS or managed services pragmatically to ship quickly; invest in minimal, automatable MLOps and instrumentation so you can iterate safely; and prioritize foundational skills, mentorship, and ownership - to sustain learning and long‑term flexibility. In short, be intentional about early + to sustain learning and long-term flexibility. In short, be intentional about early architectural, tooling, and career choices—opt for simplicity and visibility to - accelerate outcomes today while preserving the ability to evolve, scale, and de‑risk + accelerate outcomes today while preserving the ability to evolve, scale, and de-risk tomorrow.' 
--- Links: diff --git a/_podcast/machine-learning-engineering-production-best-practices.md b/_podcast/machine-learning-engineering-production-best-practices.md index 8b443ede..83eb404b 100644 --- a/_podcast/machine-learning-engineering-production-best-practices.md +++ b/_podcast/machine-learning-engineering-production-best-practices.md @@ -80,7 +80,7 @@ quotableClips: startOffset: 2173 url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2173 endOffset: 2357 -- name: 'Novel Algorithm Risks: Transfer learning vs building white‑paper solutions' +- name: 'Novel Algorithm Risks: Transfer learning vs building white-paper solutions' startOffset: 2357 url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2357 endOffset: 2663 @@ -114,7 +114,7 @@ quotableClips: startOffset: 3553 url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3553 endOffset: 3747 -- name: 'AI‑First Tradeoffs: Talent needs, retention, and budget realities' +- name: 'AI-First Tradeoffs: Talent needs, retention, and budget realities' startOffset: 3747 url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3747 endOffset: 3874 @@ -126,7 +126,7 @@ quotableClips: startOffset: 4078 url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4078 endOffset: 4371 -- name: 'Contact & Resources: LinkedIn, podcast appearances, and early‑access book' +- name: 'Contact & Resources: LinkedIn, podcast appearances, and early-access book' startOffset: 4371 url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4371 endOffset: 4301 @@ -724,7 +724,7 @@ transcript: sec: 2353 time: '39:13' who: Ben -- header: 'Novel Algorithm Risks: Transfer learning vs building white‑paper solutions' +- header: 'Novel Algorithm Risks: Transfer learning vs building white-paper solutions' - line: Let's say we have something more complex – maybe a novel algorithm – that we want to try. We heard that right now deep learning is very popular, so we want to try it for our problem. Should we do this? Is this necessary? 
What kind of @@ -1096,7 +1096,7 @@ transcript: sec: 3745 time: '1:02:25' who: Ben -- header: 'AI‑First Tradeoffs: Talent needs, retention, and budget realities' +- header: 'AI-First Tradeoffs: Talent needs, retention, and budget realities' - line: One question from Akshat. “It makes sense to solve problems with uncool techniques. But there are companies who are AI First – they want to show off and say that they have AI capabilities. So what about them?” @@ -1267,7 +1267,7 @@ transcript: sec: 4310 time: '1:11:50' who: Ben -- header: 'Contact & Resources: LinkedIn, podcast appearances, and early‑access book' +- header: 'Contact & Resources: LinkedIn, podcast appearances, and early-access book' - line: Okay, thanks. So, let's finish. How can people find you? sec: 4371 time: '1:12:51' diff --git a/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md index 3929d1e8..b0f697e0 100644 --- a/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md +++ b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=YxijEUoDCfw description: 'Discover asteroid mining: machine learning & hyperspectral spectroscopy to detect water for ISRU—learn detection methods, datasets, mission design & tools.' -intro: How can we reliably detect water on near‑Earth asteroids using machine learning and hyperspectral spectroscopy to enable in‑situ resource utilization (ISRU)? In this episode Daynan Crull—co‑founder of Karman+ and lead of its science and technology effort—walks through the science and engineering needed to find and characterize asteroid water for space missions. Drawing on his background in remote sensing and ML, Daynan explains hyperspectral infrared signatures for water detection, spectral classification approaches, and the limits of ground truth from returned samples and meteorites. 
Along the way we cover relevant astronomical data types (images, hyperspectral bands, time series), asteroid features like photometry and rotation, observability challenges, and ML tasks from signal processing to orbit linking and synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), sampling and extraction concepts, economic use cases for water‑as‑fuel, and the cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, machine learning for water detection, and the datasets and infrastructure to get involved in ISRU research and missions +intro: How can we reliably detect water on near-Earth asteroids using machine learning and hyperspectral spectroscopy to enable in-situ resource utilization (ISRU)? In this episode Daynan Crull—co-founder of Karman+ and lead of its science and technology effort—walks through the science and engineering needed to find and characterize asteroid water for space missions. Drawing on his background in remote sensing and ML, Daynan explains hyperspectral infrared signatures for water detection, spectral classification approaches, and the limits of ground truth from returned samples and meteorites. Along the way we cover relevant astronomical data types (images, hyperspectral bands, time series), asteroid features like photometry and rotation, observability challenges, and ML tasks from signal processing to orbit linking and synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), sampling and extraction concepts, economic use cases for water-as-fuel, and the cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. 
Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, machine learning for water detection, and the datasets and infrastructure to get involved in ISRU research and missions topics: - machine learning - astronomy diff --git a/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md index 5a1561e3..9fafc3b7 100644 --- a/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md +++ b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=jsAxUd_bZpw description: Learn attribution, media mix modeling & cookieless tracking to measure uplift, TV/offline impact and automate MMM for faster acquisition & retention -intro: How can marketing teams reliably measure ad impact, allocate budget across channels, and adapt to a cookieless world? In this episode, Juan Orduz — a Berlin‑based mathematician and data scientist specializing in statistical learning, time series, Bayesian and geometric methods — walks through practical marketing data science approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless tracking.

We cover attribution basics and multi‑channel ambiguity, MMM techniques including regression, ad‑stock and saturation, and campaign uplift estimation using time‑series counterfactuals. Juan explains measuring TV and offline channels, the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention and purchase‑frequency modeling. You’ll also hear about uplift A/B testing design, modeling benchmarks (start simple), MMM retraining cadence, learning decay rates with Bayesian regression, and building a marketing data function with the right data integrations and cross‑functional collaboration.

If you want actionable guidance on attribution models, media mix optimization, privacy‑aware tracking, and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, common pitfalls, and learning resources to help practitioners improve measurement and decision‑making +intro: How can marketing teams reliably measure ad impact, allocate budget across channels, and adapt to a cookieless world? In this episode, Juan Orduz — a Berlin-based mathematician and data scientist specializing in statistical learning, time series, Bayesian and geometric methods — walks through practical marketing data science approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless tracking.

We cover attribution basics and multi-channel ambiguity, MMM techniques including regression, ad-stock and saturation, and campaign uplift estimation using time-series counterfactuals. Juan explains measuring TV and offline channels, the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention and purchase-frequency modeling. You’ll also hear about uplift A/B testing design, modeling benchmarks (start simple), MMM retraining cadence, learning decay rates with Bayesian regression, and building a marketing data function with the right data integrations and cross-functional collaboration.

If you want actionable guidance on attribution models, media mix optimization, privacy-aware tracking, and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, common pitfalls, and learning resources to help practitioners improve measurement and decision-making topics: - marketing - machine learning diff --git a/_podcast/make-money-with-machine-learning-roles-skills.md b/_podcast/make-money-with-machine-learning-roles-skills.md index a922a1a9..fda9189f 100644 --- a/_podcast/make-money-with-machine-learning-roles-skills.md +++ b/_podcast/make-money-with-machine-learning-roles-skills.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/new-roles-key-skills-to-monetize-machine-learning-vin/id1541710331?i=1000512720281 description: 'Master monetize machine learning: convert ML models into ARR/MRR using MLOps and team roles to drive revenue, adoption and measurable business impact.' -intro: How do you turn machine learning models into recurring revenue—ARR and MRR—rather than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner and engineer strategist who has brought products to market with ARR in the $100’s of millions, breaks down practical steps to monetize machine learning.

We explore why a revenue-first mindset changes ML strategy, how to translate models into C‑suite metrics like ARR/MRR, and when to prioritize revenue versus cost‑savings. Vin outlines the three core team roles for monetization, the research artifacts and experimental process that make models production-ready, and real category-creation examples from companies such as Amazon, Google, and Stitch Fix. For startups he explains the “angry users + data scientists” product recipe.

You’ll also get frameworks for ML product management—turning strategy into researchable use cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, and product metrics for adoption (usage, task time, decision quality, pricing impact). This episode delivers actionable guidance for leaders, product managers, and engineers seeking to convert ML into sustainable ARR and MRR +intro: How do you turn machine learning models into recurring revenue—ARR and MRR—rather than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner and engineer strategist who has brought products to market with ARR in the $100’s of millions, breaks down practical steps to monetize machine learning.

We explore why a revenue-first mindset changes ML strategy, how to translate models into C-suite metrics like ARR/MRR, and when to prioritize revenue versus cost-savings. Vin outlines the three core team roles for monetization, the research artifacts and experimental process that make models production-ready, and real category-creation examples from companies such as Amazon, Google, and Stitch Fix. For startups he explains the “angry users + data scientists” product recipe.

You’ll also get frameworks for ML product management—turning strategy into researchable use cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, and product metrics for adoption (usage, task time, decision quality, pricing impact). This episode delivers actionable guidance for leaders, product managers, and engineers seeking to convert ML into sustainable ARR and MRR topics: - machine learning - monetization @@ -39,11 +39,11 @@ quotableClips: startOffset: 477 url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=477 endOffset: 727 -- name: 'ARR & MRR: translating models into C‑suite revenue metrics' +- name: 'ARR & MRR: translating models into C-suite revenue metrics' startOffset: 727 url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=727 endOffset: 959 -- name: 'Revenue vs. cost‑savings: business model metrics for ML products' +- name: 'Revenue vs. cost-savings: business model metrics for ML products' startOffset: 959 url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=959 endOffset: 1215 @@ -336,7 +336,7 @@ transcript: sec: 712 time: '11:52' who: Vin -- header: 'ARR & MRR: translating models into C‑suite revenue metrics' +- header: 'ARR & MRR: translating models into C-suite revenue metrics' - line: We definitely will go there, but it's interesting to know how companies actually evaluate the value that data scientists can bring. Coming back to your LinkedIn profile. You bio mentions that you built and brought products to market with ARR @@ -400,7 +400,7 @@ transcript: sec: 926 time: '15:26' who: Alexey -- header: 'Revenue vs. cost‑savings: business model metrics for ML products' +- header: 'Revenue vs. cost-savings: business model metrics for ML products' - line: What are the other important things that people on this level care about? In addition to this annual recurring revenue and monthly recurring revenue. What are the other money related metrics that they care about? 
diff --git a/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md index 7b860f51..4c5d2428 100644 --- a/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md +++ b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md @@ -16,7 +16,7 @@ links: apple: TODO description: 'Discover practical mentoring strategies for tech careers: find mentors, master cold outreach, run effective sessions, start paid mentorship & boost leadership.' -intro: 'Struggling to find a mentor — or wondering how to become one — in a fast-moving tech career? In this episode, Rahul Jain, a senior solutions engineer and data/AI leader with 15+ years driving enterprise data transformations and a career arc from mining engineering to data engineering and leadership, walks through practical mentoring strategies for tech professionals. We define mentoring (purpose, scope, types), explore early models like Thoughtworks’ sponsorship, and show how to find mentors through networks, platforms, and cold outreach — with concrete outreach best practices: specificity, background, and follow‑up. Rahul covers preparing mentoring sessions (goals, agendas), mentoring formats (one‑off advice vs long‑term relationships), and how to start as a mentor using simple first steps and platforms. Topics include benefits of mentoring, transferable workplace guidance, developing people skills (empathy, listening), balancing technical work and leadership, tackling imposter syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining development plans. Listen to gain actionable steps, templates, and mindset shifts to both secure meaningful mentorship and build a sustainable mentoring practice in your tech career.' +intro: 'Struggling to find a mentor — or wondering how to become one — in a fast-moving tech career? 
In this episode, Rahul Jain, a senior solutions engineer and data/AI leader with 15+ years driving enterprise data transformations and a career arc from mining engineering to data engineering and leadership, walks through practical mentoring strategies for tech professionals. We define mentoring (purpose, scope, types), explore early models like Thoughtworks’ sponsorship, and show how to find mentors through networks, platforms, and cold outreach — with concrete outreach best practices: specificity, background, and follow-up. Rahul covers preparing mentoring sessions (goals, agendas), mentoring formats (one-off advice vs long-term relationships), and how to start as a mentor using simple first steps and platforms. Topics include benefits of mentoring, transferable workplace guidance, developing people skills (empathy, listening), balancing technical work and leadership, tackling imposter syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining development plans. Listen to gain actionable steps, templates, and mindset shifts to both secure meaningful mentorship and build a sustainable mentoring practice in your tech career.' dateadded: 2021-02-23 @@ -41,7 +41,7 @@ quotableClips: startOffset: 770 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=770 endOffset: 990 -- name: 'Cold Outreach Best Practices: Specificity, Background, and Follow‑up' +- name: 'Cold Outreach Best Practices: Specificity, Background, and Follow-up' startOffset: 990 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=990 endOffset: 1180 @@ -49,7 +49,7 @@ quotableClips: startOffset: 1180 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1180 endOffset: 1350 -- name: 'Mentoring Formats: One‑Off Advice vs. Long‑Term Relationships' +- name: 'Mentoring Formats: One-Off Advice vs. 
Long-Term Relationships' startOffset: 1350 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1350 endOffset: 1510 @@ -69,7 +69,7 @@ quotableClips: startOffset: 2010 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=2010 endOffset: 2200 -- name: 'Common Mentee Challenges: Imposter Syndrome & Tech‑vs‑Management Choices' +- name: 'Common Mentee Challenges: Imposter Syndrome & Tech-vs-Management Choices' startOffset: 2200 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=2200 endOffset: 2390 @@ -93,7 +93,7 @@ quotableClips: startOffset: 3020 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=3020 endOffset: 3160 -- name: 'Maintaining Development Plans: Review, Visibility, and Follow‑through' +- name: 'Maintaining Development Plans: Review, Visibility, and Follow-through' startOffset: 3160 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=3160 endOffset: 3290 diff --git a/_podcast/mindful-data-strategy-for-business-impact.md b/_podcast/mindful-data-strategy-for-business-impact.md index e002680a..582717ec 100644 --- a/_podcast/mindful-data-strategy-for-business-impact.md +++ b/_podcast/mindful-data-strategy-for-business-impact.md @@ -51,7 +51,7 @@ quotableClips: startOffset: 385 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=385 endOffset: 500 -- name: 'Wabi‑sabi applied to data: accepting imperfection and communicating it' +- name: 'Wabi-sabi applied to data: accepting imperfection and communicating it' startOffset: 500 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=500 endOffset: 588 @@ -63,7 +63,7 @@ quotableClips: startOffset: 707 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=707 endOffset: 849 -- name: 'Data quality metaphor: Lego bricks and pragmatic trade‑offs' +- name: 'Data quality metaphor: Lego bricks and pragmatic trade-offs' startOffset: 849 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=849 endOffset: 1052 @@ -91,7 +91,7 @@ quotableClips: startOffset: 1756 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1756 endOffset: 1847 -- name: 
Dashboard traffic‑light system for data reliability (green/yellow/red) +- name: Dashboard traffic-light system for data reliability (green/yellow/red) startOffset: 1847 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1847 endOffset: 1998 @@ -131,7 +131,7 @@ quotableClips: startOffset: 3551 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3551 endOffset: 3623 -- name: 'Executive ad‑hoc requests: elicit intent and quantify expected impact' +- name: 'Executive ad-hoc requests: elicit intent and quantify expected impact' startOffset: 3623 url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3623 endOffset: 3753 @@ -298,7 +298,7 @@ transcript: sec: 494 time: '8:14' who: Alexey -- header: 'Wabi‑sabi applied to data: accepting imperfection and communicating it' +- header: 'Wabi-sabi applied to data: accepting imperfection and communicating it' - line: Wabi Sabi Your Data which is basically a Japanese concept about accepting the imperfections, the perfect imperfections basically and this is basically what I think about data. This is a lot of my philosophy about how to handle data. It's @@ -432,7 +432,7 @@ transcript: sec: 811 time: '13:31' who: Alexey -- header: 'Data quality metaphor: Lego bricks and pragmatic trade‑offs' +- header: 'Data quality metaphor: Lego bricks and pragmatic trade-offs' - line: Correct and you know I always explain it and I say also data is like Lego bricks. We can connect and we can build a lot of buildings inside it. sec: 849 @@ -803,7 +803,7 @@ transcript: sec: 1810 time: '30:10' who: Lior -- header: Dashboard traffic‑light system for data reliability (green/yellow/red) +- header: Dashboard traffic-light system for data reliability (green/yellow/red) - line: 'Second, for the CEO, we can add a traffic light indicator on the dashboard: green, yellow, red.' 
sec: 1847 @@ -1290,7 +1290,7 @@ transcript: sec: 3599 time: '59:59' who: Lior -- header: 'Executive ad‑hoc requests: elicit intent and quantify expected impact' +- header: 'Executive ad-hoc requests: elicit intent and quantify expected impact' - line: 'Thanks. Another question: how do you handle ad hoc requests from executives?' sec: 3623 time: '1:00:23' @@ -1402,13 +1402,13 @@ transcript: time: '1:06:05' who: Alexey context: 'Context: The episode examines a practical approach to data work—moving from - engineering to product thinking, accepting imperfect data (wabi‑sabi), diagnosing + engineering to product thinking, accepting imperfect data (wabi-sabi), diagnosing trust failures, prioritizing maintenance/rollout/innovation, using simple reliability signals and feedback loops, and aligning team time and processes to measurable business impact—especially as generative AI raises readiness demands and legacy systems require pragmatic replacement. - Core: Adopt a mindful, impact‑first data strategy that accepts and communicates + Core: Adopt a mindful, impact-first data strategy that accepts and communicates inevitable imperfection, prioritizes process and measurable business outcomes over perfect tooling, and restores trust through clear signals, closed feedback loops, and disciplined allocation of maintenance, rollout, and innovation effort so data diff --git a/_podcast/ml-product-manager-and-mlops-platform-strategy.md b/_podcast/ml-product-manager-and-mlops-platform-strategy.md index 593d9b03..a570f449 100644 --- a/_podcast/ml-product-manager-and-mlops-platform-strategy.md +++ b/_podcast/ml-product-manager-and-mlops-platform-strategy.md @@ -124,7 +124,7 @@ quotableClips: startOffset: 2983 url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2983 endOffset: 3165 -- name: 'Non‑Technical Transitions: Feasibility of moving into ML product roles' +- name: 'Non-Technical Transitions: Feasibility of moving into ML product roles' startOffset: 3165 url: 
https://www.youtube.com/watch?v=PjqjPvHliqg&t=3165 endOffset: 3344 @@ -1020,7 +1020,7 @@ transcript: sec: 3150 time: '52:30' who: Alexey -- header: 'Non‑Technical Transitions: Feasibility of moving into ML product roles' +- header: 'Non-Technical Transitions: Feasibility of moving into ML product roles' - line: It's easier for them to become a traditional software engineering PMs, I would say. Because in this role, you go into writing specifications, understanding the requirements, etc.. If they have some understanding of machine learning philosophy, diff --git a/_podcast/mlops-and-ml-engineering-in-finance.md b/_podcast/mlops-and-ml-engineering-in-finance.md index 13083db9..fe677a9e 100644 --- a/_podcast/mlops-and-ml-engineering-in-finance.md +++ b/_podcast/mlops-and-ml-engineering-in-finance.md @@ -1288,14 +1288,14 @@ transcript: who: Nemanja context: 'Context: Nemanja’s story and the episode’s segments trace practical ML work in regulated finance—moving from research to ML engineering in legacy, governance-heavy - environments—covering real constraints (on‑prem infra, approvals), concrete ML Ops + environments—covering real constraints (on-prem infra, approvals), concrete ML Ops responsibilities (CI/CD, deployment, monitoring, model/data versioning), tactical shortcuts, team and platform patterns, and the skills and career moves that enable this work. Core: The unifying idea is that bringing ML into production in conservative, regulated organizations succeeds not through ideal tools or big rewrites but through a pragmatic, - engineering‑first, incremental approach—building minimal viable ML Ops (reproducible + engineering-first, incremental approach—building minimal viable ML Ops (reproducible pipelines, environments, monitoring, simple registries), integrating with existing DevOps/governance, reusing platform patterns, and focusing on practical skills and iterative delivery to earn trust and scale ML responsibly.' 
diff --git a/_podcast/mlops-at-scale-reproducibility-adoption.md b/_podcast/mlops-at-scale-reproducibility-adoption.md index fd723757..ce5076dd 100644 --- a/_podcast/mlops-at-scale-reproducibility-adoption.md +++ b/_podcast/mlops-at-scale-reproducibility-adoption.md @@ -17,11 +17,11 @@ links: description: Learn MLOps CI/CD and model monitoring to scale reliable deployments, accelerate delivery, ensure reproducibility, and drive model adoption in production. intro: 'How do you run MLOps at scale so models stay deployed, reproducible, and actually - adopted? In this episode Raphaël Hoogvliets—who leads a 12‑engineer team at Eneco + adopted? In this episode Raphaël Hoogvliets—who leads a 12-engineer team at Eneco and brings a career arc from agriculture into data science and MLOps—walks through practical approaches for CI/CD for ML, reproducibility, model monitoring, and adoption - strategy.

We cover the core trade‑offs between speed and robustness, design - choices for long‑term maintainability, and the team coordination needed to scale + strategy.

We cover the core trade-offs between speed and robustness, design + choices for long-term maintainability, and the team coordination needed to scale ML: evangelists, tech translators, and technical leads. Raphaël explains why a centralized MLOps platform team often works as an enabling layer, how MLOps should support product teams, and how to drive adoption through iteration, feedback loops, and developer @@ -51,7 +51,7 @@ quotableClips: startOffset: 521 url: https://youtube.com/watch?v=rMq63r3zi4c&t=521 endOffset: 636 -- name: Design Choices and Long‑Term Tradeoffs in ML projects +- name: Design Choices and Long-Term Tradeoffs in ML projects startOffset: 636 url: https://youtube.com/watch?v=rMq63r3zi4c&t=636 endOffset: 817 @@ -267,7 +267,7 @@ transcript: sec: 553 time: '9:13' who: Raphaël -- header: Design Choices and Long‑Term Tradeoffs in ML projects +- header: Design Choices and Long-Term Tradeoffs in ML projects - line: 'Back to MLOps — your LinkedIn profile has an interesting tagline: “Creating the future’s technical debt today.” What does that mean?' sec: 636 diff --git a/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md index 76956b35..ed451cdf 100644 --- a/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md +++ b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/feature-stores-cutting-through-the-hype-willem-pienaar/id1541710331?i=1000508782957 description: Discover feature store use cases, real-time features with Feast & Tecton, build scalable MLOps to speed production, cut duplication and detect drift -intro: How do you reliably build and serve real‑time features for production ML without rework, duplication, or training/serving skew? 
In this episode, Willem Pienaar — engineering lead at Tecton and creator of Feast — walks through what feature stores solve in MLOps and how they enable real‑time feature engineering. We define feature stores, compare feature creation vs retrieval (SQL, Python, APIs, on‑demand transforms), and illustrate a production real‑time fraud detection lookup. Willem separates hype from value, explains organizational challenges like team silos and speed to production, and outlines the platform role across materialization, serving, and validation.

You’ll get practical coverage of Feast (open‑source) and Tecton (enterprise), architecture components (transform engine, storage, serving, registry, monitoring), and when online tabular use cases require a feature store versus when it’s overkill. The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch (Flink, Spark), validation and monitoring (drift detection, Great Expectations, TFDV), backfilling strategies, ownership and governance, and getting started resources (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete next steps for productionizing features in your MLOps stack +intro: How do you reliably build and serve real-time features for production ML without rework, duplication, or training/serving skew? In this episode, Willem Pienaar — engineering lead at Tecton and creator of Feast — walks through what feature stores solve in MLOps and how they enable real-time feature engineering. We define feature stores, compare feature creation vs retrieval (SQL, Python, APIs, on-demand transforms), and illustrate a production real-time fraud detection lookup. Willem separates hype from value, explains organizational challenges like team silos and speed to production, and outlines the platform role across materialization, serving, and validation.

You’ll get practical coverage of Feast (open-source) and Tecton (enterprise), architecture components (transform engine, storage, serving, registry, monitoring), and when online tabular use cases require a feature store versus when it’s overkill. The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch (Flink, Spark), validation and monitoring (drift detection, Great Expectations, TFDV), backfilling strategies, ownership and governance, and getting started resources (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete next steps for productionizing features in your MLOps stack topics: - machine learning - MLOps @@ -42,11 +42,11 @@ quotableClips: startOffset: 660 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=660 endOffset: 870 -- name: 'Feature Creation vs Retrieval: SQL, Python, APIs, and On‑Demand Transforms' +- name: 'Feature Creation vs Retrieval: SQL, Python, APIs, and On-Demand Transforms' startOffset: 870 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=870 endOffset: 990 -- name: 'Production Example: Real‑Time Fraud Detection Feature Lookup' +- name: 'Production Example: Real-Time Fraud Detection Feature Lookup' startOffset: 990 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=990 endOffset: 1110 @@ -66,7 +66,7 @@ quotableClips: startOffset: 1680 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=1680 endOffset: 1890 -- name: 'Feast Overview: Open‑Source Feature Store Design and Use Cases' +- name: 'Feast Overview: Open-Source Feature Store Design and Use Cases' startOffset: 1890 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=1890 endOffset: 2040 @@ -86,7 +86,7 @@ quotableClips: startOffset: 2550 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=2550 endOffset: 2700 -- name: 'Streaming vs Batch: Flink, Spark, and Real‑Time Feature Engineering' +- name: 'Streaming vs Batch: Flink, Spark, and Real-Time Feature Engineering' startOffset: 2700 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=2700 
endOffset: 2850 diff --git a/_podcast/mlops-kubeflow-model-monitoring.md b/_podcast/mlops-kubeflow-model-monitoring.md index 0f8b9862..a4701d73 100644 --- a/_podcast/mlops-kubeflow-model-monitoring.md +++ b/_podcast/mlops-kubeflow-model-monitoring.md @@ -67,7 +67,7 @@ quotableClips: startOffset: 997 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=997 endOffset: 1208 -- name: 'The "MLOps Engineer" Debate: Title vs. Cross‑Functional Teams' +- name: 'The "MLOps Engineer" Debate: Title vs. Cross-Functional Teams' startOffset: 1208 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1208 endOffset: 1427 @@ -79,7 +79,7 @@ quotableClips: startOffset: 1621 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1621 endOffset: 1808 -- name: 'Advanced Maturity: Data‑Driven Triggers and Automated Retraining' +- name: 'Advanced Maturity: Data-Driven Triggers and Automated Retraining' startOffset: 1808 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1808 endOffset: 2007 @@ -107,7 +107,7 @@ quotableClips: startOffset: 2608 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2608 endOffset: 2761 -- name: 'Getting Started: Cloud‑Managed Pipelines and Simple Projects' +- name: 'Getting Started: Cloud-Managed Pipelines and Simple Projects' startOffset: 2761 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2761 endOffset: 2818 @@ -147,7 +147,7 @@ quotableClips: startOffset: 3684 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3684 endOffset: 3776 -- name: 'Breaking Silos: Language‑Agnostic Pipelines and Collaboration' +- name: 'Breaking Silos: Language-Agnostic Pipelines and Collaboration' startOffset: 3776 url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3776 endOffset: 3899 @@ -479,7 +479,7 @@ transcript: sec: 1191 time: '19:51' who: 'Alexey:' -- header: 'The "MLOps Engineer" Debate: Title vs. Cross‑Functional Teams' +- header: 'The "MLOps Engineer" Debate: Title vs. Cross-Functional Teams' - line: I don't think that it will become a title. 
Maybe people will have it because it's a fancy word now, like data scientist was 10 years ago. But I don't think this will become a role of a department. If it does, that would be a shame because @@ -652,7 +652,7 @@ transcript: sec: 1805 time: '30:05' who: 'Alexey:' -- header: 'Advanced Maturity: Data‑Driven Triggers and Automated Retraining' +- header: 'Advanced Maturity: Data-Driven Triggers and Automated Retraining' - line: Yes, it’s manual. But maybe it automatically adds your new features from your feature store, or if you have some data versioning system, and etc. So, that's the maturity level one. Then there is the visual – the ultimate goal that we should @@ -883,7 +883,7 @@ transcript: sec: 2712 time: '45:12' who: 'Alexey:' -- header: 'Getting Started: Cloud‑Managed Pipelines and Simple Projects' +- header: 'Getting Started: Cloud-Managed Pipelines and Simple Projects' - line: Yeah. sec: 2793 time: '46:33' @@ -1208,7 +1208,7 @@ transcript: sec: 3753 time: '1:02:33' who: 'Alexey:' -- header: 'Breaking Silos: Language‑Agnostic Pipelines and Collaboration' +- header: 'Breaking Silos: Language-Agnostic Pipelines and Collaboration' - line: Well, the good news is that these things are language-agnostic. Of course, there shouldn't be silos in the organization, especially based on language. At least the development departments should be working together. 
But the tooling diff --git a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md index 23ba422d..12ff0762 100644 --- a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md +++ b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md @@ -21,18 +21,18 @@ description: 'Learn vector databases, LLMs & semantic retrieval: RAG, embeddings intro: How do modern search systems combine vector databases, LLMs, and semantic retrieval to deliver relevant, reliable results—and when should you adopt each component? In this episode Atita Arora walks through that question from both historical and - practical angles. A long‑time contributor to information retrieval projects (including - Apache OpenNLP and Quepid) and author of posts on vectors in e‑commerce and the - open‑source Chorus implementation, Atita brings hands‑on experience plus ongoing - research into evaluating RAG systems and a commitment to user‑centric metrics and + practical angles. A long-time contributor to information retrieval projects (including + Apache OpenNLP and Quepid) and author of posts on vectors in e-commerce and the + open-source Chorus implementation, Atita brings hands-on experience plus ongoing + research into evaluating RAG systems and a commitment to user-centric metrics and inclusivity.

We cover the evolution from Solr/Lucene and the Semantic Web - era to NLP for query‑content matching; practical vector topics such as Qdrant, plug‑and‑play - vector search, and migration tradeoffs; and end‑to‑end RAG pipelines—Whisper transcripts, + era to NLP for query-content matching; practical vector topics such as Qdrant, plug-and-play + vector search, and migration tradeoffs; and end-to-end RAG pipelines—Whisper transcripts, chunking and embedding strategies, LangChain orchestration, prompt design, citations, - and multi‑level evaluation with human‑in‑the‑loop testing. You’ll also hear about - session‑based recommendations, personalization approaches, and curated learning + and multi-level evaluation with human-in-the-loop testing. You’ll also hear about + session-based recommendations, personalization approaches, and curated learning resources like Intro to Information Retrieval and Vector Hub. Listen to gain actionable - guidance on building and evaluating vector search and retrieval‑augmented generation + guidance on building and evaluating vector search and retrieval-augmented generation systems while avoiding common pitfalls like LLM hallucinations. 
dateadded: 2024-01-07 duration: PT00H59M13S @@ -57,7 +57,7 @@ quotableClips: startOffset: 689 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=689 endOffset: 1021 -- name: 'Vector databases overview: Qdrant and plug‑and‑play vector search' +- name: 'Vector databases overview: Qdrant and plug-and-play vector search' startOffset: 1021 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1021 endOffset: 1227 @@ -65,7 +65,7 @@ quotableClips: startOffset: 1227 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1227 endOffset: 1380 -- name: 'Evolution of search: NLP, personalization, learning‑to‑rank and LLMs' +- name: 'Evolution of search: NLP, personalization, learning-to-rank and LLMs' startOffset: 1380 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1380 endOffset: 1838 @@ -89,19 +89,19 @@ quotableClips: startOffset: 2569 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2569 endOffset: 2889 -- name: 'RAG evaluation: multi‑level metrics, offline tests and human‑in‑the‑loop' +- name: 'RAG evaluation: multi-level metrics, offline tests and human-in-the-loop' startOffset: 2889 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2889 endOffset: 3052 -- name: 'Evaluation reading: Human‑in‑the‑Loop and practical methodologies' +- name: 'Evaluation reading: Human-in-the-Loop and practical methodologies' startOffset: 3052 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3052 endOffset: 3127 -- name: 'Vector databases for ML: session‑based recommendations and re‑ranking' +- name: 'Vector databases for ML: session-based recommendations and re-ranking' startOffset: 3127 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3127 endOffset: 3294 -- name: 'Personalization approaches: session‑based vs collaborative filtering' +- name: 'Personalization approaches: session-based vs collaborative filtering' startOffset: 3294 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3294 endOffset: 3470 @@ -110,7 +110,7 @@ quotableClips: startOffset: 3470 url: 
https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3470 endOffset: 3624 -- name: Episode wrap‑up, links and next steps +- name: Episode wrap-up, links and next steps startOffset: 3624 url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3624 endOffset: 3553 @@ -383,7 +383,7 @@ transcript: sec: 813 time: '13:33' who: Atita -- header: 'Vector databases overview: Qdrant and plug‑and‑play vector search' +- header: 'Vector databases overview: Qdrant and plug-and-play vector search' - line: Qdrant is a vector database, right? sec: 1021 time: '17:01' @@ -504,7 +504,7 @@ transcript: sec: 1244 time: '20:44' who: Atita -- header: 'Evolution of search: NLP, personalization, learning‑to‑rank and LLMs' +- header: 'Evolution of search: NLP, personalization, learning-to-rank and LLMs' - line: As somebody who has worked for 15 years in this area, you probably started with creating indices for Lucene in something similar to MapReduce without Hadoop in there. Now it has changed significantly since then. So now we’re talking about @@ -948,7 +948,7 @@ transcript: sec: 2887 time: '48:07' who: Atita -- header: 'RAG evaluation: multi‑level metrics, offline tests and human‑in‑the‑loop' +- header: 'RAG evaluation: multi-level metrics, offline tests and human-in-the-loop' - line: And then we were talking about evaluation because, right now, I have this RAG system with all the podcasts transcripts, but now I want to see if it's working fine. I can, of course, go ahead and test it – make 3, 4, 5 queries and then see, @@ -985,7 +985,7 @@ transcript: sec: 2954 time: '49:14' who: Atita -- header: 'Evaluation reading: Human‑in‑the‑Loop and practical methodologies' +- header: 'Evaluation reading: Human-in-the-Loop and practical methodologies' - line: So there's a book you mentioned, Human in the Loop. It's a book from Manning, right? [Atita agrees] It’s by Robert Monarch. 
sec: 3052 @@ -1023,7 +1023,7 @@ transcript: sec: 3122 time: '52:02' who: Alexey -- header: 'Vector databases for ML: session‑based recommendations and re‑ranking' +- header: 'Vector databases for ML: session-based recommendations and re-ranking' - line: I noticed that we have a question. The question is from Taras. Taras is asking, “Is there any application of vector databases for machine learning? For instance, could it be used for making the training of deep learning models faster? Maybe @@ -1070,7 +1070,7 @@ transcript: sec: 3288 time: '54:48' who: Atita -- header: 'Personalization approaches: session‑based vs collaborative filtering' +- header: 'Personalization approaches: session-based vs collaborative filtering' - line: What you mentioned is… With collaborative filtering, we would need to re-do the whole thing, right? Then the vectors we do from another training will be super different from the first training. What you mentioned right now with clicks updating @@ -1203,7 +1203,7 @@ transcript: sec: 3621 time: '1:00:21' who: Atita -- header: Episode wrap‑up, links and next steps +- header: Episode wrap-up, links and next steps - line: Yes. So please make a post when you publish that evaluation article. With that, I think, that's all we have time for today. Atita, thanks a lot for joining us today. 
@@ -1237,10 +1237,10 @@ transcript: time: '1:01:08' who: Atita context: 'Search today is less about keywords and more about constructing a reliable - retrieval‑plus‑generation system: the core through‑line is that effective modern + retrieval-plus-generation system: the core through-line is that effective modern search combines classical IR principles (indexing, ranking, evaluation) with semantic vector representations, embedding stores or vector databases, and LLMs—stitched - together by careful ingestion, orchestration, prompt design, and human‑in‑the‑loop + together by careful ingestion, orchestration, prompt design, and human-in-the-loop evaluation—to deliver accurate, contextualized, and personalized answers.' --- Links: diff --git a/_podcast/nlp-dataset-creation-annotation-tools-workflows.md b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md index 6ade50e1..d7d64782 100644 --- a/_podcast/nlp-dataset-creation-annotation-tools-workflows.md +++ b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=QggWydGrWoo description: 'Discover dataset creation, annotation & active learning: practical annotation UX, quality metrics, prototyping tips and tooling to accelerate NLP models.' -intro: How do you create high‑quality NLP datasets without breaking the budget? In this episode Christiaan Swart — an NLP practitioner with six years’ experience across email, complaints, pharma, and sales who cofounded Comtura (born from sales call transcription and CRM integration) — walks through practical methods for dataset creation and annotation.

We cover automated, manual, and hybrid pipelines; stakeholder alignment to de‑risk projects; in‑house vs. crowdsourcing trade‑offs; and building a living annotation guidebook for ambiguous cases. Chris explains model‑assisted annotation (pre‑labeling and interpretability layers), capturing expert knowledge, establishing human baselines, and improving annotation UX and productivity. You’ll also hear about annotation quality metrics (inter‑annotator agreement, throughput, fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling functions), programmatic heuristics, and tooling recommendations like Prodigy, Docanno, Label Studio, Snorkel, and Rubrics. Quick‑start tips using IPython widgets and Fast.ai, plus privacy and multilingual considerations (GDPR, anonymization), round out the conversation.

Listen to learn actionable strategies for cost‑effective dataset creation, annotation workflows, and tool choices that speed model development and produce reliable training data +intro: How do you create high-quality NLP datasets without breaking the budget? In this episode Christiaan Swart — an NLP practitioner with six years’ experience across email, complaints, pharma, and sales who cofounded Comtura (born from sales call transcription and CRM integration) — walks through practical methods for dataset creation and annotation.

We cover automated, manual, and hybrid pipelines; stakeholder alignment to de-risk projects; in-house vs. crowdsourcing trade-offs; and building a living annotation guidebook for ambiguous cases. Chris explains model-assisted annotation (pre-labeling and interpretability layers), capturing expert knowledge, establishing human baselines, and improving annotation UX and productivity. You’ll also hear about annotation quality metrics (inter-annotator agreement, throughput, fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling functions), programmatic heuristics, and tooling recommendations like Prodigy, Doccano, Label Studio, Snorkel, and Rubrix. Quick-start tips using IPython widgets and Fast.ai, plus privacy and multilingual considerations (GDPR, anonymization), round out the conversation.

Listen to learn actionable strategies for cost-effective dataset creation, annotation workflows, and tool choices that speed model development and produce reliable training data topics: - NLP - data @@ -33,7 +33,7 @@ quotableClips: startOffset: 82 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=82 endOffset: 144 -- name: Guest Background & Career in NLP and bio‑NLP +- name: Guest Background & Career in NLP and bio-NLP startOffset: 144 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=144 endOffset: 312 @@ -45,11 +45,11 @@ quotableClips: startOffset: 411 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=411 endOffset: 542 -- name: 'Stakeholder Alignment: Top‑down framing to de‑risk projects' +- name: 'Stakeholder Alignment: Top-down framing to de-risk projects' startOffset: 542 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=542 endOffset: 939 -- name: 'Annotation Strategy: In‑house vs. crowdsourcing trade‑offs' +- name: 'Annotation Strategy: In-house vs. crowdsourcing trade-offs' startOffset: 939 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=939 endOffset: 1116 @@ -57,7 +57,7 @@ quotableClips: startOffset: 1116 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1116 endOffset: 1257 -- name: 'Model‑Assisted Annotation: Pre‑labeling and interpretability layers' +- name: 'Model-Assisted Annotation: Pre-labeling and interpretability layers' startOffset: 1257 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1257 endOffset: 1441 @@ -73,7 +73,7 @@ quotableClips: startOffset: 2102 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2102 endOffset: 2262 -- name: 'Annotation Quality Metrics: Inter‑annotator agreement, throughput, fatigue' +- name: 'Annotation Quality Metrics: Inter-annotator agreement, throughput, fatigue' startOffset: 2262 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2262 endOffset: 2571 @@ -97,7 +97,7 @@ quotableClips: startOffset: 3154 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3154 endOffset: 3438 -- name: 'Quick‑start 
Collection: IPython widgets and Fast.ai for beginners' +- name: 'Quick-start Collection: IPython widgets and Fast.ai for beginners' startOffset: 3438 url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3438 endOffset: 3506 @@ -131,7 +131,7 @@ transcript: sec: 139 time: '2:19' who: Christiaan -- header: Guest Background & Career in NLP and bio‑NLP +- header: Guest Background & Career in NLP and bio-NLP - line: Before we go into our main topic of dataset creation and curation, let's start with your background. Can you tell us about your career journey so far? sec: 144 @@ -245,7 +245,7 @@ transcript: sec: 489 time: '8:09' who: Alexey -- header: 'Stakeholder Alignment: Top‑down framing to de‑risk projects' +- header: 'Stakeholder Alignment: Top-down framing to de-risk projects' - line: Yeah, I think this is the bottom-up view. But I think the top-down view is what I think most data scientists struggle with, actually. And I think I've made most of my mistakes from having this kind of bottom-up view rather than a more @@ -345,7 +345,7 @@ transcript: sec: 542 time: '9:02' who: Christiaan -- header: 'Annotation Strategy: In‑house vs. crowdsourcing trade‑offs' +- header: 'Annotation Strategy: In-house vs. crowdsourcing trade-offs' - line: Yeah, that was quite a lot to unpack – a lot of information. Let me try to summarize. I probably missed a few of the very important bits. When it comes to the process of actually collecting data, first of all, we need to have the process. @@ -434,7 +434,7 @@ transcript: sec: 1133 time: '18:53' who: Christiaan -- header: 'Model‑Assisted Annotation: Pre‑labeling and interpretability layers' +- header: 'Model-Assisted Annotation: Pre-labeling and interpretability layers' - line: This pre-labeling, I think I saw a tool that does something like this. Correct me if I'm wrong. We present a piece of a document and ask annotators to label it, right? 
It can be a part from the sales call and we say, “Okay, based on the @@ -708,7 +708,7 @@ transcript: sec: 2197 time: '36:37' who: Alexey -- header: 'Annotation Quality Metrics: Inter‑annotator agreement, throughput, fatigue' +- header: 'Annotation Quality Metrics: Inter-annotator agreement, throughput, fatigue' - line: Yeah, I think annotation user experience is massive and it's also measurable. I'm a huge fan of this whole annotation process. You can have a very quantitative and database approach to how you measure the impact of these things. For example, @@ -1002,7 +1002,7 @@ transcript: sec: 3221 time: '53:41' who: Christiaan -- header: 'Quick‑start Collection: IPython widgets and Fast.ai for beginners' +- header: 'Quick-start Collection: IPython widgets and Fast.ai for beginners' - line: In my personal experience, you can just start using IPython widgets, like widgets in Jupyter Notebook. It's super easy to start with. It's not as advanced as Snorkel or Prodigy, but if you need some binary classification case, then you diff --git a/_podcast/nlp-team-hiring-and-production-mlops.md b/_podcast/nlp-team-hiring-and-production-mlops.md index 3617a6e7..4742799a 100644 --- a/_podcast/nlp-team-hiring-and-production-mlops.md +++ b/_podcast/nlp-team-hiring-and-production-mlops.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/leading-nlp-teams-ivan-bilan/id1541710331?i=1000546053682 description: Learn practical NLP teams hiring, production pipelines and MLOps tradeoffs—GPT-3 & spaCy tactics to deploy, monitor and scale reliable LLM systems -intro: How do you structure an NLP team and build reliable production pipelines while weighing the tradeoffs between GPT‑3 and in‑house models? In this episode, Ivan Bilan, Engineering Manager at Personio working on Identity and Access Management, walks through practical answers from his transition from linguistics to production NLP and MLOps.

We cover hiring and team models (centralized vs cross‑disciplinary), what to look for in NLP engineers (tokenization, linguistics, deployment skills), and when to bring in linguists or conversational designers. Ivan breaks down the anatomy of an NLP production pipeline—data annotation, task engineering, testing, deployment, observability—and contrasts using GPT‑3 with building in‑house pipelines and open‑source tools like spaCy and Hugging Face for MVPs. He discusses inference optimization, privacy and bias risks with large language models, benchmarking limits, and practical microservice patterns for data‑intensive apps.

Listen to learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production +intro: How do you structure an NLP team and build reliable production pipelines while weighing the tradeoffs between GPT-3 and in-house models? In this episode, Ivan Bilan, Engineering Manager at Personio working on Identity and Access Management, walks through practical answers from his transition from linguistics to production NLP and MLOps.

We cover hiring and team models (centralized vs cross-disciplinary), what to look for in NLP engineers (tokenization, linguistics, deployment skills), and when to bring in linguists or conversational designers. Ivan breaks down the anatomy of an NLP production pipeline—data annotation, task engineering, testing, deployment, observability—and contrasts using GPT-3 with building in-house pipelines and open-source tools like spaCy and Hugging Face for MVPs. He discusses inference optimization, privacy and bias risks with large language models, benchmarking limits, and practical microservice patterns for data-intensive apps.

Listen to learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production topics: - NLP - machine learning @@ -59,7 +59,7 @@ quotableClips: startOffset: 714 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=714 endOffset: 847 -- name: 'Defining NLP Teams: Centralized vs Cross‑disciplinary Structures' +- name: 'Defining NLP Teams: Centralized vs Cross-disciplinary Structures' startOffset: 847 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=847 endOffset: 1005 @@ -79,7 +79,7 @@ quotableClips: startOffset: 1476 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1476 endOffset: 1579 -- name: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non‑coding Roles' +- name: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non-coding Roles' startOffset: 1579 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1579 endOffset: 1718 @@ -100,15 +100,15 @@ quotableClips: startOffset: 2097 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2097 endOffset: 2335 -- name: 'Large Language Models & Prompting: GPT‑3 Capabilities and Simplification' +- name: 'Large Language Models & Prompting: GPT-3 Capabilities and Simplification' startOffset: 2335 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2335 endOffset: 2585 -- name: 'GPT‑3 Limitations: Cost, Control, Bias & Privacy Risks' +- name: 'GPT-3 Limitations: Cost, Control, Bias & Privacy Risks' startOffset: 2585 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2585 endOffset: 2770 -- name: 'GPT‑3 vs In‑house Pipelines: MVP Strategy, Control & Open‑Source Alternatives' +- name: 'GPT-3 vs In-house Pipelines: MVP Strategy, Control & Open-Source Alternatives' startOffset: 2770 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2770 endOffset: 2919 @@ -116,7 +116,7 @@ quotableClips: startOffset: 2919 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2919 endOffset: 3177 -- name: 'AI Benchmarking: Human‑level Claims, 
Dataset Limits & Real‑world Gaps' +- name: 'AI Benchmarking: Human-level Claims, Dataset Limits & Real-world Gaps' startOffset: 3177 url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3177 endOffset: 3225 @@ -359,7 +359,7 @@ transcript: sec: 816 time: '13:36' who: Alexey -- header: 'Defining NLP Teams: Centralized vs Cross‑disciplinary Structures' +- header: 'Defining NLP Teams: Centralized vs Cross-disciplinary Structures' - line: Yeah, good question. It's more of an industry question. Do we even have separate designation for NLP teams? I think maybe a few years ago, this wasn't the case. You would just have a data science team and everything data science is done there @@ -574,7 +574,7 @@ transcript: sec: 1489 time: '24:49' who: Alexey -- header: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non‑coding Roles' +- header: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non-coding Roles' - line: Yeah, for sure. There are some specific tasks that would really benefit from that. I think that in the last two years, there was a new role forming in the world of data science, called “conversational designer”. It's basically a person @@ -776,7 +776,7 @@ transcript: sec: 2325 time: '38:45' who: Alexey -- header: 'Large Language Models & Prompting: GPT‑3 Capabilities and Simplification' +- header: 'Large Language Models & Prompting: GPT-3 Capabilities and Simplification' - line: Yeah, GPT-3 is on a whole different level. You don't need to do anything, really. The idea of GPT-3 is that it’s a smart lookup table. It has seen, I think, like 10% of the whole internet. That's what the data set was used to train it. @@ -868,7 +868,7 @@ transcript: sec: 2577 time: '42:57' who: Alexey -- header: 'GPT‑3 Limitations: Cost, Control, Bias & Privacy Risks' +- header: 'GPT-3 Limitations: Cost, Control, Bias & Privacy Risks' - line: Yeah. I mean, I don't know. They are trying to open source it now or something. I don't know. 
But I think you still have to pay for tokens in order to be able to use it. @@ -941,7 +941,7 @@ transcript: sec: 2750 time: '45:50' who: Alexey -- header: 'GPT‑3 vs In‑house Pipelines: MVP Strategy, Control & Open‑Source Alternatives' +- header: 'GPT-3 vs In-house Pipelines: MVP Strategy, Control & Open-Source Alternatives' - line: I would say, yes, because GPT-3 still isn't able to solve everything. It is able to solve most of the tasks to a good extent. But the question is, “Can it actually solve everything you need for it to be used in production – for it to @@ -1047,7 +1047,7 @@ transcript: sec: 3160 time: '52:40' who: Alexey -- header: 'AI Benchmarking: Human‑level Claims, Dataset Limits & Real‑world Gaps' +- header: 'AI Benchmarking: Human-level Claims, Dataset Limits & Real-world Gaps' - line: Yeah, I don't think so. I don't think there is any problem we have fully solved. There are papers that state something like “AI models are as good as humans” or “better than humans”. But this is all evaluated on a very small subset of data. diff --git a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md index 58821a95..55c797f4 100644 --- a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md +++ b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md @@ -17,9 +17,9 @@ links: description: Learn open source volunteering tactics for AI projects - data sourcing, hackathon MVP strategy, mentorship and portfolio-building to accelerate career momentum. intro: How can volunteering in open source AI projects accelerate your career while - delivering tangible community impact? In this episode Sara El‑Ateif — Google Developer - Expert in Machine Learning, Google PhD Fellow, co‑founder of AI Wonder Girls and - Evercoach‑certified business coach — walks through practical ways to build skills + delivering tangible community impact? 
In this episode Sara El-Ateif — Google Developer + Expert in Machine Learning, Google PhD Fellow, co-founder of AI Wonder Girls and + Evercoach-certified business coach — walks through practical ways to build skills and momentum through volunteering and open source work.

We cover Sara’s path from early AI interest to PhD research in multimodal learning and medical imaging, plus lessons from winning a Google PhD Fellowship. Hear concrete volunteer project @@ -27,7 +27,7 @@ intro: How can volunteering in open source AI projects accelerate your career wh — and learn data sourcing tactics using Open Images and creative collection. Sara explains how to find opportunities (LinkedIn, social media, mailing lists, WIML), differences between collaboration platforms like Omdena and Fruit Punch AI, and - how women‑led groups structure projects.

Listeners will get actionable + how women-led groups structure projects.

Listeners will get actionable advice on hackathon strategy, MVP mindset under data/compute constraints, pitching for volunteer roles, building a research network, and the data engineering tasks that matter (pipelines, dashboards, prep). Tune in to discover how open source and diff --git a/_podcast/open-source-ml-tools-strategy-and-business-models.md b/_podcast/open-source-ml-tools-strategy-and-business-models.md index aebcf554..7bdd5cf9 100644 --- a/_podcast/open-source-ml-tools-strategy-and-business-models.md +++ b/_podcast/open-source-ml-tools-strategy-and-business-models.md @@ -21,13 +21,13 @@ intro: How can open source ML tools stay healthy, useful, and financially sustai Research Advocate at Rasa, author of the Koaning blog, creator of the Algorithm Whiteboard playlist, and cofounder of Calm Code — walks through the real-world tradeoffs of scikit-learn governance, sustainability, and business models for ML tooling. -

We dig into scikit-learn’s history, NumFOCUS relationships, and the plugin‑versus‑core +

We dig into scikit-learn’s history, NumFOCUS relationships, and the plugin-versus-core strategy; practical maintainer issues like transitions, motivating volunteers, and using open source contributions as hiring signals; and the intersection of developer - relations and core engineering. Vincent also explores Calm Code’s low‑pressure teaching + relations and core engineering. Vincent also explores Calm Code’s low-pressure teaching philosophy, content and monetization choices, and platform decisions (Django, contributor hiring). Technical operations topics include CI cost optimization with custom runners - and sustainable compute examples (Leaf.cloud), plus hands‑on projects like Skrub’s + and sustainable compute examples (Leaf.cloud), plus hands-on projects like Skrub’s table vectorizer and GAP encoder for pragmatic tabular defaults.

Listeners will gain actionable insights on governance models, maintaining project health, and realistic business options — training, consulting, and partnerships — for anyone @@ -1006,12 +1006,12 @@ context: 'Context: This episode surveys the Scikit-Learn ecosystem, related proj career stories, governance, tooling choices, content production, maintainer handoffs, CI/cost concerns, and early business models. - Core unifying idea: Long-term health and impact of open-source machine‑learning + Core unifying idea: Long-term health and impact of open-source machine-learning projects depends not just on great code but on a deliberate integration of engineering excellence, community stewardship, accessible education, and sustainable operational/business practices — i.e., building pragmatic tools and clear learning paths while creating - incentives (training, consulting, platform models, cost‑efficient infrastructure, - and low‑pressure contributor experiences) that enable maintainers and contributors + incentives (training, consulting, platform models, cost-efficient infrastructure, + and low-pressure contributor experiences) that enable maintainers and contributors to keep projects useful, adoptable, and durable.' 
--- Links: diff --git a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md index 52f64ecf..8db1d103 100644 --- a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md +++ b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md @@ -46,7 +46,7 @@ quotableClips: startOffset: 189 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=189 endOffset: 219 -- name: 'Career Progression: Consulting, In‑house Roles, and Platform Data' +- name: 'Career Progression: Consulting, In-house Roles, and Platform Data' startOffset: 219 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=219 endOffset: 397 @@ -58,11 +58,11 @@ quotableClips: startOffset: 416 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=416 endOffset: 641 -- name: 'Amazon Research: Customer‑Focused, Production‑Oriented Work' +- name: 'Amazon Research: Customer-Focused, Production-Oriented Work' startOffset: 641 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=641 endOffset: 702 -- name: Using State‑of‑the‑Art Models and Publishing in Industry Tracks +- name: Using State-of-the-Art Models and Publishing in Industry Tracks startOffset: 702 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=702 endOffset: 986 @@ -78,7 +78,7 @@ quotableClips: startOffset: 1147 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1147 endOffset: 1391 -- name: 'Model‑in‑the‑Loop Annotation Study: Design and Rationale' +- name: 'Model-in-the-Loop Annotation Study: Design and Rationale' startOffset: 1391 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1391 endOffset: 1520 @@ -86,7 +86,7 @@ quotableClips: startOffset: 1520 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1520 endOffset: 1667 -- name: Model Evaluation Strategy and Stabilizing High‑Traffic Utterances +- name: Model Evaluation Strategy and Stabilizing High-Traffic Utterances startOffset: 1667 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1667 
endOffset: 1903 @@ -94,7 +94,7 @@ quotableClips: startOffset: 1903 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1903 endOffset: 1927 -- name: 'Service Offerings: Generative AI Workshops and Use‑Case Discovery' +- name: 'Service Offerings: Generative AI Workshops and Use-Case Discovery' startOffset: 1927 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1927 endOffset: 2135 @@ -102,7 +102,7 @@ quotableClips: startOffset: 2135 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2135 endOffset: 2275 -- name: 'Self‑Employment Realities: Taxes, Health Insurance, and Admin' +- name: 'Self-Employment Realities: Taxes, Health Insurance, and Admin' startOffset: 2275 url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2275 endOffset: 2343 @@ -209,7 +209,7 @@ transcript: sec: 217 time: '3:37' who: Alexey -- header: 'Career Progression: Consulting, In‑house Roles, and Platform Data' +- header: 'Career Progression: Consulting, In-house Roles, and Platform Data' - line: I did the Master's in Berlin, yeah. This was a joint statistics program from the Free University, Humboldt University, and the Technical University. They have this Joint Master Program. Basically, I discovered data science and machine learning @@ -321,7 +321,7 @@ transcript: sec: 605 time: '10:05' who: Verena -- header: 'Amazon Research: Customer‑Focused, Production‑Oriented Work' +- header: 'Amazon Research: Customer-Focused, Production-Oriented Work' - line: You said that research at Amazon is not the same as research in academia. So what does research at Amazon look like? What do you actually do there? sec: 641 @@ -343,7 +343,7 @@ transcript: sec: 693 time: '11:33' who: Alexey -- header: Using State‑of‑the‑Art Models and Publishing in Industry Tracks +- header: Using State-of-the-Art Models and Publishing in Industry Tracks - line: Yeah. Basically, before you start, there's three things you need to look into. First, “What is the customer problem? 
How do we define the problem clearly?” Then, second, “What are the possible solutions?” and, “How are they going to solve the @@ -567,7 +567,7 @@ transcript: sec: 1323 time: '22:03' who: Verena -- header: 'Model‑in‑the‑Loop Annotation Study: Design and Rationale' +- header: 'Model-in-the-Loop Annotation Study: Design and Rationale' - line: I'm looking at your Google Scholar, and there is one paper with six citations that you published in 2021, which is, “Is it better to verify semi-supervised learning with a human in the loop or large scale NLU models?” It’s a long name. @@ -656,7 +656,7 @@ transcript: sec: 1662 time: '27:42' who: Verena -- header: Model Evaluation Strategy and Stabilizing High‑Traffic Utterances +- header: Model Evaluation Strategy and Stabilizing High-Traffic Utterances - line: How do you evaluate the performance of these models? I guess, you send it to the annotators – they say “Yes, no.” Right? sec: 1667 @@ -769,7 +769,7 @@ transcript: sec: 1903 time: '31:43' who: Alexey -- header: 'Service Offerings: Generative AI Workshops and Use‑Case Discovery' +- header: 'Service Offerings: Generative AI Workshops and Use-Case Discovery' - line: Yeah. I don't know if it's… I mean, it is different in the sense that I'm not working on one model anymore. But, of course, there is still going to be quite some overlap in terms of topic. As I said before, my goal is to support companies @@ -885,7 +885,7 @@ transcript: sec: 2274 time: '37:54' who: Verena -- header: 'Self‑Employment Realities: Taxes, Health Insurance, and Admin' +- header: 'Self-Employment Realities: Taxes, Health Insurance, and Admin' - line: When I became self-employed this year, I was very surprised by how expensive health insurance is in Germany. 
Maybe it's less expensive than in the States, but still, when all these costs (all these taxes) are hidden and being a full-time @@ -1364,12 +1364,12 @@ transcript: who: Alexey context: 'Context: The episode follows a journey from academic foundations in economics, Chinese, and statistics through industry research and platform data roles to independent - generative-AI consultancy, touching on technical approaches (SOTA models, model‑in‑the‑loop + generative-AI consultancy, touching on technical approaches (SOTA models, model-in-the-loop annotation, evaluation), product and business priorities, client acquisition and pitching, entrepreneurship realities, and community support initiatives. Core: The unifying idea is translating deep technical expertise into pragmatic, - production‑oriented generative-AI solutions that deliver measurable business impact—anchored + production-oriented generative-AI solutions that deliver measurable business impact—anchored in rigorous evaluation, stakeholder ownership, evidence-based communication (workshops, decks, case studies), continual learning, and a commitment to accessibility and mentorship while managing the practicalities of running a sustainable freelance diff --git a/_podcast/practical-llm-engineering-and-rag.md b/_podcast/practical-llm-engineering-and-rag.md index 0b2901e3..68b85b86 100644 --- a/_podcast/practical-llm-engineering-and-rag.md +++ b/_podcast/practical-llm-engineering-and-rag.md @@ -17,7 +17,7 @@ links: description: 'Discover LLM engineering and RAG best practices: practical prompting, evaluation methods and deployment workflows to boost accuracy and retrieval.' intro: How do you move from experimentation to reliable, production-ready LLM engineering - and retrieval-augmented generation (RAG)? In this episode Hugo Bowne‑Anderson — + and retrieval-augmented generation (RAG)? 
In this episode Hugo Bowne-Anderson — Head of Developer Relations at Outerbounds, longtime data scientist, educator, and host of Vanishing Gradients — walks through practical patterns for building, evaluating, and scaling real-world LLM workflows.

We cover everyday LLM use cases (summaries, @@ -27,7 +27,7 @@ intro: How do you move from experimentation to reliable, production-ready LLM en quality control, how to design evaluation sets and failure analysis, and concrete chunking strategies (fixed length, sliding windows, context rotation) that unlock RAG performance. He also discusses when to add tooling or agentic capabilities, - a four‑step framework for agents, memory design tradeoffs, and a practical email + a four-step framework for agents, memory design tradeoffs, and a practical email assistant example using the Gmail API plus RAG.

Listen to learn actionable guidance on prioritizing RAG for quick business wins, building debuggable MVPs with logging and traces, and setting up evaluation and monitoring so your LLMs deliver @@ -36,7 +36,7 @@ dateadded: 2025-10-27 date: 2025-11-07 duration: PT01H01M30S quotableClips: -- name: Podcast Kickoff & Hugo Bowne‑Anderson Background +- name: Podcast Kickoff & Hugo Bowne-Anderson Background startOffset: 0 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=0 endOffset: 72 @@ -56,7 +56,7 @@ quotableClips: startOffset: 237 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=237 endOffset: 431 -- name: 'Consulting vs Advisory: Hands‑On Coding and Organizational Advice' +- name: 'Consulting vs Advisory: Hands-On Coding and Organizational Advice' startOffset: 431 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=431 endOffset: 504 @@ -124,15 +124,15 @@ quotableClips: startOffset: 3214 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3214 endOffset: 3381 -- name: 'Four‑Step Framework for Agents: Problem, Start Small, Data, Evaluation' +- name: 'Four-Step Framework for Agents: Problem, Start Small, Data, Evaluation' startOffset: 3381 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3381 endOffset: 3461 -- name: 'Memory Design: Retrieval‑Based Memory vs Multi‑Turn Conversation Memory' +- name: 'Memory Design: Retrieval-Based Memory vs Multi-Turn Conversation Memory' startOffset: 3461 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3461 endOffset: 3655 -- name: 'Episode Wrap‑Up: Key Takeaways, Courses, and Next Steps' +- name: 'Episode Wrap-Up: Key Takeaways, Courses, and Next Steps' startOffset: 3655 url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3655 endOffset: 3690 @@ -1235,13 +1235,13 @@ transcript: sec: 3690 time: '1:01:30' who: Alexey -context: 'Context: This episode surveys practical, hands‑on patterns—RAG, chunking, +context: 'Context: This episode surveys practical, hands-on patterns—RAG, chunking, prompting, generator–evaluator workflows, 
transcript pipelines, evaluation sets, monitoring, agents, memory design, and developer tooling—drawn from moving models from prototypes into real products across consulting, DevRel, and engineering roles. - Core: The unifying idea is pragmatic, iterative engineering of LLM‑powered systems: - prioritize retrieval‑first solutions that deliver immediate business value, instrument + Core: The unifying idea is pragmatic, iterative engineering of LLM-powered systems: + prioritize retrieval-first solutions that deliver immediate business value, instrument rigorous evaluation and monitoring (gold tests, failure analysis, generator–evaluator), automate pipelines and reproducible workflows, and only escalate to agentic tooling or persistent memory once data, metrics, and clear ROI justify the added complexity—treating diff --git a/_podcast/pragmatic-and-standardized-mlops.md b/_podcast/pragmatic-and-standardized-mlops.md index a2ed6f05..8e0c76fc 100644 --- a/_podcast/pragmatic-and-standardized-mlops.md +++ b/_podcast/pragmatic-and-standardized-mlops.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=q3DTR3Od1MA description: 'Learn pragmatic MLOps: standardize CI/CD, model registry and monitoring to boost reproducibility, deployment reliability, and team productivity.' -intro: 'How do you build pragmatic, standardized MLOps across teams without chasing every new tool? In this episode, Maria Vechtomova — an MLOps tech lead and manager with roots in econometrics and early work moving from R to Python — tackles MLOps as an organizational challenge, not just a technology problem.

Maria walks through core, actionable topics: building reusable CI/CD and standardized repos, choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines central MLOps responsibilities — infrastructure, registries, deployment patterns, and monitoring — and contrasts centralized platform teams with embedded feature teams and guardrails. You’ll hear practical advice on moving logic out of notebooks into packages and pipelines, conducting maturity assessments (reproducibility, testing, documentation), and securing DevOps buy‑in. The conversation also covers monitoring standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus retail use cases like demand forecasting and personalization.

Listen to learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring — and how to prioritize organizational change to make MLOps work in production.' +intro: 'How do you build pragmatic, standardized MLOps across teams without chasing every new tool? In this episode, Maria Vechtomova — an MLOps tech lead and manager with roots in econometrics and early work moving from R to Python — tackles MLOps as an organizational challenge, not just a technology problem.

Maria walks through core, actionable topics: building reusable CI/CD and standardized repos, choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines central MLOps responsibilities — infrastructure, registries, deployment patterns, and monitoring — and contrasts centralized platform teams with embedded feature teams and guardrails. You’ll hear practical advice on moving logic out of notebooks into packages and pipelines, conducting maturity assessments (reproducibility, testing, documentation), and securing DevOps buy-in. The conversation also covers monitoring standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus retail use cases like demand forecasting and personalization.

Listen to learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring — and how to prioritize organizational change to make MLOps work in production.' topics: - MLOps dateadded: 2023-09-25 @@ -89,7 +89,7 @@ quotableClips: startOffset: 1626 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1626 endOffset: 1795 -- name: 'Standardization: cookie‑cutter repos, service principals, and Databricks +- name: 'Standardization: cookie-cutter repos, service principals, and Databricks integration' startOffset: 1795 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1795 @@ -99,11 +99,11 @@ quotableClips: startOffset: 2004 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2004 endOffset: 2069 -- name: 'Implementation timeline: technical build vs. organizational buy‑in and permissions' +- name: 'Implementation timeline: technical build vs. organizational buy-in and permissions' startOffset: 2069 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2069 endOffset: 2121 -- name: 'Securing DevOps buy‑in: expose pain, deliver standards, and enable internal +- name: 'Securing DevOps buy-in: expose pain, deliver standards, and enable internal audit' startOffset: 2121 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2121 @@ -112,7 +112,7 @@ quotableClips: startOffset: 2281 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2281 endOffset: 2369 -- name: 'Tool‑agnostic skills: learn fundamentals and stitch tools together end-to-end' +- name: 'Tool-agnostic skills: learn fundamentals and stitch tools together end-to-end' startOffset: 2369 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2369 endOffset: 2573 @@ -128,12 +128,12 @@ quotableClips: startOffset: 2982 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2982 endOffset: 3084 -- name: 'Cross‑brand model: centralized MLOps support for smaller brands and cooperation +- name: 'Cross-brand model: centralized MLOps support for smaller brands and cooperation with large brands' startOffset: 3084 url: 
https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3084 endOffset: 3245 -- name: 'Learning recommendations: hands‑on projects, MLOps Zoomcamp, and pairing +- name: 'Learning recommendations: hands-on projects, MLOps Zoomcamp, and pairing with engineers' startOffset: 3245 url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3245 @@ -687,7 +687,7 @@ transcript: sec: 1722 time: '28:42' who: Alexey -- header: 'Standardization: cookie‑cutter repos, service principals, and Databricks +- header: 'Standardization: cookie-cutter repos, service principals, and Databricks integration' - line: 'Yeah, I guess so. I think the choice of the tooling is related to standardization. We work in a large corporate organization with 19 brands all over the world – @@ -753,7 +753,7 @@ transcript: sec: 2004 time: '33:24' who: Maria -- header: 'Implementation timeline: technical build vs. organizational buy‑in and +- header: 'Implementation timeline: technical build vs. organizational buy-in and permissions' - line: Amazing. How long did it take to implement this? sec: 2069 @@ -784,7 +784,7 @@ transcript: sec: 2119 time: '35:19' who: Maria -- header: 'Securing DevOps buy‑in: expose pain, deliver standards, and enable internal +- header: 'Securing DevOps buy-in: expose pain, deliver standards, and enable internal audit' - line: Did you do this yourself as a tech lead? [Maria agrees] Do you have any tips on how to address that if somebody is also facing some hesitation from the DevOps @@ -881,7 +881,7 @@ transcript: sec: 2350 time: '39:10' who: Maria -- header: 'Tool‑agnostic skills: learn fundamentals and stitch tools together end-to-end' +- header: 'Tool-agnostic skills: learn fundamentals and stitch tools together end-to-end' - line: What is also interesting, and the reason I'm asking that, is because in our MLOps course, we try to cover the fundamentals. 
We break down what we think MLOps is into multiple areas, which is something like experiment tracking, machine learning @@ -1108,7 +1108,7 @@ transcript: sec: 3067 time: '51:07' who: Maria -- header: 'Cross‑brand model: centralized MLOps support for smaller brands and cooperation +- header: 'Cross-brand model: centralized MLOps support for smaller brands and cooperation with large brands' - line: Does each of these brands have a separate team – and separate a bunch of teams – for data science and they do data science separately from the rest of the organization? @@ -1157,7 +1157,7 @@ transcript: sec: 3204 time: '53:24' who: Alexey -- header: 'Learning recommendations: hands‑on projects, MLOps Zoomcamp, and pairing +- header: 'Learning recommendations: hands-on projects, MLOps Zoomcamp, and pairing with engineers' - line: There is one question, “What is the course that you take to become an MLOps engineer?” diff --git a/_podcast/production-ml-pipelines-with-aws-and-kafka.md b/_podcast/production-ml-pipelines-with-aws-and-kafka.md index 88000f15..84611467 100644 --- a/_podcast/production-ml-pipelines-with-aws-and-kafka.md +++ b/_podcast/production-ml-pipelines-with-aws-and-kafka.md @@ -15,7 +15,7 @@ links: spotify: https://open.spotify.com/episode/0fFRCAYFCReMxEiq2RDVak apple: https://podcasts.apple.com/us/podcast/build-your-own-data-pipeline-andreas-kretz/id1541710331?i=1000527643914 -description: 'Learn to build data pipelines and deploy ML on AWS: productionize notebooks, cut ops risk, choose cost‑effective serving and orchestration.' +description: 'Learn to build data pipelines and deploy ML on AWS: productionize notebooks, cut ops risk, choose cost-effective serving and orchestration.' intro: 'How do you move models out of notebooks and into reliable production data pipelines using AWS, Kafka, and streaming architectures? 
In this episode, Andreas Kretz — the “Plumber of Data Science” — walks through the practical steps engineers and data scientists need to productionize notebooks and deploy ML systems.

Andreas, a data engineer focused on platform architecture, explains why data engineering demand is rising and why teams should hire both a data scientist and engineer early. We cover the anatomy of data pipelines — ingestion (events, Kafka/Kinesis), buffering, processing (streaming vs. batch), storage (Parquet on S3) and visualization — plus processing frameworks like Spark, Flink, Glue, and Docker jobs. Andreas outlines a pragmatic stack for scientists: Python, Docker, Flask/FastAPI for prototypes, and how to choose orchestration and scheduling (Lambda/CloudWatch, Airflow, Kubernetes, message queues). You’ll also hear about inference strategies, SageMaker endpoints vs precomputed predictions, model storage, and operational trade-offs.

Listen to gain actionable guidance on building data pipelines, deploying ML on AWS, selecting tools, and getting from prototype to production with minimal operational risk. Find practical learning paths and project ideas to accelerate your data engineering skills.' topics: - data engineering @@ -63,7 +63,7 @@ quotableClips: startOffset: 1011 url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1011 endOffset: 1094 -- name: 'One-Person Feasibility: Tooling, cloud vs on‑prem, and schema design' +- name: 'One-Person Feasibility: Tooling, cloud vs on-prem, and schema design' startOffset: 1094 url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1094 endOffset: 1265 @@ -131,7 +131,7 @@ quotableClips: startOffset: 3141 url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3141 endOffset: 3292 -- name: 'Hands-on Projects: Build an e‑commerce pipeline; use Kaggle datasets' +- name: 'Hands-on Projects: Build an e-commerce pipeline; use Kaggle datasets' startOffset: 3292 url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3292 endOffset: 3453 @@ -139,7 +139,7 @@ quotableClips: startOffset: 3453 url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3453 endOffset: 3536 -- name: 'Convincing Stakeholders: Build a $0 proof‑of‑concept and quantify ROI' +- name: 'Convincing Stakeholders: Build a $0 proof-of-concept and quantify ROI' startOffset: 3536 url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3536 endOffset: 3725 @@ -406,7 +406,7 @@ transcript: sec: 1053 time: '17:33' who: Andreas -- header: 'One-Person Feasibility: Tooling, cloud vs on‑prem, and schema design' +- header: 'One-Person Feasibility: Tooling, cloud vs on-prem, and schema design' - line: That seems like a lot of work. Something that a data scientist – just one person – probably cannot really implement on his or her own, so it needs multiple people and a data engineer, at the very least. 
@@ -989,7 +989,7 @@ transcript: sec: 3286 time: '54:46' who: Andreas -- header: 'Hands-on Projects: Build an e‑commerce pipeline; use Kaggle datasets' +- header: 'Hands-on Projects: Build an e-commerce pipeline; use Kaggle datasets' - line: What I understood by talking to you now is that one of the most important skills data scientists need to have in order to pick up data engineering is cloud skills. But ‘cloud skills’ is such a broad term, right? But there are a couple @@ -1049,7 +1049,7 @@ transcript: sec: 3535 time: '58:55' who: Andreas -- header: 'Convincing Stakeholders: Build a $0 proof‑of‑concept and quantify ROI' +- header: 'Convincing Stakeholders: Build a $0 proof-of-concept and quantify ROI' - line: Good advice. So it's almost time for us to finish. But there is one interesting question. Maybe we can take a couple of minutes to answer it. Maybe it's a tough one. Let's try. “I'm trying to convince my company to start a data science department. diff --git a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md index beee1eef..d3df4041 100644 --- a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md +++ b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md @@ -124,7 +124,7 @@ quotableClips: startOffset: 3353 url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3353 endOffset: 3497 -- name: 'E‑commerce Personalization: Prototyping with embeddings and CLIP' +- name: 'E-commerce Personalization: Prototyping with embeddings and CLIP' startOffset: 3497 url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3497 endOffset: 3685 @@ -1037,7 +1037,7 @@ transcript: sec: 3494 time: '58:14' who: Alexey -- header: 'E‑commerce Personalization: Prototyping with embeddings and CLIP' +- header: 'E-commerce Personalization: Prototyping with embeddings and CLIP' - line: Also, for any questions that remain unanswered, I think there'll be a link to my 
LinkedIn – people should connect to me and shoot those questions over. For e-commerce, I think there is a huge opportunity to do real-time personalization diff --git a/_podcast/production-ready-ai-engineering.md b/_podcast/production-ready-ai-engineering.md index 7ad3313c..70b052e3 100644 --- a/_podcast/production-ready-ai-engineering.md +++ b/_podcast/production-ready-ai-engineering.md @@ -1146,7 +1146,7 @@ context: 'Context: a practitioner’s tour through the end-to-end work of turnin and models into reliable, efficient products—from Java and data engineering foundations to AI fine-tuning, prompt craft, tooling choices, and developer workflows. - Core: the episode’s through-line is a data‑centric engineering mindset for trustworthy, + Core: the episode’s through-line is a data-centric engineering mindset for trustworthy, production-ready AI: rigorous testing and pipeline design to ensure data trust, deliberate choices about models and tools for cost and performance, prompt and token-efficiency techniques to make inference practical, and pragmatic engineering patterns (architecture, diff --git a/_podcast/project-manager-to-data-scientist.md b/_podcast/project-manager-to-data-scientist.md index 40a2ec2b..f06d7516 100644 --- a/_podcast/project-manager-to-data-scientist.md +++ b/_podcast/project-manager-to-data-scientist.md @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3vF1B2mKwImsVC7h3NIDJW apple: https://podcasts.apple.com/us/podcast/transitioning-from-project-management-to-data-science/id1541710331?i=1000516467544 -description: 'Discover how project managers switch to data science: master machine learning, Python, CRISP‑DM, build a portfolio, and land data roles faster.' -intro: 'How do you move from project management into a data science career — and what skills, tools, and courses actually matter? 
In this episode, Ksenia Legostay, Manager/Data Scientist at momox GmbH, walks through her transition after four years as a project manager into three years researching fraud and anomaly detection and earning a degree in data analysis. We cover career foundations, the difference between analytics and data science, and a concrete learning strategy: assess strengths, target gaps, and build core skills in programming, statistics, and domain expertise.

Ksenia outlines recommended coursework (machine learning, time series, graph analysis), online resources including mlcourse.ai, and a practical tools progression from spreadsheets and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP‑DM to structure projects, starting as a data analyst to build a portfolio, using Kaggle and community resources (OpenDataScience, DataTalks), and preparing for production with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain specialization (fraud detection, node2vec), realistic job search expectations, part‑time learning plans, and essential math topics — a clear roadmap for transitioning to data science.' +description: 'Discover how project managers switch to data science: master machine learning, Python, CRISP-DM, build a portfolio, and land data roles faster.' +intro: 'How do you move from project management into a data science career — and what skills, tools, and courses actually matter? In this episode, Ksenia Legostay, Manager/Data Scientist at momox GmbH, walks through her transition after four years as a project manager into three years researching fraud and anomaly detection and earning a degree in data analysis. We cover career foundations, the difference between analytics and data science, and a concrete learning strategy: assess strengths, target gaps, and build core skills in programming, statistics, and domain expertise.

Ksenia outlines recommended coursework (machine learning, time series, graph analysis), online resources including mlcourse.ai, and a practical tools progression from spreadsheets and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP-DM to structure projects, starting as a data analyst to build a portfolio, using Kaggle and community resources (OpenDataScience, DataTalks), and preparing for production with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain specialization (fraud detection, node2vec), realistic job search expectations, part-time learning plans, and essential math topics — a clear roadmap for transitioning to data science.' topics: - career transition - project management @@ -78,7 +78,7 @@ quotableClips: startOffset: 1352 url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1352 endOffset: 1820 -- name: 'Project Frameworks: using CRISP‑DM to structure data projects' +- name: 'Project Frameworks: using CRISP-DM to structure data projects' startOffset: 1820 url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1820 endOffset: 1963 @@ -115,7 +115,7 @@ quotableClips: startOffset: 3075 url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3075 endOffset: 3249 -- name: 'Part‑time Learning Plan: nanodegrees and structured six‑month paths' +- name: 'Part-time Learning Plan: nanodegrees and structured six-month paths' startOffset: 3249 url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3249 endOffset: 3462 @@ -499,7 +499,7 @@ transcript: sec: 1778 time: '29:38' who: Alexey -- header: 'Project Frameworks: using CRISP‑DM to structure data projects' +- header: 'Project Frameworks: using CRISP-DM to structure data projects' - line: Definitely. It's fortunate that it's already developed. I would recommend to use the standard of the industry in data mining — CRISP-DM framework. It's really convenient for data science projects. I also use it. It's nicely structured. 
@@ -825,7 +825,7 @@ transcript: sec: 3209 time: '53:29' who: Alexey -- header: 'Part‑time Learning Plan: nanodegrees and structured six‑month paths' +- header: 'Part-time Learning Plan: nanodegrees and structured six-month paths' - line: Yes, I think I can give some tips or some recommendations. First of all, start to be interested in data analysis and start to apply this at your work already. This would be the first step of getting involved in data analysis. Then, when diff --git a/_podcast/public-speaking-for-data-scientists.md b/_podcast/public-speaking-for-data-scientists.md index 108ec29d..d6df1bf7 100644 --- a/_podcast/public-speaking-for-data-scientists.md +++ b/_podcast/public-speaking-for-data-scientists.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/essentials-public-speaking-for-career-in-data-science/id1541710331?i=1000513669829 description: 'Master public speaking, AI evangelism & storytelling for data scientists: learn repeatable keynote structure, audience hooks, Q&A tactics, and career growth.' -intro: How do data scientists move from technical deep dives to memorable keynotes and effective AI evangelism? In this episode, Ben Taylor, Chief AI Evangelist at DataRobot, breaks down the public speaking playbook for data practitioners who want to persuade, teach, and scale their talks.

Ben draws on a career from engineering and quant roles through startups and acquisitions to explain the mindset for improvement, practical rehearsal habits, and the positioning and messaging that define AI evangelism. Key topics include crafting repeatable keynotes, avoiding early mistakes like technical overload, using story hooks and warm‑ups to capture attention, and structuring talks around 1–3 clear takeaways and calls to action. He also covers introductions that work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling exercises (Pixar lessons), and executive presentations that lead with recommendations while keeping an appendix ready.

Listeners will find actionable guidance on earning speaking stages, writing conference proposals that push boundaries, Q&A strategies (including how and when to admit unknowns), starter topics for newcomers, and resources like Toastmasters and story practice to build a speaker resume and break into AI evangelism +intro: How do data scientists move from technical deep dives to memorable keynotes and effective AI evangelism? In this episode, Ben Taylor, Chief AI Evangelist at DataRobot, breaks down the public speaking playbook for data practitioners who want to persuade, teach, and scale their talks.

Ben draws on a career from engineering and quant roles through startups and acquisitions to explain the mindset for improvement, practical rehearsal habits, and the positioning and messaging that define AI evangelism. Key topics include crafting repeatable keynotes, avoiding early mistakes like technical overload, using story hooks and warm-ups to capture attention, and structuring talks around 1–3 clear takeaways and calls to action. He also covers introductions that work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling exercises (Pixar lessons), and executive presentations that lead with recommendations while keeping an appendix ready.

Listeners will find actionable guidance on earning speaking stages, writing conference proposals that push boundaries, Q&A strategies (including how and when to admit unknowns), starter topics for newcomers, and resources like Toastmasters and story practice to build a speaker resume and break into AI evangelism topics: - developer relations - public speaking @@ -62,7 +62,7 @@ quotableClips: startOffset: 1132 url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1132 endOffset: 1194 -- name: 'Attention Techniques: Warm‑up, Emotion, and Story Hooks' +- name: 'Attention Techniques: Warm-up, Emotion, and Story Hooks' startOffset: 1194 url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1194 endOffset: 1315 @@ -82,7 +82,7 @@ quotableClips: startOffset: 2052 url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2052 endOffset: 2191 -- name: 'Ambitious Goals: Memorable Talks and Long‑term Impact' +- name: 'Ambitious Goals: Memorable Talks and Long-term Impact' startOffset: 2191 url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2191 endOffset: 2395 @@ -435,7 +435,7 @@ transcript: sec: 1188 time: '19:48' who: Alexey -- header: 'Attention Techniques: Warm‑up, Emotion, and Story Hooks' +- header: 'Attention Techniques: Warm-up, Emotion, and Story Hooks' - line: You have this concept of attention. You walk out on the stage. You being the speaker, you're given attention for free. But you can quickly lose it. You can imagine if you or myself, were walking out on stage – COVID’s over – we're gonna @@ -695,7 +695,7 @@ transcript: sec: 2189 time: '36:29' who: Alexey -- header: 'Ambitious Goals: Memorable Talks and Long‑term Impact' +- header: 'Ambitious Goals: Memorable Talks and Long-term Impact' - line: Most of them. And that’s not an insult. It's just true. Most of them. Most of the talks are forgettable. How many of those people were smart? All of them. They're very smart, they're very accomplished. 
Unfortunately, most of the talks diff --git a/_podcast/research-to-production-ml-systems-roadmap.md b/_podcast/research-to-production-ml-systems-roadmap.md index b115e038..2c342d98 100644 --- a/_podcast/research-to-production-ml-systems-roadmap.md +++ b/_podcast/research-to-production-ml-systems-roadmap.md @@ -16,7 +16,7 @@ links: apple: https://podcasts.apple.com/us/podcast/what-researchers-and-engineers-can-learn-from-each/id1541710331?i=1000537258362 description: 'Learn to build reproducible, deployable full-stack ML systems: deploy models, bridge research-to-production, and master PyTorch, Docker & MLOps workflows.' -intro: How do you move ML work from research notebooks to reproducible, deployable full‑stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that exact challenge. We trace Mihail’s path from academic NLP to self‑driving and conversational AI, then into hybrid roles that blend hypothesis‑driven research with production engineering.

Key topics include research infrastructure for data collection and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, role fluidity, code reviews for researchers, and practical skills swaps so researchers learn reproducibility and engineers learn experimental rigor.

Listeners will get concrete guidance on building end‑to‑end ML systems, improving reproducibility and model deployment, and actionable career advice (internships, reading papers, reproducing models). Tune in to learn practical steps and tools to bridge research to production for real‑world ML systems +intro: How do you move ML work from research notebooks to reproducible, deployable full-stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that exact challenge. We trace Mihail’s path from academic NLP to self-driving and conversational AI, then into hybrid roles that blend hypothesis-driven research with production engineering.

Key topics include research infrastructure for data collection and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, role fluidity, code reviews for researchers, and practical skills swaps so researchers learn reproducibility and engineers learn experimental rigor.

Listeners will get concrete guidance on building end-to-end ML systems, improving reproducibility and model deployment, and actionable career advice (internships, reading papers, reproducing models). Tune in to learn practical steps and tools to bridge research to production for real-world ML systems topics: - machine learning - MLOps diff --git a/_podcast/responsible-explainable-ai-bias-detection.md b/_podcast/responsible-explainable-ai-bias-detection.md index c69be1a8..ea7915de 100644 --- a/_podcast/responsible-explainable-ai-bias-detection.md +++ b/_podcast/responsible-explainable-ai-bias-detection.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=8Eb5mG-pC3o description: Discover Responsible AI & Explainable AI tactics for bias detection, fairness checks and governance, practical tools to build trustworthy, compliant ML models -intro: How do you detect bias, enforce fairness, and govern AI systems in production without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia and Rutgers — walks through a practical roadmap for responsible AI and explainable AI grounded in real-world examples.

We define responsible AI and contrast it with post‑hoc explainability, then unpack a credit decision bias case to show disparate outcomes in practice. Supreet outlines glass‑box explainability techniques, data‑level fairness checks (skewness, missingness, coverage), and EDA methods for bias detection. She covers PII handling, feature necessity assessments with SMEs and compliance, and automating data quality and monitoring. You’ll hear tool recommendations — What‑If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local interpretability, drift and feedback‑loop detection, and trade‑offs between accuracy and interpretability.

Listeners will gain actionable guidance on bias detection, model interpretability, AI governance structures, and managing AutoML and regulated‑industry risks — practical steps to make AI systems more fair, transparent, and accountable +intro: How do you detect bias, enforce fairness, and govern AI systems in production without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia and Rutgers — walks through a practical roadmap for responsible AI and explainable AI grounded in real-world examples.

We define responsible AI and contrast it with post-hoc explainability, then unpack a credit decision bias case to show disparate outcomes in practice. Supreet outlines glass-box explainability techniques, data-level fairness checks (skewness, missingness, coverage), and EDA methods for bias detection. She covers PII handling, feature necessity assessments with SMEs and compliance, and automating data quality and monitoring. You’ll hear tool recommendations — What-If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local interpretability, drift and feedback-loop detection, and trade-offs between accuracy and interpretability.

Listeners will gain actionable guidance on bias detection, model interpretability, AI governance structures, and managing AutoML and regulated-industry risks — practical steps to make AI systems more fair, transparent, and accountable topics: - responsible AI - explainable AI @@ -49,15 +49,15 @@ quotableClips: startOffset: 402 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=402 endOffset: 500 -- name: 'Explainable vs Responsible AI: Post‑mortem Tools vs Governance Mindset' +- name: 'Explainable vs Responsible AI: Post-mortem Tools vs Governance Mindset' startOffset: 500 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=500 endOffset: 630 -- name: 'Glass‑Box Approach: Explainable AI Techniques Overview' +- name: 'Glass-Box Approach: Explainable AI Techniques Overview' startOffset: 630 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=630 endOffset: 696 -- name: 'Data‑Level Fairness Checks: Skewness, Missingness, and Coverage' +- name: 'Data-Level Fairness Checks: Skewness, Missingness, and Coverage' startOffset: 696 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=696 endOffset: 768 @@ -65,7 +65,7 @@ quotableClips: startOffset: 768 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=768 endOffset: 879 -- name: 'PII Handling: Age, Gender, Masking, and Use‑case Justification' +- name: 'PII Handling: Age, Gender, Masking, and Use-case Justification' startOffset: 879 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=879 endOffset: 1040 @@ -77,7 +77,7 @@ quotableClips: startOffset: 1107 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1107 endOffset: 1143 -- name: 'Model Explainability Tools: What‑If, Skater, and AI Explainability 360' +- name: 'Model Explainability Tools: What-If, Skater, and AI Explainability 360' startOffset: 1143 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1143 endOffset: 1404 @@ -89,15 +89,15 @@ quotableClips: startOffset: 1462 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1462 endOffset: 1658 -- name: 'Cross‑Functional 
Governance: SMEs, Compliance, and Leadership Roles' +- name: 'Cross-Functional Governance: SMEs, Compliance, and Leadership Roles' startOffset: 1658 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1658 endOffset: 1949 -- name: 'Accuracy vs Interpretability: Managing Model Complexity Trade‑offs' +- name: 'Accuracy vs Interpretability: Managing Model Complexity Trade-offs' startOffset: 1949 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1949 endOffset: 2128 -- name: 'Human‑in‑the‑Loop: Limits of Automation and Responsible Oversight' +- name: 'Human-in-the-Loop: Limits of Automation and Responsible Oversight' startOffset: 2128 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2128 endOffset: 2251 @@ -129,7 +129,7 @@ quotableClips: startOffset: 3404 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3404 endOffset: 3567 -- name: 'Closing Remarks: Follow‑up, Links, and Contact Information' +- name: 'Closing Remarks: Follow-up, Links, and Contact Information' startOffset: 3567 url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3567 endOffset: 3536 @@ -270,7 +270,7 @@ transcript: sec: 426 time: '7:06' who: Supreet -- header: 'Explainable vs Responsible AI: Post‑mortem Tools vs Governance Mindset' +- header: 'Explainable vs Responsible AI: Post-mortem Tools vs Governance Mindset' - line: What is the relationship between responsible AI and explainable AI? From what I hear now – if we want to feel confident in the predictions, we need to be able to explain them. Does this mean that responsible AI is explainable AI? Or what's @@ -306,7 +306,7 @@ transcript: sec: 629 time: '10:29' who: Supreet -- header: 'Glass‑Box Approach: Explainable AI Techniques Overview' +- header: 'Glass-Box Approach: Explainable AI Techniques Overview' - line: You mentioned tools and you mentioned a framework. So how do we do this? 
sec: 630 time: '10:30' @@ -329,7 +329,7 @@ transcript: sec: 681 time: '11:21' who: Alexey -- header: 'Data‑Level Fairness Checks: Skewness, Missingness, and Coverage' +- header: 'Data-Level Fairness Checks: Skewness, Missingness, and Coverage' - line: '[laughs] Yeah. I will start with the data level. First, when we talk about the data level, we talk about fairness and bias testing. There, you have a few data quality checks that I feel every data scientist does. They do some sort of @@ -379,7 +379,7 @@ transcript: sec: 796 time: '13:16' who: Supreet -- header: 'PII Handling: Age, Gender, Masking, and Use‑case Justification' +- header: 'PII Handling: Age, Gender, Masking, and Use-case Justification' - line: So basically you need to, as a human – as an analyst or as a data scientist – you need to get your dataset from your database, CSV file, whatever, and just spend enough time trying to understand what's happening there. Right? Should we @@ -473,7 +473,7 @@ transcript: sec: 1117 time: '18:37' who: Supreet -- header: 'Model Explainability Tools: What‑If, Skater, and AI Explainability 360' +- header: 'Model Explainability Tools: What-If, Skater, and AI Explainability 360' - line: So what about this model part? sec: 1143 time: '19:03' @@ -642,7 +642,7 @@ transcript: sec: 1644 time: '27:24' who: Supreet -- header: 'Cross‑Functional Governance: SMEs, Compliance, and Leadership Roles' +- header: 'Cross-Functional Governance: SMEs, Compliance, and Leadership Roles' - line: So what kind of people do we need to have in this room to be able to have these fruitful discussions? You said that we need, perhaps, data scientists, analysts, and people from compliance. Who else should we have? 
@@ -741,7 +741,7 @@ transcript: sec: 1916 time: '31:56' who: Supreet -- header: 'Accuracy vs Interpretability: Managing Model Complexity Trade‑offs' +- header: 'Accuracy vs Interpretability: Managing Model Complexity Trade-offs' - line: Actually, we have a question from Shivam that is exactly about that. The question is, “How to manage the trade-off between model complexity and explainability? Complex models do not necessarily have good explainability, so how do we manage @@ -791,7 +791,7 @@ transcript: sec: 2066 time: '34:26' who: Alexey -- header: 'Human‑in‑the‑Loop: Limits of Automation and Responsible Oversight' +- header: 'Human-in-the-Loop: Limits of Automation and Responsible Oversight' - line: There is a question from Raquel, “What does ‘you need a human touch’ mean?” I think this is related to our discussion, where the first step is always a human analyzing the data. Then the question goes on “Does this mean that responsible @@ -1252,7 +1252,7 @@ transcript: sec: 3550 time: '59:10' who: Supreet -- header: 'Closing Remarks: Follow‑up, Links, and Contact Information' +- header: 'Closing Remarks: Follow-up, Links, and Contact Information' - line: I think that the time is up. So thanks for joining us. Maybe before we wrap up, is there anything you want to mention that maybe you forgot? 
sec: 3567 diff --git a/_podcast/scaling-data-engineering-teams-self-service-platforms.md b/_podcast/scaling-data-engineering-teams-self-service-platforms.md index 6b10ebe9..39d97a5e 100644 --- a/_podcast/scaling-data-engineering-teams-self-service-platforms.md +++ b/_podcast/scaling-data-engineering-teams-self-service-platforms.md @@ -1,5 +1,5 @@ --- -title: 'Scale Data Engineering Teams: Build Self‑Service Data Platforms, Hire Senior Engineers & Use Kafka' +title: 'Scale Data Engineering Teams: Build Self-Service Data Platforms, Hire Senior Engineers & Use Kafka' short: Growing Data Engineering Team in a Scale-Up season: 10 episode: 5 @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=acJ6sVqKOUk description: 'Master scaling data engineering teams: build self-service data platforms, hire senior engineers, deploy Kafka best practices to boost velocity, onboarding.' -intro: 'How do you scale data engineering teams during hypergrowth without sacrificing quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, orchestration, infrastructure and analytics — walks through practical approaches to scale data engineering teams, build self‑service data platforms, hire senior engineers and adopt Kafka-based event streaming.

We cover what “scale‑up” looks like in practice (rapid hiring, product launches, US expansion), the data platform’s role in enabling self‑service onboarding and scalability, and a platform anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi also digs into event streaming: Kafka, schema registries and data contracts, plus hiring-for-scale tactics — prioritizing senior experts and niche tech experience — and assessment strategies like reverse interviews. You’ll hear about balancing platform engineering and use‑case pipelines, cultivating culture shifts, creating junior learning paths, and growing toward senior roles through proactivity and cross‑team impact.

Listen for concrete guidance on building a self‑service data platform, practical Kafka practices, and hiring strategies that help teams move fast while staying reliable.' +intro: 'How do you scale data engineering teams during hypergrowth without sacrificing quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, orchestration, infrastructure and analytics — walks through practical approaches to scale data engineering teams, build self-service data platforms, hire senior engineers and adopt Kafka-based event streaming.

We cover what “scale-up” looks like in practice (rapid hiring, product launches, US expansion), the data platform’s role in enabling self-service onboarding and scalability, and a platform anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi also digs into event streaming: Kafka, schema registries and data contracts, plus hiring-for-scale tactics — prioritizing senior experts and niche tech experience — and assessment strategies like reverse interviews. You’ll hear about balancing platform engineering and use-case pipelines, cultivating culture shifts, creating junior learning paths, and growing toward senior roles through proactivity and cross-team impact.

Listen for concrete guidance on building a self-service data platform, practical Kafka practices, and hiring strategies that help teams move fast while staying reliable.' dateadded: 2022-08-29 duration: PT01H01M25S @@ -26,11 +26,11 @@ quotableClips: startOffset: 117 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=117 endOffset: 162 -- name: 'Guest background: BI, on‑prem Big Data to staff data engineer (career highlights)' +- name: 'Guest background: BI, on-prem Big Data to staff data engineer (career highlights)' startOffset: 162 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=162 endOffset: 341 -- name: 'Defining scale‑up: hypergrowth, funding, hiring surge, speed vs quality' +- name: 'Defining scale-up: hypergrowth, funding, hiring surge, speed vs quality' startOffset: 341 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=341 endOffset: 621 @@ -38,7 +38,7 @@ quotableClips: startOffset: 621 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=621 endOffset: 750 -- name: 'Data platform role: enabling self‑service, onboarding, and scalability' +- name: 'Data platform role: enabling self-service, onboarding, and scalability' startOffset: 750 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=750 endOffset: 1042 @@ -62,7 +62,7 @@ quotableClips: startOffset: 1867 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1867 endOffset: 2105 -- name: 'Career trade‑offs: scale‑up vs enterprise vs FAANG' +- name: 'Career trade-offs: scale-up vs enterprise vs FAANG' startOffset: 2105 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2105 endOffset: 2292 @@ -70,7 +70,7 @@ quotableClips: startOffset: 2292 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2292 endOffset: 2342 -- name: 'Junior opportunities: rapid learning, promotions, and exposure in scale‑ups' +- name: 'Junior opportunities: rapid learning, promotions, and exposure in scale-ups' startOffset: 2342 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2342 endOffset: 2451 @@ -90,11 +90,11 @@ quotableClips: 
startOffset: 3017 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3017 endOffset: 3175 -- name: 'Work balance: platform engineering vs use‑case pipelines (~50/50)' +- name: 'Work balance: platform engineering vs use-case pipelines (~50/50)' startOffset: 3175 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3175 endOffset: 3271 -- name: 'Path to senior: proactivity, broader impact, and cross‑team collaboration' +- name: 'Path to senior: proactivity, broader impact, and cross-team collaboration' startOffset: 3271 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3271 endOffset: 3394 @@ -110,7 +110,7 @@ quotableClips: startOffset: 3612 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3612 endOffset: 3713 -- name: 'Video editing tips: multi‑take filming, lighting consistency, and tricks' +- name: 'Video editing tips: multi-take filming, lighting consistency, and tricks' startOffset: 3713 url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3713 endOffset: 3776 @@ -140,7 +140,7 @@ transcript: sec: 155 time: '2:35' who: Alexey -- header: 'Guest background: BI, on‑prem Big Data to staff data engineer (career highlights)' +- header: 'Guest background: BI, on-prem Big Data to staff data engineer (career highlights)' - line: Yeah, sure. I started, as you mentioned, about eight years ago in the data world, doing classic BI with Microsoft tooling, and mostly click, and drag-and-drop tooling. And then I had quite quickly the opportunity to jump early on a Big Data @@ -189,7 +189,7 @@ transcript: sec: 325 time: '5:25' who: Mehdi -- header: 'Defining scale‑up: hypergrowth, funding, hiring surge, speed vs quality' +- header: 'Defining scale-up: hypergrowth, funding, hiring surge, speed vs quality' - line: '[laughs] So what does it mean to be a scale-up? I know what a startup is – a startup is a company that just started up. I also know more or less what an enterprise is – it’s a huge company with a lot of people working there. 
So what @@ -307,7 +307,7 @@ transcript: sec: 709 time: '11:49' who: Mehdi -- header: 'Data platform role: enabling self‑service, onboarding, and scalability' +- header: 'Data platform role: enabling self-service, onboarding, and scalability' - line: And what do data engineers do in a scale-up environment? How is this world different from your typical enterprise? sec: 750 @@ -674,7 +674,7 @@ transcript: sec: 2001 time: '33:21' who: Mehdi -- header: 'Career trade‑offs: scale‑up vs enterprise vs FAANG' +- header: 'Career trade-offs: scale-up vs enterprise vs FAANG' - line: So what do you recommend to somebody that’s a senior and that person is considering multiple offers – and one of these offers is for a scale-up company. Let's say another offer is for an enterprise. Would you suggest that the person goes with @@ -737,7 +737,7 @@ transcript: sec: 2295 time: '38:15' who: Mehdi -- header: 'Junior opportunities: rapid learning, promotions, and exposure in scale‑ups' +- header: 'Junior opportunities: rapid learning, promotions, and exposure in scale-ups' - line: Would your answer be different for a junior specialist? Somebody who is just entering the field of data engineering and maybe has less than one year of experience? For them it may be just too boring to work nine to five? @@ -944,7 +944,7 @@ transcript: sec: 3039 time: '50:39' who: Mehdi -- header: 'Work balance: platform engineering vs use‑case pipelines (~50/50)' +- header: 'Work balance: platform engineering vs use-case pipelines (~50/50)' - line: At the beginning, you also mentioned that there are different kinds of data engineers. One type of data engineers are those that work on platforms, and the other kind works more on use cases. 
The question that we have here is, “What is @@ -971,7 +971,7 @@ transcript: sec: 3269 time: '54:29' who: Mehdi -- header: 'Path to senior: proactivity, broader impact, and cross‑team collaboration' +- header: 'Path to senior: proactivity, broader impact, and cross-team collaboration' - line: What would you say is the most important attribute for a data engineer who wants to get promoted from a mid-level role to a senior role? sec: 3271 @@ -1152,7 +1152,7 @@ transcript: sec: 3710 time: '1:01:50' who: Mehdi -- header: 'Video editing tips: multi‑take filming, lighting consistency, and tricks' +- header: 'Video editing tips: multi-take filming, lighting consistency, and tricks' - line: How did you do this? You filmed three different things and then you kind of stitched it through a program? sec: 3713 diff --git a/_podcast/solopreneur-data-scientist.md b/_podcast/solopreneur-data-scientist.md index 320762d4..89b6d660 100644 --- a/_podcast/solopreneur-data-scientist.md +++ b/_podcast/solopreneur-data-scientist.md @@ -45,7 +45,7 @@ quotableClips: startOffset: 493 url: https://youtube.com/watch?v=KMSE9GkU2mE&t=493 endOffset: 653 -- name: 'Experience Required: Mid‑Senior, End‑to‑End Project Skills' +- name: 'Experience Required: Mid-Senior, End-to-End Project Skills' startOffset: 653 url: https://youtube.com/watch?v=KMSE9GkU2mE&t=653 endOffset: 753 @@ -65,7 +65,7 @@ quotableClips: startOffset: 1267 url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1267 endOffset: 1345 -- name: 'First Month: Early Research, Insights or Proof‑of‑Concept' +- name: 'First Month: Early Research, Insights or Proof-of-Concept' startOffset: 1345 url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1345 endOffset: 1447 @@ -303,7 +303,7 @@ transcript: sec: 652 time: '10:52' who: Marianna -- header: 'Experience Required: Mid‑Senior, End‑to‑End Project Skills' +- header: 'Experience Required: Mid-Senior, End-to-End Project Skills' - line: How much experience would I need to have in order to join a company as 
the only data scientist? Do I need to be very experienced, like a senior person? Or if I'm just switching careers, would it be a good idea to join such a company? @@ -489,7 +489,7 @@ transcript: sec: 1291 time: '21:31' who: Marianna -- header: 'First Month: Early Research, Insights or Proof‑of‑Concept' +- header: 'First Month: Early Research, Insights or Proof-of-Concept' - line: That's the first week, you said. Let’s say, you’re there for a month already – what should you do in the first month? Do you already need to have some sort of POC or are you still in the exploratory phase? I guess it depends on the case, diff --git a/_podcast/teaching-mentoring-data-analytics-fintech.md b/_podcast/teaching-mentoring-data-analytics-fintech.md index 3b66e941..179f5347 100644 --- a/_podcast/teaching-mentoring-data-analytics-fintech.md +++ b/_podcast/teaching-mentoring-data-analytics-fintech.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=saaRRzgHsmE description: 'Discover FinTech data analytics curriculum: fraud detection, BigQuery labs & mentoring—gain hands-on cloud skills, chargeback modeling, SQL and career guidance.' -intro: 'How do you design a FinTech data analytics curriculum that teaches fraud detection, chargeback modeling, and real-world cloud skills while also mentoring diverse learners? In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google data leader, and long-time mentor and teacher — walks through building practical FinTech courses informed by industry experience across Berlin, Amsterdam and the Bay Area.

We cover curriculum components you can reuse: rule‑based vs neural approaches to fraud detection, chargeback modeling, deploying ML in production, and essential business skills for analysts. Irina explains hands‑on BigQuery labs, student cloud access strategies, and how to demystify Google Cloud for analysts. She shares mentoring methods (visual explanations, learner‑centered teaching), instructor sourcing and storytelling for classroom impact, cohort analysis for product metrics, recruiting women to zoomcamps, and securing technical reviewers.

Listen to gain actionable guidance on structuring FinTech analytics training, designing cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring practices that help career changers and underrepresented learners succeed in data analytics.' +intro: 'How do you design a FinTech data analytics curriculum that teaches fraud detection, chargeback modeling, and real-world cloud skills while also mentoring diverse learners? In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google data leader, and long-time mentor and teacher — walks through building practical FinTech courses informed by industry experience across Berlin, Amsterdam and the Bay Area.

We cover curriculum components you can reuse: rule-based vs neural approaches to fraud detection, chargeback modeling, deploying ML in production, and essential business skills for analysts. Irina explains hands-on BigQuery labs, student cloud access strategies, and how to demystify Google Cloud for analysts. She shares mentoring methods (visual explanations, learner-centered teaching), instructor sourcing and storytelling for classroom impact, cohort analysis for product metrics, recruiting women to zoomcamps, and securing technical reviewers.

Listen to gain actionable guidance on structuring FinTech analytics training, designing cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring practices that help career changers and underrepresented learners succeed in data analytics.' topics: - data analytics - fintech diff --git a/_podcast/urban-data-science.md b/_podcast/urban-data-science.md index 94773f5a..9a72efc3 100644 --- a/_podcast/urban-data-science.md +++ b/_podcast/urban-data-science.md @@ -20,17 +20,17 @@ intro: 'How can cities use transport analytics, sensors and AI to become more li In this episode Rachel Lim, an urban data scientist with a geography background and a master’s in urban data science, walks through practical ways data informs transport planning and placemaking. We cover core data sources—GPS, sensors, fare - card systems, ride‑hailing logs and computer vision for passenger flow—plus travel - demand forecasting, real‑time monitoring (including event analytics like F1), and + card systems, ride-hailing logs and computer vision for passenger flow—plus travel + demand forecasting, real-time monitoring (including event analytics like F1), and operational responses such as traffic marshals and recovery services.

Rachel - explains data engineering realities—Kafka, Apache Spark, real‑time APIs, data pipelines + explains data engineering realities—Kafka, Apache Spark, real-time APIs, data pipelines and warehousing—alongside journey logic, fare computation and data quality management. - She also explores emerging tools: generative AI for natural‑language access, text‑to‑SQL + She also explores emerging tools: generative AI for natural-language access, text-to-SQL architectures, synthetic data, and privacy practices for publishing masked datasets. The conversation highlights Singapore’s planning context, open data portals (data.gov.sg, DataMall), and project ideas for learners using parking and taxi datasets.

Listen to learn which transport analytics and sensor strategies produce actionable - insights, how to set up robust data pipelines, and where to start hands‑on projects + insights, how to set up robust data pipelines, and where to start hands-on projects to build liveable cities.' dateadded: 2024-11-06 duration: PT00H51M32S diff --git a/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md index 09170cd5..2b0132a5 100644 --- a/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md +++ b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md @@ -16,7 +16,7 @@ links: youtube: https://www.youtube.com/watch?v=OuCuk-7RHjM description: Discover kDimensions and Figma templates to visualize machine learning, build intuition before the math, map ML problems, and create shareable visuals -intro: 'How do you teach machine learning so people build intuition before diving into math? In this episode, Meor Amer—educator, author, and Developer Relations at Cohere—walks through a visual-first approach to machine learning that makes concepts accessible and actionable. Drawing on his journey from bioengineering and telecom analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, Meor explains why visual machine learning and dimensionality reduction matter and how templates can scale understanding.

We cover practical workflows: generating ideas (visualize the verb, use metaphors like the catapult and airplane), design constraints that spark creativity, and a sketchbook → Figma pipeline for engineers that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn visuals, how to map ML problems (classification, regression, clustering, anomaly, RL) to templates, and hands‑on learning techniques—consume with intent, break and modify code. He also discusses monetizing visual design services and turning articles into key visuals using 4–5 keywords.

Listen to learn concrete techniques for ML visualization, Figma for engineers, and creating reusable templates that build intuition and make machine learning teachable.' +intro: 'How do you teach machine learning so people build intuition before diving into math? In this episode, Meor Amer—educator, author, and Developer Relations at Cohere—walks through a visual-first approach to machine learning that makes concepts accessible and actionable. Drawing on his journey from bioengineering and telecom analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, Meor explains why visual machine learning and dimensionality reduction matter and how templates can scale understanding.

We cover practical workflows: generating ideas (visualize the verb, use metaphors like the catapult and airplane), design constraints that spark creativity, and a sketchbook → Figma pipeline for engineers that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn visuals, how to map ML problems (classification, regression, clustering, anomaly, RL) to templates, and hands-on learning techniques—consume with intent, break and modify code. He also discusses monetizing visual design services and turning articles into key visuals using 4–5 keywords.

Listen to learn concrete techniques for ML visualization, Figma for engineers, and creating reusable templates that build intuition and make machine learning teachable.' topics: - machine learning - education @@ -33,7 +33,7 @@ quotableClips: startOffset: 116 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=116 endOffset: 177 -- name: 'Career Journey: Bioengineering → Telecom Analytics → Self‑employment' +- name: 'Career Journey: Bioengineering → Telecom Analytics → Self-employment' startOffset: 177 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=177 endOffset: 375 @@ -57,7 +57,7 @@ quotableClips: startOffset: 1053 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1053 endOffset: 1286 -- name: Drift Visualized (Catapult Metaphor) & Data‑centric AI Airplane Analogy +- name: Drift Visualized (Catapult Metaphor) & Data-centric AI Airplane Analogy startOffset: 1286 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1286 endOffset: 1447 @@ -85,7 +85,7 @@ quotableClips: startOffset: 2450 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2450 endOffset: 2617 -- name: 'Hands‑on Learning: Break and Modify Code to Understand ML' +- name: 'Hands-on Learning: Break and Modify Code to Understand ML' startOffset: 2617 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2617 endOffset: 2687 @@ -106,11 +106,11 @@ quotableClips: startOffset: 3246 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3246 endOffset: 3361 -- name: 'Book Overview: Visual Introduction to Deep Learning (Neuron‑by‑Neuron)' +- name: 'Book Overview: Visual Introduction to Deep Learning (Neuron-by-Neuron)' startOffset: 3361 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3361 endOffset: 3536 -- name: 'Book Workflow: Visual‑first Layout with Concise Text' +- name: 'Book Workflow: Visual-first Layout with Concise Text' startOffset: 3536 url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3536 endOffset: 3612 @@ -145,7 +145,7 @@ transcript: sec: 173 time: '2:53' who: Meor -- header: 'Career Journey: 
Bioengineering → Telecom Analytics → Self‑employment' +- header: 'Career Journey: Bioengineering → Telecom Analytics → Self-employment' - line: Before we go into our main topic of visualising machine learning, let's start with your background. Can you tell us about your career journey so far? sec: 177 @@ -395,7 +395,7 @@ transcript: sec: 1135 time: '18:55' who: Meor -- header: Drift Visualized (Catapult Metaphor) & Data‑centric AI Airplane Analogy +- header: Drift Visualized (Catapult Metaphor) & Data-centric AI Airplane Analogy - line: Okay, so first, you think, "I want to create something on drift” Then you give yourself a bit of time, use a timer, set it to 5-10 minutes. And then you start brainstorming, like “How can I show the action? What is the action there?” @@ -687,7 +687,7 @@ transcript: sec: 2544 time: '42:24' who: Meor -- header: 'Hands‑on Learning: Break and Modify Code to Understand ML' +- header: 'Hands-on Learning: Break and Modify Code to Understand ML' - line: How do you come up with this “What if?” and “What can go wrong?”? If you have practical experience, then you can use it. But if you're just learning this thing, how can you know about these things? @@ -867,7 +867,7 @@ transcript: sec: 3246 time: '54:06' who: Meor -- header: 'Book Overview: Visual Introduction to Deep Learning (Neuron‑by‑Neuron)' +- header: 'Book Overview: Visual Introduction to Deep Learning (Neuron-by-Neuron)' - line: I also know that you wrote a book. You recently came to DataTalks.Club’s slack to answer questions about your book. Can you tell us about it? And how did you come up with the idea behind the book? @@ -900,7 +900,7 @@ transcript: sec: 3430 time: '57:10' who: Meor -- header: 'Book Workflow: Visual‑first Layout with Concise Text' +- header: 'Book Workflow: Visual-first Layout with Concise Text' - line: Did you first come up with text and then created illustrations? Or did you first come up with illustrations and then wrote text for them? 
sec: 3536 diff --git a/podcast-errors-found.md b/podcast-errors-found.md new file mode 100644 index 00000000..4a6c7313 --- /dev/null +++ b/podcast-errors-found.md @@ -0,0 +1,242 @@ +# Podcast Files - Errors Found + +## Summary + +A comprehensive analysis of all podcast markdown files in `_podcast/` directory revealed several categories of errors affecting multiple files. + +--- + +## 1. ✅ FIXED: Truncated Anchor IDs (264 instances across 187 files) + +**Status:** Already corrected in your recent edits + +**Issue:** The `ids.anchor` field had truncated values missing the proper prefix: +- `atatalksclub` → should be `datatalksclub` (missing 'd') +- `atalksclub` → should be `datatalksclub` (missing 'dat') +- `lub/episodes/` → should be `datatalksclub/episodes/` + +**Example:** +```yaml +# INCORRECT +ids: + anchor: atatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi + +# CORRECT +ids: + anchor: datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi +``` + +--- + +## 2. 
🔴 Special Dash Characters (187 files affected) + +**Issue:** Non-ASCII dash characters used throughout files: +- `‑` (U+2011: Non-breaking hyphen) +- `—` (U+2014: Em dash) +- `–` (U+2013: En dash) + +**Impact:** +- May cause encoding issues +- Inconsistent with standard ASCII hyphen `-` +- Can break parsing or search functionality + +**Most affected files:** +- `algorithmic-trading-with-python-and-machine-learning.md` (123 instances) +- `from-data-freelancer-to-startup-open-source-products.md` (136 instances) +- `from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md` (134 instances) +- `building-data-team.md` (104 instances) + +**Example locations in ai-for-ecology-biodiversity-and-conservation.md:** +- Line 22: `Berger‑Wolf` (should be `Berger-Wolf`) +- Line 63: `Photo‑ID` (should be `Photo-ID`) +- Throughout intro text + +**Recommendation:** Replace all special dashes with standard ASCII hyphen `-` + +--- + +## 3. 🟡 YAML Escaped Quotes (130 files) + +**Issue:** Doubled single quotes `''` used for escaping in YAML strings + +**Example:** +```yaml +context: 'AI''s most important role...' +``` + +**Current Status:** This is actually **valid YAML syntax** for escaping single quotes within single-quoted strings. However, it may look confusing. + +**Alternative approaches:** +1. Use double quotes: `"AI's most important role"` +2. Keep as-is (valid YAML) +3. Use multiline literal blocks + +**Verdict:** Not necessarily an error, but could be standardized for consistency. + +--- + +## 4. 🔴 QuotableClips with Same Start/End Offset (15 files) + +**Issue:** Final quotableClip entries have `startOffset` equal to `endOffset`, creating zero-duration clips + +**Affected files:** +1. `ai-for-ecology-biodiversity-and-conservation.md` - 'Episode Closing: Key Takeaways and Next Steps' (3720) +2. `open-source-ml-contributions.md` - 'Episode Wrap-Up and Final Advice' (2280) +3. 
`technical-writing-for-data-scientists.md` - 'Podcast Wrap-Up and Resources' (3630) +4. `mindful-data-strategy-for-business-impact.md` - 'Episode Outro and Hummus Banter' (3965) +5. `nlp-dataset-creation-annotation-tools-workflows.md` - 'Contact & Resources' (3820) +6. `personal-brand-for-data-professionals.md` - 'Episode Close and Links' (3030) +7. `building-domestic-risk-assessment-tool.md` - 'Episode Wrap-Up' (3840) +8. `mlops-feature-stores-feature-stores-feast-tecton.md` - 'Episode Close' (3450) +9. `data-freelancing-career-strategy-market-demand-and-client-acquisition.md` - 'Episode Wrap-up' (3929) +10. `from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md` - 'Episode Wrap-Up' (3822) +... and 5 more + +**Example from current file (lines 131-134):** +```yaml +- name: 'Episode Closing: Key Takeaways and Next Steps' + startOffset: 3720 + url: https://www.youtube.com/watch?v=30tTrozbAkg&t=3720 + endOffset: 3720 # ⚠️ Same as startOffset! +``` + +**Recommendation:** Either: +- Set endOffset to actual video duration +- Remove these zero-duration closing clips +- Add a small duration (e.g., +60 seconds) + +--- + +## 5. 🟡 Missing Duration Field (12 files) + +**Issue:** `duration:` field missing from frontmatter (should be in ISO 8601 format like `PT01H02M30S`) + +**Affected files:** +1. **ai-for-ecology-biodiversity-and-conservation.md** ⬅️ **CURRENT FILE** +2. open-source-ml-contributions.md +3. technical-writing-for-data-scientists.md +4. personal-brand-for-data-professionals.md +5. building-domestic-risk-assessment-tool.md +6. crisp-dm.md +7. building-data-products-lead-data-scientist.md +8. mlops-feature-stores-feature-stores-feast-tecton.md +9. from-marketing-to-product-owner-in-search.md +10. machine-learning-decision-optimization.md +11. data-team-roles.md +12. 
mentoring-in-tech-how-to-find-and-become-a-mentor.md.md + +**Impact:** May affect schema markup, SEO, and podcast platforms + +**How to calculate:** Based on the highest `endOffset` value (in seconds), convert to `PT[H]H[M]M[S]S` format + +**For current file:** +- Highest endOffset: 3720 seconds = 62 minutes = 1 hour 2 minutes +- Should add: `duration: PT01H02M00S` + +--- + +## 6. 🟡 Missing Topics Field (63 files) + +**Issue:** `topics:` field missing from frontmatter + +**Impact:** Reduced discoverability, categorization, and filtering capabilities + +**Sample of affected files:** +- algorithmic-trading-with-python-and-machine-learning.md +- ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md +- mindful-data-strategy-for-business-impact.md +- fairness-in-ai-ml-engineering.md +- modern-search-systems-vector-databases-llms-semantic-retrieval.md +... and 58 more + +**Recommendation:** Add relevant topic tags like: +```yaml +topics: +- data science +- machine learning +- mlops +- career +``` + +--- + +## 7. 🔴 TODO Placeholders (4 files) + +**Issue:** Unfinished placeholder values still present + +**Affected files:** +1. technical-writing-for-data-scientists.md +2. crisp-dm.md +3. data-team-roles.md +4. mentoring-in-tech-how-to-find-and-become-a-mentor.md.md + +**Common locations:** +```yaml +links: + spotify: TODO + apple: TODO +``` + +**Recommendation:** Find correct URLs and replace TODO values + +--- + +## 8. 🟢 Description Length (Looks Good!) + +**Status:** Most descriptions are within the recommended 140-155 character range + +**Current file description:** +> "Discover AI-driven computer vision and remote sensing strategies to scale biodiversity monitoring, improve species ID, and inform conservation policy." + +**Length:** 150 characters ✓ (optimal per your memory guidelines) + +--- + +## Priority Recommendations + +### High Priority (Data Quality Issues) +1. **Fix zero-duration quotableClips** (15 files) - Set proper endOffset values +2. 
**Remove TODO placeholders** (4 files) - Add real URLs or remove fields +3. **Replace special dashes** (187 files) - Use standard ASCII `-` for consistency + +### Medium Priority (Missing Metadata) +4. **Add duration field** (12 files including current) - Calculate from endOffset +5. **Add topics field** (63 files) - Improve categorization + +### Low Priority (Style/Consistency) +6. **Standardize YAML quote escaping** (130 files) - Optional, current syntax is valid + +--- + +## Files Needing Most Attention + +Based on multiple issues: + +1. **ai-for-ecology-biodiversity-and-conservation.md** (current file) + - ✅ Anchor ID fixed + - 🔴 13 special dashes + - 🔴 Zero-duration closing clip + - 🟡 Missing duration field + - 🟡 Doubled quotes in context + +2. **technical-writing-for-data-scientists.md** + - 🔴 TODO placeholders + - 🔴 Zero-duration clip + - 🟡 Missing duration + - 🟡 Missing topics + +3. **crisp-dm.md** & **data-team-roles.md** + - 🔴 TODO placeholders + - 🟡 Missing duration + - 🟡 Missing topics + +--- + +## Automated Fix Suggestions + +You could create scripts to: +1. Replace all `‑–—` with `-` across all files +2. Calculate and add `duration:` based on max `endOffset` +3. Fix zero-duration clips by adding 60-120 seconds to final clips +4. 
Find TODO placeholders and flag for manual review + From 1ad0dbabe27c4e98c2469dd89928a2ca1e2c2cf9 Mon Sep 17 00:00:00 2001 From: kavaivaleri Date: Tue, 18 Nov 2025 12:41:45 +0100 Subject: [PATCH 5/9] Renamed podcast images according to new URLs --- .../ab-testing-and-product-experimentation.md | 2 +- ...r-ecology-biodiversity-and-conservation.md | 2 +- ...-in-healthcare-and-digital-therapeutics.md | 2 +- ...brid-cloud-on-prem-distributed-training.md | 2 +- ...i-ml-product-design-and-experimentation.md | 2 +- ...rading-with-python-and-machine-learning.md | 2 +- ...lgorithms-data-structures-for-engineers.md | 2 +- _podcast/analytics-engineer-skills-tools.md | 2 +- ...s-to-data-science-with-kaggle-portfolio.md | 2 +- ...-research-and-career-growth-in-practice.md | 2 +- .../bayesian-modeling-workflows-and-tools.md | 2 +- _podcast/becoming-data-freelancer.md | 2 +- ...big-data-analytics-and-postdoc-research.md | 2 +- .../big-data-engineer-vs-data-scientist.md | 2 +- ...ty-for-data-scientists-and-ml-engineers.md | 2 +- ...rmatics-worflows-tools-and-data-science.md | 2 +- ...ngineering-tooling-retrieval-evaluation.md | 2 +- .../building-ai-digital-health-startups.md | 2 +- ...and-scaling-ai-data-products-with-mlops.md | 2 +- ...engineering-systems-for-fraud-detection.md | 2 +- ...ta-science-practice-industrial-ai-mlops.md | 2 +- _podcast/building-and-scaling-data-team.md | 2 +- ...oducts-product-owner-vs-product-manager.md | 2 +- ...emocratizing-high-performance-computing.md | 2 +- _podcast/building-data-team.md | 2 +- .../building-domestic-risk-assessment-tool.md | 2 +- ...xplainable-and-actionable-ai-ml-systems.md | 2 +- ...ing-healthcare-machine-learning-systems.md | 2 +- ...communities-diversity-and-career-growth.md | 2 +- _podcast/building-mlops-startup.md | 2 +- ...ce-data-product-for-identity-resolution.md | 2 +- _podcast/building-open-source-nlp-tool.md | 2 +- ...g-production-ml-platform-and-mlops-team.md | 2 +- .../building-production-search-systems.md | 2 +- 
...e-and-reliable-machine-learning-systems.md | 2 +- .../causal-inference-for-machine-learning.md | 2 +- ...ta-officer-data-strategy-and-org-design.md | 2 +- _podcast/cloud-data-governance.md | 2 +- ...munity-building-and-teaching-in-ai-tech.md | 2 +- _podcast/crisp-dm.md | 2 +- _podcast/data-centric.md | 2 +- ...business-pricing-and-client-acquisition.md | 2 +- ...data-engineering-career-path-and-skills.md | 2 +- ...ng-leadership-and-modern-data-platforms.md | 2 +- ...ata-engineering-tools-modern-data-stack.md | 2 +- ...gy-market-demand-and-client-acquisition.md | 2 +- .../data-governance-data-access-management.md | 2 +- ...iew-behavioral-and-portfolio-prep-guide.md | 2 +- ...alism-python-visualization-storytelling.md | 2 +- _podcast/data-leadership-coaching.md | 2 +- ...d-growth-event-tracking-and-reverse-etl.md | 2 +- ...rchitecture-decentralized-data-products.md | 2 +- ...ivacy-engineering-gdpr-machine-learning.md | 2 +- ...a-professionals-business-skills-in-saas.md | 2 +- ...ity-data-observability-data-reliability.md | 2 +- ...-analytics-for-nonprofits-tech-for-good.md | 2 +- _podcast/data-science-career-abc-framework.md | 2 +- ...data-science-failures-and-mlops-lessons.md | 2 +- ...-public-policy-ethical-ai-social-impact.md | 2 +- .../data-science-interview-and-cv-guide.md | 2 +- ...ence-job-red-flags-and-mismatched-roles.md | 2 +- .../data-science-leadership-hiring-mlops.md | 2 +- ...e-management-and-agile-machine-learning.md | 2 +- ...-science-manager-vs-expert-hiring-guide.md | 2 +- ...a-science-team-structure-and-org-design.md | 2 +- ...ndie-hacker-bootstrapping-side-projects.md | 2 +- ...egy-and-dataops-for-ai-powered-products.md | 2 +- _podcast/data-team-roles.md | 2 +- .../data-translator-role-and-data-strategy.md | 2 +- ...nd-gitops-best-practices-for-data-teams.md | 2 +- ...-automation-and-reliable-data-pipelines.md | 2 +- _podcast/dataops-for-data-engineering.md | 2 +- ...-principles-and-scalable-data-platforms.md | 2 +- 
...lksclub-building-scaling-data-community.md | 2 +- ...able-data-community-3-years-anniversary.md | 2 +- .../datatalksclub-scaling-and-free-courses.md | 2 +- ...n-fine-tuning-retrieval-open-source-api.md | 2 +- ...eveloper-personal-brand-learn-in-public.md | 2 +- .../devrel-data-science-open-source-tools.md | 2 +- .../devrel-open-source-machine-learning.md | 2 +- _podcast/fairness-in-ai-ml-engineering.md | 2 +- ...ng-model-monitoring-and-data-governance.md | 2 +- _podcast/finops-for-data-engineers.md | 2 +- ...ce-data-engineering-pricing-and-clients.md | 2 +- _podcast/freelancing-in-machine-learning.md | 2 +- ...i-engineer-interviews-and-career-growth.md | 2 +- ...esearch-to-data-engineering-freelancing.md | 2 +- ...pen-source-computer-vision-transformers.md | 2 +- ...ision-research-to-autonomous-driving-ai.md | 2 +- ...elancer-to-startup-open-source-products.md | 2 +- ...ing-automation-open-source-volunteering.md | 2 +- _podcast/from-game-ai-to-modern-ai-agents.md | 2 +- ...a-engineering-to-leading-data-architect.md | 2 +- ...a-science-research-software-engineering.md | 2 +- ...ytics-engineering-sql-dbt-career-switch.md | 2 +- .../from-math-graduate-to-data-analytics.md | 2 +- ...cs-to-computer-vision-career-transition.md | 2 +- ...o-machine-learning-and-data-engineering.md | 2 +- ...ductor-data-to-applied-machine-learning.md | 2 +- ...m-software-engineer-to-machine-learning.md | 2 +- ...-science-to-data-engineering-leadership.md | 2 +- ...gineering-to-leading-data-science-teams.md | 2 +- ...-machine-learning-applied-ml-leadership.md | 2 +- ...p-engineering-to-freelance-data-science.md | 2 +- ...tive-ai-chatbots-in-production-security.md | 2 +- ...data-analytics-and-data-engineering-job.md | 2 +- ...data-engineering-job-prep-and-interview.md | 2 +- _podcast/get-data-scientist-job.md | 2 +- ...junior-data-job-and-transferable-skills.md | 2 +- ...managing-data-science-teams-in-b2b-saas.md | 2 +- .../hiring-data-scientists-and-analysts.md | 2 +- 
...ing-for-data-engineering-jobs-in-europe.md | 2 +- ...ence-jobs-interview-questions-skills.md.md | 2 +- _podcast/how-to-break-into-data-science.md | 2 +- .../how-to-grow-your-ml-engineering-career.md | 2 +- _podcast/how-to-stand-out-in-data-science.md | 2 +- ...to-switch-to-ml-tech-without-experience.md | 2 +- ...on-into-ml-and-data-engineering-from-qa.md | 2 +- ...ng-face-contributions-and-nlp-portfolio.md | 2 +- ...entered-ai-automatic-speech-recognition.md | 2 +- ...man-centered-mlops-and-model-monitoring.md | 2 +- ...-small-data-production-machine-learning.md | 2 +- _podcast/interpretable-machine-learning.md | 2 +- ...nvesting-in-open-source-developer-tools.md | 2 +- ...y-in-tech-projects-skills-cv-networking.md | 2 +- ...ndmaster-to-production-ml-and-education.md | 2 +- ...edge-graphs-and-llms-for-automotive-rnd.md | 2 +- ...data-product-adoption-modern-data-stack.md | 2 +- _podcast/launch-and-build-retail-startup.md | 2 +- _podcast/lean-mlops-for-startups.md | 2 +- ...ine-learning-self-taught-bioinformatics.md | 2 +- ...ne-learning-data-science-interview-prep.md | 2 +- .../machine-learning-decision-optimization.md | 2 +- ...g-engineering-production-best-practices.md | 2 +- ...for-asteroid-mining-and-water-detection.md | 2 +- ...ting-attribution-marketing-mix-modeling.md | 2 +- ...achine-learning-system-design-interview.md | 2 +- ...oney-with-machine-learning-roles-skills.md | 2 +- ...tech-how-to-find-and-become-a-mentor.md.md | 2 +- ...ndful-data-strategy-for-business-impact.md | 2 +- ...l-engineering-kpis-and-metrics-strategy.md | 2 +- ...uct-manager-and-mlops-platform-strategy.md | 2 +- _podcast/ml-system-design.md | 2 +- .../mlops-and-ml-engineering-in-finance.md | 2 +- ...mlops-at-scale-reproducibility-adoption.md | 2 +- .../mlops-community-building-and-meetups.md | 2 +- ...ture-stores-feature-stores-feast-tecton.md | 2 +- _podcast/mlops-kubeflow-model-monitoring.md | 2 +- ...ops-model-monitoring-data-observability.md | 2 +- 
...elines-orchestration-ingestion-modeling.md | 2 +- ...ector-databases-llms-semantic-retrieval.md | 2 +- ...set-creation-annotation-tools-workflows.md | 2 +- .../nlp-team-hiring-and-production-mlops.md | 2 +- ...earning-freelancing-and-public-learning.md | 2 +- ...teering-in-ai-for-data-ml-career-growth.md | 2 +- _podcast/open-source-ml-contributions.md | 2 +- ...e-ml-tools-strategy-and-business-models.md | 2 +- ...turned-into-career-and-startup-creation.md | 2 +- .../personal-brand-for-data-professionals.md | 2 +- ...-to-data-science-lead-career-transition.md | 2 +- ...rel-demofirst-education-and-open-source.md | 2 +- ...-ai-consulting-from-expertise-to-impact.md | 2 +- _podcast/practical-llm-engineering-and-rag.md | 2 +- ...ical-llm-use-cases-and-product-patterns.md | 2 +- _podcast/pragmatic-and-standardized-mlops.md | 2 +- ...roduct-designer-to-data-product-manager.md | 2 +- ...duction-ml-mlops-and-data-team-building.md | 2 +- ...duction-ml-pipelines-with-aws-and-kafka.md | 2 +- ...-vector-search-embeddings-hybrid search.md | 2 +- _podcast/production-ready-ai-engineering.md | 2 +- _podcast/project-manager-to-data-scientist.md | 2 +- .../public-speaking-for-data-scientists.md | 2 +- ...neering-work-and-building-iot-platforms.md | 2 +- ...search-to-production-ml-systems-roadmap.md | 2 +- ...sponsible-explainable-ai-bias-detection.md | 2 +- ...ngineering-teams-self-service-platforms.md | 2 +- ...enterprise-ai-mlops-data-first-strategy.md | 2 +- ...ftware-engineering-for-machine-learning.md | 2 +- _podcast/solopreneur-data-scientist.md | 2 +- ...preneur-developer-and-data-professional.md | 2 +- ...aching-mentoring-data-analytics-fintech.md | 2 +- ...n-science-coding-practices-for-academia.md | 2 +- .../technical-writing-for-data-scientists.md | 2 +- ...ng-to-tesla-full-stack-data-engineering.md | 2 +- _podcast/trends-in-modern-data-engineering.md | 2 +- _podcast/urban-data-science.md | 2 +- ...machine-learning-concepts-to-explain-ml.md | 2 +- 
...b-testing-and-product-experimentation.jpg} | Bin ...n-healthcare-and-digital-therapeutics.jpg} | Bin ...id-cloud-on-prem-distributed-training.jpg} | Bin ...ml-product-design-and-experimentation.jpg} | Bin ...ding-with-python-and-machine-learning.jpg} | Bin ...orithms-data-structures-for-engineers.jpg} | Bin ...pg => analytics-engineer-skills-tools.jpg} | Bin ...to-data-science-with-kaggle-portfolio.jpg} | Bin ...esearch-and-career-growth-in-practice.jpg} | Bin ...bayesian-modeling-workflows-and-tools.jpg} | Bin ...ancer.jpg => becoming-data-freelancer.jpg} | Bin ...g-data-analytics-and-postdoc-research.jpg} | Bin ...> big-data-engineer-vs-data-scientist.jpg} | Bin ...-for-data-scientists-and-ml-engineers.jpg} | Bin ...atics-worflows-tools-and-data-science.jpg} | Bin ...ineering-tooling-retrieval-evaluation.jpg} | Bin ...> building-ai-digital-health-startups.jpg} | Bin ...d-scaling-ai-data-products-with-mlops.jpg} | Bin ...gineering-systems-for-fraud-detection.jpg} | Bin ...-science-practice-industrial-ai-mlops.jpg} | Bin ...jpg => building-and-scaling-data-team.jpg} | Bin ...ucts-product-owner-vs-product-manager.jpg} | Bin ...ocratizing-high-performance-computing.jpg} | Bin ...ing-ds-team.jpg => building-data-team.jpg} | Bin ...uilding-domestic-risk-assessment-tool.jpg} | Bin ...lainable-and-actionable-ai-ml-systems.jpg} | Bin ...g-healthcare-machine-learning-systems.jpg} | Bin ...mmunities-diversity-and-career-growth.jpg} | Bin ...startup.jpg => building-mlops-startup.jpg} | Bin ...-data-product-for-identity-resolution.jpg} | Bin ....jpg => building-open-source-nlp-tool.jpg} | Bin ...production-ml-platform-and-mlops-team.jpg} | Bin ...=> building-production-search-systems.jpg} | Bin ...and-reliable-machine-learning-systems.jpg} | Bin ...causal-inference-for-machine-learning.jpg} | Bin ...-officer-data-strategy-and-org-design.jpg} | Bin ...vernance.jpg => cloud-data-governance.jpg} | Bin ...nity-building-and-teaching-in-ai-tech.jpg} | Bin 
.../{s01e02-processes.jpg => crisp-dm.jpg} | Bin ...3-data-centric-ai.jpg => data-centric.jpg} | Bin ...siness-pricing-and-client-acquisition.jpg} | Bin ...ta-engineering-career-path-and-skills.jpg} | Bin ...-leadership-and-modern-data-platforms.jpg} | Bin ...a-engineering-tools-modern-data-stack.jpg} | Bin ...-market-demand-and-client-acquisition.jpg} | Bin ...ata-governance-data-access-management.jpg} | Bin ...w-behavioral-and-portfolio-prep-guide.jpg} | Bin ...ism-python-visualization-storytelling.jpg} | Bin ...ching.jpg => data-leadership-coaching.jpg} | Bin ...growth-event-tracking-and-reverse-etl.jpg} | Bin ...hitecture-decentralized-data-products.jpg} | Bin ...acy-engineering-gdpr-machine-learning.jpg} | Bin ...professionals-business-skills-in-saas.jpg} | Bin ...y-data-observability-data-reliability.jpg} | Bin ...nalytics-for-nonprofits-tech-for-good.jpg} | Bin ... => data-science-career-abc-framework.jpg} | Bin ...ta-science-failures-and-mlops-lessons.jpg} | Bin ...ublic-policy-ethical-ai-social-impact.jpg} | Bin ...> data-science-interview-and-cv-guide.jpg} | Bin ...ce-job-red-flags-and-mismatched-roles.jpg} | Bin ... 
data-science-leadership-hiring-mlops.jpg} | Bin ...management-and-agile-machine-learning.jpg} | Bin ...cience-manager-vs-expert-hiring-guide.jpg} | Bin ...science-team-structure-and-org-design.jpg} | Bin ...ie-hacker-bootstrapping-side-projects.jpg} | Bin ...y-and-dataops-for-ai-powered-products.jpg} | Bin .../{s01e01-roles.jpg => data-team-roles.jpg} | Bin ...ata-translator-role-and-data-strategy.jpg} | Bin ...-gitops-best-practices-for-data-teams.jpg} | Bin ...utomation-and-reliable-data-pipelines.jpg} | Bin ...s.jpg => dataops-for-data-engineering.jpg} | Bin ...rinciples-and-scalable-data-platforms.jpg} | Bin ...sclub-building-scaling-data-community.jpg} | Bin ...le-data-community-3-years-anniversary.jpg} | Bin ...atatalksclub-scaling-and-free-courses.jpg} | Bin ...fine-tuning-retrieval-open-source-api.jpg} | Bin ...eloper-personal-brand-learn-in-public.jpg} | Bin ...devrel-data-science-open-source-tools.jpg} | Bin ...> devrel-open-source-machine-learning.jpg} | Bin ....jpg => fairness-in-ai-ml-engineering.jpg} | Bin ...-model-monitoring-and-data-governance.jpg} | Bin ...nops.jpg => finops-for-data-engineers.jpg} | Bin ...-data-engineering-pricing-and-clients.jpg} | Bin ...pg => freelancing-in-machine-learning.jpg} | Bin ...engineer-interviews-and-career-growth.jpg} | Bin ...earch-to-data-engineering-freelancing.jpg} | Bin ...n-source-computer-vision-transformers.jpg} | Bin ...ion-research-to-autonomous-driving-ai.jpg} | Bin ...ancer-to-startup-open-source-products.jpg} | Bin ...g-automation-open-source-volunteering.jpg} | Bin ...g => from-game-ai-to-modern-ai-agents.jpg} | Bin ...engineering-to-leading-data-architect.jpg} | Bin ...science-research-software-engineering.jpg} | Bin ...ics-engineering-sql-dbt-career-switch.jpg} | Bin ... 
from-math-graduate-to-data-analytics.jpg} | Bin ...-to-computer-vision-career-transition.jpg} | Bin ...machine-learning-and-data-engineering.jpg} | Bin ...ctor-data-to-applied-machine-learning.jpg} | Bin ...software-engineer-to-machine-learning.jpg} | Bin ...cience-to-data-engineering-leadership.jpg} | Bin ...neering-to-leading-data-science-teams.jpg} | Bin ...achine-learning-applied-ml-leadership.jpg} | Bin ...engineering-to-freelance-data-science.jpg} | Bin ...ve-ai-chatbots-in-production-security.jpg} | Bin ...ta-analytics-and-data-engineering-job.jpg} | Bin ...ta-engineering-job-prep-and-interview.jpg} | Bin ...ientist.jpg => get-data-scientist-job.jpg} | Bin ...nior-data-job-and-transferable-skills.jpg} | Bin ...naging-data-science-teams-in-b2b-saas.jpg} | Bin ...> hiring-data-scientists-and-analysts.jpg} | Bin ...g-for-data-engineering-jobs-in-europe.jpg} | Bin ...ce-jobs-interview-questions-skills.md.jpg} | Bin ...jpg => how-to-break-into-data-science.jpg} | Bin ...ow-to-grow-your-ml-engineering-career.jpg} | Bin ...g => how-to-stand-out-in-data-science.jpg} | Bin ...-switch-to-ml-tech-without-experience.jpg} | Bin ...-into-ml-and-data-engineering-from-qa.jpg} | Bin ...-face-contributions-and-nlp-portfolio.jpg} | Bin ...tered-ai-automatic-speech-recognition.jpg} | Bin ...n-centered-mlops-and-model-monitoring.jpg} | Bin ...mall-data-production-machine-learning.jpg} | Bin ...jpg => interpretable-machine-learning.jpg} | Bin ...esting-in-open-source-developer-tools.jpg} | Bin ...in-tech-projects-skills-cv-networking.jpg} | Bin ...master-to-production-ml-and-education.jpg} | Bin ...ge-graphs-and-llms-for-automotive-rnd.jpg} | Bin ...ta-product-adoption-modern-data-stack.jpg} | Bin ...pg => launch-and-build-retail-startup.jpg} | Bin ...artups.jpg => lean-mlops-for-startups.jpg} | Bin ...e-learning-self-taught-bioinformatics.jpg} | Bin ...-learning-data-science-interview-prep.jpg} | Bin ...achine-learning-decision-optimization.jpg} | Bin 
...engineering-production-best-practices.jpg} | Bin ...r-asteroid-mining-and-water-detection.jpg} | Bin ...ng-attribution-marketing-mix-modeling.jpg} | Bin ...hine-learning-system-design-interview.jpg} | Bin ...ey-with-machine-learning-roles-skills.jpg} | Bin ...ch-how-to-find-and-become-a-mentor.md.jpg} | Bin ...ful-data-strategy-for-business-impact.jpg} | Bin ...engineering-kpis-and-metrics-strategy.jpg} | Bin ...t-manager-and-mlops-platform-strategy.jpg} | Bin ...design-broken.jpg => ml-system-design.jpg} | Bin ...> mlops-and-ml-engineering-in-finance.jpg} | Bin ...ops-at-scale-reproducibility-adoption.jpg} | Bin ... mlops-community-building-and-meetups.jpg} | Bin ...re-stores-feature-stores-feast-tecton.jpg} | Bin ...pg => mlops-kubeflow-model-monitoring.jpg} | Bin ...s-model-monitoring-data-observability.jpg} | Bin ...ines-orchestration-ingestion-modeling.jpg} | Bin ...tor-databases-llms-semantic-retrieval.jpg} | Bin ...t-creation-annotation-tools-workflows.jpg} | Bin ... nlp-team-hiring-and-production-mlops.jpg} | Bin ...rning-freelancing-and-public-learning.jpg} | Bin ...ering-in-ai-for-data-ml-career-growth.jpg} | Bin ...e.jpg => open-source-ml-contributions.jpg} | Bin ...ml-tools-strategy-and-business-models.jpg} | Bin ...rned-into-career-and-startup-creation.jpg} | Bin ...personal-brand-for-data-professionals.jpg} | Bin ...o-data-science-lead-career-transition.jpg} | Bin ...l-demofirst-education-and-open-source.jpg} | Bin ...i-consulting-from-expertise-to-impact.jpg} | Bin ... => practical-llm-engineering-and-rag.jpg} | Bin ...al-llm-use-cases-and-product-patterns.jpg} | Bin ...g => pragmatic-and-standardized-mlops.jpg} | Bin ...duct-designer-to-data-product-manager.jpg} | Bin ...ction-ml-mlops-and-data-team-building.jpg} | Bin ...ction-ml-pipelines-with-aws-and-kafka.jpg} | Bin ...ector-search-embeddings-hybrid search.jpg} | Bin ...pg => production-ready-ai-engineering.jpg} | Bin ... 
=> project-manager-to-data-scientist.jpg} | Bin ...> public-speaking-for-data-scientists.jpg} | Bin ...ering-work-and-building-iot-platforms.jpg} | Bin ...arch-to-production-ml-systems-roadmap.jpg} | Bin ...onsible-explainable-ai-bias-detection.jpg} | Bin ...ineering-teams-self-service-platforms.jpg} | Bin ...terprise-ai-mlops-data-first-strategy.jpg} | Bin ...ware-engineering-for-machine-learning.jpg} | Bin ...ups.jpg => solopreneur-data-scientist.jpg} | Bin ...eneur-developer-and-data-professional.jpg} | Bin ...hing-mentoring-data-analytics-fintech.jpg} | Bin ...science-coding-practices-for-academia.jpg} | Bin ...technical-writing-for-data-scientists.jpg} | Bin ...-to-tesla-full-stack-data-engineering.jpg} | Bin ... => trends-in-modern-data-engineering.jpg} | Bin ...able-cities.jpg => urban-data-science.jpg} | Bin ...chine-learning-concepts-to-explain-ml.jpg} | Bin podcast-errors-found.md | 242 ------------------ scripts/podcast-rename-mapping.md | 192 ++++++++++++++ scripts/podcasts.txt | 21 -- scripts/podcasts2.txt | 189 -------------- scripts/rename_podcast_images.py | 184 +++++++++++++ scripts/timestamps.txt | 20 -- 379 files changed, 563 insertions(+), 659 deletions(-) rename images/podcast/{s07e06-ab-testing.jpg => ab-testing-and-product-experimentation.jpg} (100%) rename images/podcast/{s08e04-machine-learning-and-personalization-in-healthcare.jpg => ai-in-healthcare-and-digital-therapeutics.jpg} (100%) rename images/podcast/{s20e01-trends-in-ai-infrastructure.jpg => ai-infrastructure-hybrid-cloud-on-prem-distributed-training.jpg} (100%) rename images/podcast/{s08e03-innovation-and-design-for-machine-learning.jpg => ai-ml-product-design-and-experimentation.jpg} (100%) rename images/podcast/{s17e03-stock-market-analysis-with-python-and-machine-learning.jpg => algorithmic-trading-with-python-and-machine-learning.jpg} (100%) rename images/podcast/{s05e01-mastering-algorithms-and-data-structures.jpg => algorithms-data-structures-for-engineers.jpg} (100%) 
rename images/podcast/{s03e11-analytics-engineer.jpg => analytics-engineer-skills-tools.jpg} (100%) rename images/podcast/{s03e02-from-analytics-to-data-science.jpg => analytics-to-data-science-with-kaggle-portfolio.jpg} (100%) rename images/podcast/{s20e07-build-strong-career-in-data.jpg => applied-llm-research-and-career-growth-in-practice.jpg} (100%) rename images/podcast/{s17e04-bayesian-modeling-and-probabilistic-programming.jpg => bayesian-modeling-workflows-and-tools.jpg} (100%) rename images/podcast/{s16e09-become-data-freelancer.jpg => becoming-data-freelancer.jpg} (100%) rename images/podcast/{s06e05-post-doctoral-research.jpg => big-data-analytics-and-postdoc-research.jpg} (100%) rename images/podcast/{s04e03-big-data-engineer-vs-data-scientist.jpg => big-data-engineer-vs-data-scientist.jpg} (100%) rename images/podcast/{s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg => biohacking-productivity-for-data-scientists-and-ml-engineers.jpg} (100%) rename images/podcast/{s22e03-from-biotechnology-to-bioinformatics-software.jpg => bioinformatics-worflows-tools-and-data-science.jpg} (100%) rename images/podcast/{s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg => building-agentic-ai-engineering-tooling-retrieval-evaluation.jpg} (100%) rename images/podcast/{s16e08-ai-for-digital-health.jpg => building-ai-digital-health-startups.jpg} (100%) rename images/podcast/{s07e03-product-management-essentials.jpg => building-and-scaling-ai-data-products-with-mlops.jpg} (100%) rename images/podcast/{s15e09-data-engineering-for-fraud-prevention.jpg => building-and-scaling-data-engineering-systems-for-fraud-detection.jpg} (100%) rename images/podcast/{s11e05-building-data-science-practice.jpg => building-and-scaling-data-science-practice-industrial-ai-mlops.jpg} (100%) rename images/podcast/{s05e06-building-and-leading-data-teams.jpg => building-and-scaling-data-team.jpg} (100%) rename images/podcast/{s11e06-product-owners-in-data-science.jpg => 
building-data-products-product-owner-vs-product-manager.jpg} (100%) rename images/podcast/{s10e08-leading-data-research.jpg => building-data-science-programs-and-democratizing-high-performance-computing.jpg} (100%) rename images/podcast/{s01e03-building-ds-team.jpg => building-data-team.jpg} (100%) rename images/podcast/{s18e07-building-domestic-risk-assessment-tool.jpg => building-domestic-risk-assessment-tool.jpg} (100%) rename images/podcast/{s14e09-interpretable-ai-and-ml.jpg => building-explainable-and-actionable-ai-ml-systems.jpg} (100%) rename images/podcast/{s16e02-bridging-data-science-and-healthcare.jpg => building-healthcare-machine-learning-systems.jpg} (100%) rename images/podcast/{s13e01-accelerating-adoption-of-ai-through-diversity.jpg => building-ml-communities-diversity-and-career-growth.jpg} (100%) rename images/podcast/{s04e04-ml-startup.jpg => building-mlops-startup.jpg} (100%) rename images/podcast/{s11e04-large-scale-entity-resolution.jpg => building-open-source-data-product-for-identity-resolution.jpg} (100%) rename images/podcast/{s13e09-building-open-source-nlp-tool.jpg => building-open-source-nlp-tool.jpg} (100%) rename images/podcast/{s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg => building-production-ml-platform-and-mlops-team.jpg} (100%) rename images/podcast/{s17e09-building-production-search-systems.jpg => building-production-search-systems.jpg} (100%) rename images/podcast/{s14e01-building-scalable-and-reliable-machine-learning-systems.jpg => building-scalable-and-reliable-machine-learning-systems.jpg} (100%) rename images/podcast/{s15e06-democratizing-causality.jpg => causal-inference-for-machine-learning.jpg} (100%) rename images/podcast/{s04e09-chief-data-officer.jpg => chief-data-officer-data-strategy-and-org-design.jpg} (100%) rename images/podcast/{s03e10-data-governance.jpg => cloud-data-governance.jpg} (100%) rename images/podcast/{s18e05-community-building-and-teaching-in-ai-tech.jpg => 
community-building-and-teaching-in-ai-tech.jpg} (100%) rename images/podcast/{s01e02-processes.jpg => crisp-dm.jpg} (100%) rename images/podcast/{s12e03-data-centric-ai.jpg => data-centric.jpg} (100%) rename images/podcast/{s13e04-starting-consultancy-in-data-space.jpg => data-consulting-business-pricing-and-client-acquisition.jpg} (100%) rename images/podcast/{s08e08-teaching-data-engineers.jpg => data-engineering-career-path-and-skills.jpg} (100%) rename images/podcast/{s07e07-becoming-a-data-engineering-manager.jpg => data-engineering-leadership-and-modern-data-platforms.jpg} (100%) rename images/podcast/{s05e02-data-engineering-acronyms.jpg => data-engineering-tools-modern-data-stack.jpg} (100%) rename images/podcast/{s20e09-taking-your-freelance-career-to-next-level.jpg => data-freelancing-career-strategy-market-demand-and-client-acquisition.jpg} (100%) rename images/podcast/{s14e04-data-access-management.jpg => data-governance-data-access-management.jpg} (100%) rename images/podcast/{s06e02-non-technical-interviews.jpg => data-interview-behavioral-and-portfolio-prep-guide.jpg} (100%) rename images/podcast/{s11e08-technical-writing-and-data-journalism.jpg => data-journalism-python-visualization-storytelling.jpg} (100%) rename images/podcast/{s18e01-inclusive-data-leadership-coaching.jpg => data-leadership-coaching.jpg} (100%) rename images/podcast/{s03e08-data-led-professional.jpg => data-led-growth-event-tracking-and-reverse-etl.jpg} (100%) rename images/podcast/{s10e06-data-mesh-101.jpg => data-mesh-architecture-decentralized-data-products.jpg} (100%) rename images/podcast/{s14e02-practical-data-privacy.jpg => data-privacy-engineering-gdpr-machine-learning.jpg} (100%) rename images/podcast/{s12e02-business-skills-for-data-professionals.jpg => data-professionals-business-skills-in-saas.jpg} (100%) rename images/podcast/{s03e03-data-observability.jpg => data-quality-data-observability-data-reliability.jpg} (100%) rename 
images/podcast/{s13e02-analytics-for-better-world.jpg => data-science-and-analytics-for-nonprofits-tech-for-good.jpg} (100%) rename images/podcast/{s02e07-abc-data-science.jpg => data-science-career-abc-framework.jpg} (100%) rename images/podcast/{s03e09-what-data-scientists-dont-mention.jpg => data-science-failures-and-mlops-lessons.jpg} (100%) rename images/podcast/{s10e01-data-science-for-social-impact.jpg => data-science-for-public-policy-ethical-ai-social-impact.jpg} (100%) rename images/podcast/{s03e04-interviewing-300-data-scientists.jpg => data-science-interview-and-cv-guide.jpg} (100%) rename images/podcast/{s10e02-decoding-data-science-job-descriptions.jpg => data-science-job-red-flags-and-mismatched-roles.jpg} (100%) rename images/podcast/{s06e09-data-science-manager.jpg => data-science-leadership-hiring-mlops.jpg} (100%) rename images/podcast/{s13e06-secret-sauce-of-data-science-management.jpg => data-science-management-and-agile-machine-learning.jpg} (100%) rename images/podcast/{s06e03-manager-vs-expert.jpg => data-science-manager-vs-expert-hiring-guide.jpg} (100%) rename images/podcast/{s09e07-designing-data-science-organization.jpg => data-science-team-structure-and-org-design.jpg} (100%) rename images/podcast/{s12e05-indie-hacking.jpg => data-scientist-and-indie-hacker-bootstrapping-side-projects.jpg} (100%) rename images/podcast/{s14e03-data-strategy-key-principles-and-best-practices.jpg => data-strategy-and-dataops-for-ai-powered-products.jpg} (100%) rename images/podcast/{s01e01-roles.jpg => data-team-roles.jpg} (100%) rename images/podcast/{s03e04-effective-communication-with-business.jpg => data-translator-role-and-data-strategy.jpg} (100%) rename images/podcast/{s11e03-from-data-science-to-dataops.jpg => dataops-and-gitops-best-practices-for-data-teams.jpg} (100%) rename images/podcast/{s08e05-storytime-for-dataops.jpg => dataops-automation-and-reliable-data-pipelines.jpg} (100%) rename 
images/podcast/{s18e09-dataops-observability-and-cure-for-data-team-blues.jpg => dataops-for-data-engineering.jpg} (100%) rename images/podcast/{s02e11-dataops.jpg => dataops-principles-and-scalable-data-platforms.jpg} (100%) rename images/podcast/{s07e01-datatalksclub-behind-the-scenes.jpg => datatalksclub-building-scaling-data-community.jpg} (100%) rename images/podcast/{s16e01-datatalks-club-anniversary-interview.jpg => datatalksclub-building-sustainable-data-community-3-years-anniversary.jpg} (100%) rename images/podcast/{s19e03-datatalks-club-anniversary-podcast.jpg => datatalksclub-scaling-and-free-courses.jpg} (100%) rename images/podcast/{s15e03-llms-for-everyone.jpg => deploying-llms-in-production-fine-tuning-retrieval-open-source-api.jpg} (100%) rename images/podcast/{s03e07-market-yourself.jpg => developer-personal-brand-learn-in-public.jpg} (100%) rename images/podcast/{s02e02-developer-advocacy.jpg => devrel-data-science-open-source-tools.jpg} (100%) rename images/podcast/{s14e06-data-developer-relations.jpg => devrel-open-source-machine-learning.jpg} (100%) rename images/podcast/{s19e09-linguistics-and-fairness.jpg => fairness-in-ai-ml-engineering.jpg} (100%) rename images/podcast/{s05e09-business-acumen.jpg => feature-engineering-model-monitoring-and-data-governance.jpg} (100%) rename images/podcast/{s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg => finops-for-data-engineers.jpg} (100%) rename images/podcast/{s09e04-freelancing-and-consulting-with-data-engineering.jpg => freelance-data-engineering-pricing-and-clients.jpg} (100%) rename images/podcast/{s04e08-freelancing.jpg => freelancing-in-machine-learning.jpg} (100%) rename images/podcast/{s12e09-staff-ai-engineer.jpg => from-academia-to-staff-ai-engineer-interviews-and-career-growth.jpg} (100%) rename images/podcast/{s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg => from-academic-research-to-data-engineering-freelancing.jpg} (100%) rename 
images/podcast/{s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg => from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.jpg} (100%) rename images/podcast/{s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg => from-computer-vision-research-to-autonomous-driving-ai.jpg} (100%) rename images/podcast/{s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg => from-data-freelancer-to-startup-open-source-products.jpg} (100%) rename images/podcast/{s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg => from-devops-to-data-engineering-automation-open-source-volunteering.jpg} (100%) rename images/podcast/{s21e07-lessons-from-two-decades-of-ai.jpg => from-game-ai-to-modern-ai-agents.jpg} (100%) rename images/podcast/{s15e08-from-data-manager-to-data-architect.jpg => from-iot-data-engineering-to-leading-data-architect.jpg} (100%) rename images/podcast/{s19e05-large-hadron-collider-and-mentorship.jpg => from-large-hadron-collider-to-data-science-research-software-engineering.jpg} (100%) rename images/podcast/{s11e07-from-digital-marketing-to-analytics-engineering.jpg => from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg} (100%) rename images/podcast/{s07e09-from-math-teacher-to-analytics-engineer.jpg => from-math-graduate-to-data-analytics.jpg} (100%) rename images/podcast/{s03e06-from-physics-to-machine-learning.jpg => from-physics-to-computer-vision-career-transition.jpg} (100%) rename images/podcast/{s21e05-from-astronomy-to-applied-ml.jpg => from-radio-astronomy-to-machine-learning-and-data-engineering.jpg} (100%) rename images/podcast/{s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg => from-semiconductor-data-to-applied-machine-learning.jpg} (100%) rename images/podcast/{s04e01-from-swe-to-ml.jpg => from-software-engineer-to-machine-learning.jpg} (100%) rename 
images/podcast/{s07e08-from-data-science-to-data-engineering.jpg => from-software-engineering-data-science-to-data-engineering-leadership.jpg} (100%) rename images/podcast/{s12e01-from-software-engineer-to-data-science-manager.jpg => from-software-engineering-to-leading-data-science-teams.jpg} (100%) rename images/podcast/{s16e06-unwritten-rules-for-success-in-machine-learning.jpg => from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.jpg} (100%) rename images/podcast/{s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg => from-startup-engineering-to-freelance-data-science.jpg} (100%) rename images/podcast/{s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg => generative-ai-chatbots-in-production-security.jpg} (100%) rename images/podcast/{s08e09-from-academia-to-data-analytics-and-engineering.jpg => get-data-analytics-and-data-engineering-job.jpg} (100%) rename images/podcast/{s09e03-getting-data-engineering-job-(summary-and-q&a).jpg => get-data-engineering-job-prep-and-interview.jpg} (100%) rename images/podcast/{s01e04-standing-out-as-a-data-scientist.jpg => get-data-scientist-job.jpg} (100%) rename images/podcast/{s07e04-career-coaching.jpg => get-junior-data-job-and-transferable-skills.jpg} (100%) rename images/podcast/{s11e02-data-science-career-development.jpg => hiring-and-managing-data-science-teams-in-b2b-saas.jpg} (100%) rename images/podcast/{s07e02-recruiting-data-professionals.jpg => hiring-data-scientists-and-analysts.jpg} (100%) rename images/podcast/{s08e06-recruiting-data-engineers.jpg => hiring-for-data-engineering-jobs-in-europe.jpg} (100%) rename images/podcast/{s09e09-hiring-data-science-talent.jpg => hiring-for-data-science-jobs-interview-questions-skills.md.jpg} (100%) rename images/podcast/{s09e05-data-scientists-at-work.jpg => how-to-break-into-data-science.jpg} (100%) rename images/podcast/{s12e07-navigating-career-changes-in-machine-learning.jpg => 
how-to-grow-your-ml-engineering-career.jpg} (100%) rename images/podcast/{s08e02-hacking-your-data-career.jpg => how-to-stand-out-in-data-science.jpg} (100%) rename images/podcast/{s08e07-from-roasting-coffee-to-backend-development.jpg => how-to-switch-to-ml-tech-without-experience.jpg} (100%) rename images/podcast/{s11e01-from-testing-phones-to-managing-nlp-projects.jpg => how-to-transition-into-ml-and-data-engineering-from-qa.jpg} (100%) rename images/podcast/{s09e06-developer-advocacy-engineer-for-open-source.jpg => hugging-face-contributions-and-nlp-portfolio.jpg} (100%) rename images/podcast/{s19e02-human-centered-ai-for-disordered-speech-recognition.jpg => human-centered-ai-automatic-speech-recognition.jpg} (100%) rename images/podcast/{s04e06-humans-in-the-loop.jpg => human-centered-mlops-and-model-monitoring.jpg} (100%) rename images/podcast/{s13e08-navigating-industrial-data-challenges.jpg => industrial-data-small-data-production-machine-learning.jpg} (100%) rename images/podcast/{s16e07-cracking-code-machine-learning-made-understandable.jpg => interpretable-machine-learning.jpg} (100%) rename images/podcast/{s15e02-investing-in-open-source-data-tools.jpg => investing-in-open-source-developer-tools.jpg} (100%) rename images/podcast/{s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg => job-search-strategy-in-tech-projects-skills-cv-networking.jpg} (100%) rename images/podcast/{s20e02-competitive-machine-learning-and-teaching.jpg => kaggle-grandmaster-to-production-ml-and-education.jpg} (100%) rename images/podcast/{s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg => knowledge-graphs-and-llms-for-automotive-rnd.jpg} (100%) rename images/podcast/{s05e08-the-last-mile-in-data.jpg => last-mile-data-delivery-and-data-product-adoption-modern-data-stack.jpg} (100%) rename images/podcast/{s04e07-launching-a-startup.jpg => launch-and-build-retail-startup.jpg} (100%) rename images/podcast/{s20e04-mlops-in-corporations-and-startups.jpg => 
lean-mlops-for-startups.jpg} (100%) rename images/podcast/{s13e07-mastering-self-learning-in-machine-learning.jpg => learning-machine-learning-self-taught-bioinformatics.jpg} (100%) rename images/podcast/{s12e06-preparing-for-data-science-interview.jpg => machine-learning-data-science-interview-prep.jpg} (100%) rename images/podcast/{s02e06-decision-optimization.jpg => machine-learning-decision-optimization.jpg} (100%) rename images/podcast/{s04e05-running-from-complexity.jpg => machine-learning-engineering-production-best-practices.jpg} (100%) rename images/podcast/{s09e02-using-data-for-asteroid-mining.jpg => machine-learning-for-asteroid-mining-and-water-detection.jpg} (100%) rename images/podcast/{s09e01-machine-learning-in-marketing.jpg => machine-learning-in-marketing-attribution-marketing-mix-modeling.jpg} (100%) rename images/podcast/{s07e05-machine-learning-system-design-interview.jpg => machine-learning-system-design-interview.jpg} (100%) rename images/podcast/{s02e09-roles-skills-monetizing-ml.jpg => make-money-with-machine-learning-roles-skills.jpg} (100%) rename images/podcast/{s01e05-mentoring.jpg => mentoring-in-tech-how-to-find-and-become-a-mentor.md.jpg} (100%) rename images/podcast/{s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg => mindful-data-strategy-for-business-impact.jpg} (100%) rename images/podcast/{s05e03-metrics-and-kpis.jpg => ml-engineering-kpis-and-metrics-strategy.jpg} (100%) rename images/podcast/{s06e07-product-management-for-machine-learning.jpg => ml-product-manager-and-mlops-platform-strategy.jpg} (100%) rename images/podcast/{s15e01-why-machine-learning-design-broken.jpg => ml-system-design.jpg} (100%) rename images/podcast/{s17e05-machine-learning-engineering-in-finance.jpg => mlops-and-ml-engineering-in-finance.jpg} (100%) rename images/podcast/{s19e04-mlops-as-team.jpg => mlops-at-scale-reproducibility-adoption.jpg} (100%) rename images/podcast/{s02e12-communities.jpg => 
mlops-community-building-and-meetups.jpg} (100%) rename images/podcast/{s02e05-feature-stores.jpg => mlops-feature-stores-feature-stores-feast-tecton.jpg} (100%) rename images/podcast/{s02e04-mlops.jpg => mlops-kubeflow-model-monitoring.jpg} (100%) rename images/podcast/{s10e03-mlops-architect.jpg => mlops-model-monitoring-data-observability.jpg} (100%) rename images/podcast/{s14e07-from-mlops-to-dataops.jpg => modern-data-pipelines-orchestration-ingestion-modeling.jpg} (100%) rename images/podcast/{s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg => modern-search-systems-vector-databases-llms-semantic-retrieval.jpg} (100%) rename images/podcast/{s10e07-dataset-creation-and-curation.jpg => nlp-dataset-creation-annotation-tools-workflows.jpg} (100%) rename images/podcast/{s06e08-nlp-teams.jpg => nlp-team-hiring-and-production-mlops.jpg} (100%) rename images/podcast/{s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg => nonlinear-path-to-machine-learning-freelancing-and-public-learning.jpg} (100%) rename images/podcast/{s17e07-make-impact-through-volunteering-open-source-work.jpg => open-source-and-volunteering-in-ai-for-data-ml-career-growth.jpg} (100%) rename images/podcast/{s02e03-open-source.jpg => open-source-ml-contributions.jpg} (100%) rename images/podcast/{s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg => open-source-ml-tools-strategy-and-business-models.jpg} (100%) rename images/podcast/{s09e08-from-open-source-maintainer-to-founder.jpg => open-source-turned-into-career-and-startup-creation.jpg} (100%) rename images/podcast/{s02e08-personal-branding.jpg => personal-brand-for-data-professionals.jpg} (100%) rename images/podcast/{s06e06-from-academia-to-industry.jpg => postdoc-to-data-science-lead-career-transition.jpg} (100%) rename images/podcast/{s20e08-from-hackathons-to-developer-advocacy.jpg => practical-devrel-demofirst-education-and-open-source.jpg} (100%) 
rename images/podcast/{s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg => practical-generative-ai-consulting-from-expertise-to-impact.jpg} (100%) rename images/podcast/{s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg => practical-llm-engineering-and-rag.jpg} (100%) rename images/podcast/{s15e04-good-bad-and-ugly-of-gpt.jpg => practical-llm-use-cases-and-product-patterns.jpg} (100%) rename images/podcast/{s15e07-pragmatic-and-standardized-mlops.jpg => pragmatic-and-standardized-mlops.jpg} (100%) rename images/podcast/{s06e04-becoming-a-data-product-manager.jpg => product-designer-to-data-product-manager.jpg} (100%) rename images/podcast/{s05e07-ml-vs-analytics.jpg => production-ml-mlops-and-data-team-building.jpg} (100%) rename images/podcast/{s04e02-build-your-own-data-pipeline.jpg => production-ml-pipelines-with-aws-and-kafka.jpg} (100%) rename images/podcast/{s17e08-building-machine-learning-products.jpg => production-ml-search-vector-search-embeddings-hybrid search.jpg} (100%) rename images/podcast/{s20e05-data-intensive-ai.jpg => production-ready-ai-engineering.jpg} (100%) rename images/podcast/{s03e01-from-pm-to-ds.jpg => project-manager-to-data-scientist.jpg} (100%) rename images/podcast/{s02e10-public-speaking.jpg => public-speaking-for-data-scientists.jpg} (100%) rename images/podcast/{s15e05-mastering-data-engineering-as-remote-worker.jpg => remote-data-engineering-work-and-building-iot-platforms.jpg} (100%) rename images/podcast/{s05e05-researchers-vs-engineers.jpg => research-to-production-ml-systems-roadmap.jpg} (100%) rename images/podcast/{s10e09-responsible-and-explainable-ai.jpg => responsible-explainable-ai-bias-detection.jpg} (100%) rename images/podcast/{s10e05-growing-data-engineering-team-in-scale-up.jpg => scaling-data-engineering-teams-self-service-platforms.jpg} (100%) rename images/podcast/{s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg => 
scaling-enterprise-ai-mlops-data-first-strategy.jpg} (100%) rename images/podcast/{s13e05-se4ml-software-engineering-for-machine-learning.jpg => software-engineering-for-machine-learning.jpg} (100%) rename images/podcast/{s05e04-introducing-data-science-in-startups.jpg => solopreneur-data-scientist.jpg} (100%) rename images/podcast/{s06e01-solopreneur.jpg => solopreneur-developer-and-data-professional.jpg} (100%) rename images/podcast/{s11e09-teaching-and-mentoring-in-data-analytics.jpg => teaching-mentoring-data-analytics-fintech.jpg} (100%) rename images/podcast/{s12e04-doing-software-engineering-in-academia.jpg => teaching-reproducible-research-and-open-science-coding-practices-for-academia.jpg} (100%) rename images/podcast/{s02e01-writing.jpg => technical-writing-for-data-scientists.jpg} (100%) rename images/podcast/{s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg => theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.jpg} (100%) rename images/podcast/{s20e03-trends-in-data-engineering.jpg => trends-in-modern-data-engineering.jpg} (100%) rename images/podcast/{s19e01-using-data-to-create-liveable-cities.jpg => urban-data-science.jpg} (100%) rename images/podcast/{s08e01-visualising-machine-learning.jpg => visualizing-machine-learning-concepts-to-explain-ml.jpg} (100%) delete mode 100644 podcast-errors-found.md create mode 100644 scripts/podcast-rename-mapping.md delete mode 100644 scripts/podcasts.txt delete mode 100644 scripts/podcasts2.txt create mode 100755 scripts/rename_podcast_images.py delete mode 100644 scripts/timestamps.txt diff --git a/_podcast/ab-testing-and-product-experimentation.md b/_podcast/ab-testing-and-product-experimentation.md index a4b72513..5c19830b 100644 --- a/_podcast/ab-testing-and-product-experimentation.md +++ b/_podcast/ab-testing-and-product-experimentation.md @@ -5,7 +5,7 @@ season: 7 episode: 6 guests: - jakobgraff -image: images/podcast/s07e06-ab-testing.jpg +image: 
images/podcast/ab-testing-and-product-experimentation.jpg ids: anchor: AB-Testing---Jakob-Graff-e1eq73v youtube: 0Gqx1LtqRZU diff --git a/_podcast/ai-for-ecology-biodiversity-and-conservation.md b/_podcast/ai-for-ecology-biodiversity-and-conservation.md index 7fa385ef..6d4cff5f 100644 --- a/_podcast/ai-for-ecology-biodiversity-and-conservation.md +++ b/_podcast/ai-for-ecology-biodiversity-and-conservation.md @@ -6,7 +6,7 @@ season: 18 episode: 3 guests: - tanyabergerwolf -image: images/podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.jpg +image: images/podcast/ai-for-ecology-biodiversity-and-conservation.jpg ids: anchor: datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi youtube: 30tTrozbAkg diff --git a/_podcast/ai-in-healthcare-and-digital-therapeutics.md b/_podcast/ai-in-healthcare-and-digital-therapeutics.md index 0df232db..5d23b7f3 100644 --- a/_podcast/ai-in-healthcare-and-digital-therapeutics.md +++ b/_podcast/ai-in-healthcare-and-digital-therapeutics.md @@ -5,7 +5,7 @@ season: 8 episode: 4 guests: - stefangudmundsson -image: images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg +image: images/podcast/ai-in-healthcare-and-digital-therapeutics.jpg ids: anchor: Machine-Learning-and-Personalization-in-Healthcare---Stefan-Gudmundsson-e1h5gdg youtube: IDzhmmKeNG4 diff --git a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md index ecf3cb27..06e76c14 100644 --- a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md +++ b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md @@ -6,7 +6,7 @@ season: 20 episode: 1 guests: - andreycheptsov -image: images/podcast/s20e01-trends-in-ai-infrastructure.jpg +image: images/podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.jpg ids: anchor: 
datatalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 youtube: 1aMuynlLM3o diff --git a/_podcast/ai-ml-product-design-and-experimentation.md b/_podcast/ai-ml-product-design-and-experimentation.md index e5145a56..494b6ec4 100644 --- a/_podcast/ai-ml-product-design-and-experimentation.md +++ b/_podcast/ai-ml-product-design-and-experimentation.md @@ -5,7 +5,7 @@ season: 8 episode: 3 guests: - liesbethdingemans -image: images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg +image: images/podcast/ai-ml-product-design-and-experimentation.jpg ids: anchor: Innovation-and-Design-for-Machine-Learning---Liesbeth-Dingemans-e1gq0en youtube: tcqBfZw41FM diff --git a/_podcast/algorithmic-trading-with-python-and-machine-learning.md b/_podcast/algorithmic-trading-with-python-and-machine-learning.md index bac49ead..86432a7c 100644 --- a/_podcast/algorithmic-trading-with-python-and-machine-learning.md +++ b/_podcast/algorithmic-trading-with-python-and-machine-learning.md @@ -5,7 +5,7 @@ season: 17 episode: 3 guests: - ivanbrigida -image: images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg +image: images/podcast/algorithmic-trading-with-python-and-machine-learning.jpg ids: anchor: datatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 youtube: NThHAEIazFk diff --git a/_podcast/algorithms-data-structures-for-engineers.md b/_podcast/algorithms-data-structures-for-engineers.md index 027c0795..cdf3ab4b 100644 --- a/_podcast/algorithms-data-structures-for-engineers.md +++ b/_podcast/algorithms-data-structures-for-engineers.md @@ -5,7 +5,7 @@ season: 5 episode: 1 guests: - marcellolarocca -image: images/podcast/s05e01-mastering-algorithms-and-data-structures.jpg +image: images/podcast/algorithms-data-structures-for-engineers.jpg ids: youtube: RiQa-9LguW8 anchor: 
Mastering-Algorithms-and-Data-Structures---Marcello-La-Rocca-e16s7lf diff --git a/_podcast/analytics-engineer-skills-tools.md b/_podcast/analytics-engineer-skills-tools.md index f0795f23..0d124ef2 100644 --- a/_podcast/analytics-engineer-skills-tools.md +++ b/_podcast/analytics-engineer-skills-tools.md @@ -5,7 +5,7 @@ season: 3 episode: 11 guests: - victoriaperezmola -image: images/podcast/s03e11-analytics-engineer.jpg +image: images/podcast/analytics-engineer-skills-tools.jpg ids: youtube: C5UcxBwdCEg anchor: Analytics-Engineer-New-Role-in-a-Data-Team---Victoria-Perez-Mola-e131e3n diff --git a/_podcast/analytics-to-data-science-with-kaggle-portfolio.md b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md index 4e8c1188..13ec1681 100644 --- a/_podcast/analytics-to-data-science-with-kaggle-portfolio.md +++ b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md @@ -5,7 +5,7 @@ season: 3 episode: 2 guests: - andradaolteanu -image: images/podcast/s03e02-from-analytics-to-data-science.jpg +image: images/podcast/analytics-to-data-science-with-kaggle-portfolio.jpg ids: youtube: ixmTewD5Waw anchor: Shifting-Career-from-Analytics-to-Data-Science---Andrada-Olteanu-ev19ma diff --git a/_podcast/applied-llm-research-and-career-growth-in-practice.md b/_podcast/applied-llm-research-and-career-growth-in-practice.md index 3303d0e2..a6aa69ae 100644 --- a/_podcast/applied-llm-research-and-career-growth-in-practice.md +++ b/_podcast/applied-llm-research-and-career-growth-in-practice.md @@ -6,7 +6,7 @@ season: 20 episode: 7 guests: - lavanyagupta -image: images/podcast/s20e07-build-strong-career-in-data.jpg +image: images/podcast/applied-llm-research-and-career-growth-in-practice.jpg ids: anchor: datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p youtube: ekG5zJioyFs diff --git a/_podcast/bayesian-modeling-workflows-and-tools.md 
b/_podcast/bayesian-modeling-workflows-and-tools.md index 29fad801..f4dc49f5 100644 --- a/_podcast/bayesian-modeling-workflows-and-tools.md +++ b/_podcast/bayesian-modeling-workflows-and-tools.md @@ -5,7 +5,7 @@ season: 17 episode: 4 guests: - robzinkov -image: images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg +image: images/podcast/bayesian-modeling-workflows-and-tools.jpg ids: anchor: datatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 youtube: kcKvUSInm-M diff --git a/_podcast/becoming-data-freelancer.md b/_podcast/becoming-data-freelancer.md index a8c34a59..1a18ff95 100644 --- a/_podcast/becoming-data-freelancer.md +++ b/_podcast/becoming-data-freelancer.md @@ -5,7 +5,7 @@ season: 16 episode: 9 guests: - dimitrivisnadi -image: images/podcast/s16e09-become-data-freelancer.jpg +image: images/podcast/becoming-data-freelancer.jpg ids: anchor: datatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 youtube: R_EnSa9aZtE diff --git a/_podcast/big-data-analytics-and-postdoc-research.md b/_podcast/big-data-analytics-and-postdoc-research.md index cf301358..04da239a 100644 --- a/_podcast/big-data-analytics-and-postdoc-research.md +++ b/_podcast/big-data-analytics-and-postdoc-research.md @@ -5,7 +5,7 @@ season: 6 episode: 5 guests: - elenitziritazacharatou -image: images/podcast/s06e05-post-doctoral-research.jpg +image: images/podcast/big-data-analytics-and-postdoc-research.jpg ids: youtube: 7jgmIQGMhGE anchor: Advancing-Big-Data-Analytics-Post-Doctoral-Research---Eleni-Tzirita-Zacharatou-e1b6f41 diff --git a/_podcast/big-data-engineer-vs-data-scientist.md b/_podcast/big-data-engineer-vs-data-scientist.md index fd598dd8..da33386f 100644 --- a/_podcast/big-data-engineer-vs-data-scientist.md +++ b/_podcast/big-data-engineer-vs-data-scientist.md @@ -5,7 +5,7 @@ season: 4 episode: 3 guests: - roksolanadiachuk -image: images/podcast/s04e03-big-data-engineer-vs-data-scientist.jpg +image: 
images/podcast/big-data-engineer-vs-data-scientist.jpg ids: youtube: yg3d1lFd7Uo anchor: Big-Data-Engineer-vs-Data-Scientist---Roksolana-Diachuk-e139sl8 diff --git a/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md index 8b5e0d58..570cf309 100644 --- a/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md +++ b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md @@ -5,7 +5,7 @@ season: 13 episode: 3 guests: - ruslanshchuchkin -image: images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg +image: images/podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.jpg ids: anchor: ow/datatalksclub/episodes/Biohacking-for-Data-Scientists-and-ML-Engineers---Ruslan-Shchuchkin-e1vpm1i youtube: uyxUBADZYpU diff --git a/_podcast/bioinformatics-worflows-tools-and-data-science.md b/_podcast/bioinformatics-worflows-tools-and-data-science.md index c72f2157..da9c1268 100644 --- a/_podcast/bioinformatics-worflows-tools-and-data-science.md +++ b/_podcast/bioinformatics-worflows-tools-and-data-science.md @@ -7,7 +7,7 @@ season: 22 episode: 3 guests: - sebastianayalaruano -image: images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg +image: images/podcast/bioinformatics-worflows-tools-and-data-science.jpg ids: anchor: datatalksclub/episodes/From-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-RuanoFrom-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-Ruano-e39vsv6 youtube: ZFrcrTtnB1Q diff --git a/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md index ff5f9337..e1bd1c28 100644 --- a/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md +++ b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md @@ -6,7 +6,7 @@ season: 22 episode: 1 guests: - 
ranjithakulkarni -image: images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg +image: images/podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.jpg ids: anchor: datatalksclub/episodes/Building-reliable-AI-products-in-the-era-of-Gen-AI-and-Agents---Ranjitha-Kulkarni-e396m2u youtube: x2AAjqz2XmM diff --git a/_podcast/building-ai-digital-health-startups.md b/_podcast/building-ai-digital-health-startups.md index 83e3e5b1..4dc4670b 100644 --- a/_podcast/building-ai-digital-health-startups.md +++ b/_podcast/building-ai-digital-health-startups.md @@ -5,7 +5,7 @@ season: 16 episode: 8 guests: - mariabruckert -image: images/podcast/s16e08-ai-for-digital-health.jpg +image: images/podcast/building-ai-digital-health-startups.jpg ids: anchor: datatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc youtube: whpkDmVVGUE diff --git a/_podcast/building-and-scaling-ai-data-products-with-mlops.md b/_podcast/building-and-scaling-ai-data-products-with-mlops.md index 2ba5714c..81937fc0 100644 --- a/_podcast/building-and-scaling-ai-data-products-with-mlops.md +++ b/_podcast/building-and-scaling-ai-data-products-with-mlops.md @@ -5,7 +5,7 @@ season: 7 episode: 3 guests: - gregcoquillo -image: images/podcast/s07e03-product-management-essentials.jpg +image: images/podcast/building-and-scaling-ai-data-products-with-mlops.jpg ids: youtube: p4wg0Vd2uD4 anchor: Product-Management-Essentials-for-Data-Professionals---Greg-Coquillo-e1dr8g5 diff --git a/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md index 2340cf6e..10ab967b 100644 --- a/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md +++ b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md @@ -5,7 +5,7 @@ season: 15 episode: 9 guests: - angelaramirez -image: images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg 
+image: images/podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.jpg ids: anchor: datatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab youtube: ZXNKjrrKU_I diff --git a/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md index 37cce8cb..044ed50e 100644 --- a/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md +++ b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md @@ -5,7 +5,7 @@ season: 11 episode: 5 guests: - andreyshtylenko -image: images/podcast/s11e05-building-data-science-practice.jpg +image: images/podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.jpg ids: anchor: Building-Data-Science-Practice---Andrey-Shtylenko-e1q2ka6 youtube: XbDQv8FTA4U diff --git a/_podcast/building-and-scaling-data-team.md b/_podcast/building-and-scaling-data-team.md index 588d6324..36c3fe13 100644 --- a/_podcast/building-and-scaling-data-team.md +++ b/_podcast/building-and-scaling-data-team.md @@ -5,7 +5,7 @@ season: 5 episode: 6 guests: - tammyliang -image: images/podcast/s05e06-building-and-leading-data-teams.jpg +image: images/podcast/building-and-scaling-data-team.jpg ids: youtube: kI4V2iBbaH0 anchor: Building-and-Leading-Data-Teams---Tammy-Liang-e18efdl diff --git a/_podcast/building-data-products-product-owner-vs-product-manager.md b/_podcast/building-data-products-product-owner-vs-product-manager.md index 2012e1b3..3d96ac82 100644 --- a/_podcast/building-data-products-product-owner-vs-product-manager.md +++ b/_podcast/building-data-products-product-owner-vs-product-manager.md @@ -5,7 +5,7 @@ season: 11 episode: 6 guests: - annahannemann -image: images/podcast/s11e06-product-owners-in-data-science.jpg +image: images/podcast/building-data-products-product-owner-vs-product-manager.jpg ids: anchor: Product-Owners-in-Data-Science---Anna-Hannemann-e1q0ord 
youtube: rTRTjB6cGng diff --git a/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md index e915e828..2900165f 100644 --- a/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md +++ b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md @@ -5,7 +5,7 @@ season: 10 episode: 8 guests: - davidbader -image: images/podcast/s10e08-leading-data-research.jpg +image: images/podcast/building-data-science-programs-and-democratizing-high-performance-computing.jpg ids: anchor: Leading-Data-Research---David-Bader-e1nmt3r youtube: vZLlpsUlchQ diff --git a/_podcast/building-data-team.md b/_podcast/building-data-team.md index 790b88f3..e265dc27 100644 --- a/_podcast/building-data-team.md +++ b/_podcast/building-data-team.md @@ -5,7 +5,7 @@ season: 1 episode: 3 guests: - dattran -image: images/podcast/s01e03-building-ds-team.jpg +image: images/podcast/building-data-team.jpg ids: youtube: ScDIB-3O77A anchor: Building-a-Data-Science-Team---Dat-Tran-enlmef diff --git a/_podcast/building-domestic-risk-assessment-tool.md b/_podcast/building-domestic-risk-assessment-tool.md index 2bd37544..da8d6455 100644 --- a/_podcast/building-domestic-risk-assessment-tool.md +++ b/_podcast/building-domestic-risk-assessment-tool.md @@ -6,7 +6,7 @@ season: 18 episode: 7 guests: - sabinafirtala -image: images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg +image: images/podcast/building-domestic-risk-assessment-tool.jpg ids: anchor: datatalksclub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i youtube: CpWlBAmD9ok diff --git a/_podcast/building-explainable-and-actionable-ai-ml-systems.md b/_podcast/building-explainable-and-actionable-ai-ml-systems.md index 80fe420c..ae75e85d 100644 --- a/_podcast/building-explainable-and-actionable-ai-ml-systems.md +++ 
b/_podcast/building-explainable-and-actionable-ai-ml-systems.md @@ -5,7 +5,7 @@ season: 14 episode: 9 guests: - polinamosolova -image: images/podcast/s14e09-interpretable-ai-and-ml.jpg +image: images/podcast/building-explainable-and-actionable-ai-ml-systems.jpg ids: anchor: datatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq youtube: EQcY83VA0Us diff --git a/_podcast/building-healthcare-machine-learning-systems.md b/_podcast/building-healthcare-machine-learning-systems.md index 7aa89a23..21b46d46 100644 --- a/_podcast/building-healthcare-machine-learning-systems.md +++ b/_podcast/building-healthcare-machine-learning-systems.md @@ -6,7 +6,7 @@ season: 16 episode: 2 guests: - elenistamatelou -image: images/podcast/s16e02-bridging-data-science-and-healthcare.jpg +image: images/podcast/building-healthcare-machine-learning-systems.jpg ids: anchor: datatalksclub/episodes/Bridging-Data-Science-and-Healthcare---Eleni-Stamatelou-e2aegvc youtube: pDOwlulDh0c diff --git a/_podcast/building-ml-communities-diversity-and-career-growth.md b/_podcast/building-ml-communities-diversity-and-career-growth.md index 26c06c45..7a88fb09 100644 --- a/_podcast/building-ml-communities-diversity-and-career-growth.md +++ b/_podcast/building-ml-communities-diversity-and-career-growth.md @@ -5,7 +5,7 @@ season: 13 episode: 1 guests: - daniameira -image: images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg +image: images/podcast/building-ml-communities-diversity-and-career-growth.jpg ids: anchor: Accelerating-the-Adoption-of-AI-through-Diversity---Dnia-Meira-e1v9obp youtube: SRUwwvk_YCk diff --git a/_podcast/building-mlops-startup.md b/_podcast/building-mlops-startup.md index 0b1b0d76..0ed65837 100644 --- a/_podcast/building-mlops-startup.md +++ b/_podcast/building-mlops-startup.md @@ -5,7 +5,7 @@ season: 4 episode: 4 guests: - elenasamuylova -image: images/podcast/s04e04-ml-startup.jpg +image: images/podcast/building-mlops-startup.jpg ids: youtube: 
DiDs5aMjEWg anchor: I-Want-to-Build-a-Machine-Learning-Startup----Elena-Samuylova-e139ste diff --git a/_podcast/building-open-source-data-product-for-identity-resolution.md b/_podcast/building-open-source-data-product-for-identity-resolution.md index 7cb333b5..e547bb66 100644 --- a/_podcast/building-open-source-data-product-for-identity-resolution.md +++ b/_podcast/building-open-source-data-product-for-identity-resolution.md @@ -5,7 +5,7 @@ season: 11 episode: 4 guests: - sonalgoyal -image: images/podcast/s11e04-large-scale-entity-resolution.jpg +image: images/podcast/building-open-source-data-product-for-identity-resolution.jpg ids: anchor: Large-Scale-Entity-Resolution---Sonal-Goyal-e1pibrh youtube: lpjffCOPxlY diff --git a/_podcast/building-open-source-nlp-tool.md b/_podcast/building-open-source-nlp-tool.md index 24b903c3..b019be9f 100644 --- a/_podcast/building-open-source-nlp-tool.md +++ b/_podcast/building-open-source-nlp-tool.md @@ -5,7 +5,7 @@ season: 13 episode: 9 guests: - johanneshotter -image: images/podcast/s13e09-building-open-source-nlp-tool.jpg +image: images/podcast/building-open-source-nlp-tool.jpg ids: anchor: ow/datatalksclub/episodes/Building-an-Open-Source-NLP-Tool---Johannes-Htter-e22lbn4 youtube: WIpnyiHp4IE diff --git a/_podcast/building-production-ml-platform-and-mlops-team.md b/_podcast/building-production-ml-platform-and-mlops-team.md index 242dabd3..516d8a22 100644 --- a/_podcast/building-production-ml-platform-and-mlops-team.md +++ b/_podcast/building-production-ml-platform-and-mlops-team.md @@ -5,7 +5,7 @@ season: 14 episode: 8 guests: - simonstiebellehner -image: images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg +image: images/podcast/building-production-ml-platform-and-mlops-team.jpg ids: anchor: datatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c youtube: CB1YIsxQRtc diff --git a/_podcast/building-production-search-systems.md 
b/_podcast/building-production-search-systems.md index ae1c5a93..0fb14dcd 100644 --- a/_podcast/building-production-search-systems.md +++ b/_podcast/building-production-search-systems.md @@ -5,7 +5,7 @@ season: 17 episode: 9 guests: - danielsvonava -image: images/podcast/s17e09-building-production-search-systems.jpg +image: images/podcast/building-production-search-systems.jpg ids: anchor: datatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh youtube: gEmSrknGKDE diff --git a/_podcast/building-scalable-and-reliable-machine-learning-systems.md b/_podcast/building-scalable-and-reliable-machine-learning-systems.md index 1b0b2bad..fe23edc6 100644 --- a/_podcast/building-scalable-and-reliable-machine-learning-systems.md +++ b/_podcast/building-scalable-and-reliable-machine-learning-systems.md @@ -5,7 +5,7 @@ season: 14 episode: 1 guests: - arsenykravchenko -image: images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg +image: images/podcast/building-scalable-and-reliable-machine-learning-systems.jpg ids: anchor: datatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q youtube: i-pIdekjUow diff --git a/_podcast/causal-inference-for-machine-learning.md b/_podcast/causal-inference-for-machine-learning.md index cae678ba..80fca817 100644 --- a/_podcast/causal-inference-for-machine-learning.md +++ b/_podcast/causal-inference-for-machine-learning.md @@ -5,7 +5,7 @@ season: 15 episode: 6 guests: - aleksandermolak -image: images/podcast/s15e06-democratizing-causality.jpg +image: images/podcast/causal-inference-for-machine-learning.jpg ids: anchor: datatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh youtube: 0I2FHH95Ofs diff --git a/_podcast/chief-data-officer-data-strategy-and-org-design.md b/_podcast/chief-data-officer-data-strategy-and-org-design.md index cff5d375..51e4c02c 100644 --- a/_podcast/chief-data-officer-data-strategy-and-org-design.md +++ 
b/_podcast/chief-data-officer-data-strategy-and-org-design.md @@ -5,7 +5,7 @@ season: 4 episode: 9 guests: - marcodesa -image: images/podcast/s04e09-chief-data-officer.jpg +image: images/podcast/chief-data-officer-data-strategy-and-org-design.jpg ids: youtube: IdaZOD46FEw anchor: Chief-Data-Officer---Marco-De-Sa-e16hm4t diff --git a/_podcast/cloud-data-governance.md b/_podcast/cloud-data-governance.md index 03433dab..c41b20a3 100644 --- a/_podcast/cloud-data-governance.md +++ b/_podcast/cloud-data-governance.md @@ -6,7 +6,7 @@ episode: 10 guests: - jessiashdown - urigilad -image: images/podcast/s03e10-data-governance.jpg +image: images/podcast/cloud-data-governance.jpg ids: youtube: tJ3v8h7A7RY anchor: Data-Governance---Jessi-Ashdown--Uri-Gilad-e12jmoo diff --git a/_podcast/community-building-and-teaching-in-ai-tech.md b/_podcast/community-building-and-teaching-in-ai-tech.md index 1559f5fa..7f25df9f 100644 --- a/_podcast/community-building-and-teaching-in-ai-tech.md +++ b/_podcast/community-building-and-teaching-in-ai-tech.md @@ -6,7 +6,7 @@ season: 18 episode: 5 guests: - erumafzal -image: images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg +image: images/podcast/community-building-and-teaching-in-ai-tech.jpg ids: anchor: datatalksclub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r youtube: 7SLd5V7z3xQ diff --git a/_podcast/crisp-dm.md b/_podcast/crisp-dm.md index 8f477053..6a3c6db5 100644 --- a/_podcast/crisp-dm.md +++ b/_podcast/crisp-dm.md @@ -5,7 +5,7 @@ season: 1 episode: 2 guests: - alexeygrigorev -image: images/podcast/s01e02-processes.jpg +image: images/podcast/crisp-dm.jpg ids: youtube: SesVTDklFYQ anchor: Processes-in-a-Data-Science-Project---Alexey-Grigorev-encdlg diff --git a/_podcast/data-centric.md b/_podcast/data-centric.md index 8eb40cb7..4a5aa45a 100644 --- a/_podcast/data-centric.md +++ b/_podcast/data-centric.md @@ -5,7 +5,7 @@ season: 12 episode: 3 guests: - marysiawinkels -image: 
images/podcast/s12e03-data-centric-ai.jpg +image: images/podcast/data-centric.jpg ids: anchor: Data-Centric-AI---Marysia-Winkels-e1shctn youtube: t3HDdVWQzNM diff --git a/_podcast/data-consulting-business-pricing-and-client-acquisition.md b/_podcast/data-consulting-business-pricing-and-client-acquisition.md index 06bacd54..f7de5e57 100644 --- a/_podcast/data-consulting-business-pricing-and-client-acquisition.md +++ b/_podcast/data-consulting-business-pricing-and-client-acquisition.md @@ -5,7 +5,7 @@ season: 13 episode: 4 guests: - aleksanderkruszelnicki -image: images/podcast/s13e04-starting-consultancy-in-data-space.jpg +image: images/podcast/data-consulting-business-pricing-and-client-acquisition.jpg ids: anchor: ow/datatalksclub/episodes/Starting-a-Consultancy-in-the-Data-Space---Aleksander-Kruszelnicki-e203c8g youtube: rh_pE35m3vE diff --git a/_podcast/data-engineering-career-path-and-skills.md b/_podcast/data-engineering-career-path-and-skills.md index 6f26f7e6..4f8e6a3c 100644 --- a/_podcast/data-engineering-career-path-and-skills.md +++ b/_podcast/data-engineering-career-path-and-skills.md @@ -5,7 +5,7 @@ season: 8 episode: 8 guests: - jeffkatz -image: images/podcast/s08e08-teaching-data-engineers.jpg +image: images/podcast/data-engineering-career-path-and-skills.jpg ids: anchor: Teaching-Data-Engineers---Jeff-Katz-e1iaoru youtube: dFo10l8B6Go diff --git a/_podcast/data-engineering-leadership-and-modern-data-platforms.md b/_podcast/data-engineering-leadership-and-modern-data-platforms.md index a50fc5e1..9dd76f72 100644 --- a/_podcast/data-engineering-leadership-and-modern-data-platforms.md +++ b/_podcast/data-engineering-leadership-and-modern-data-platforms.md @@ -5,7 +5,7 @@ season: 7 episode: 7 guests: - 16rahuljain -image: images/podcast/s07e07-becoming-a-data-engineering-manager.jpg +image: images/podcast/data-engineering-leadership-and-modern-data-platforms.jpg ids: anchor: Becoming-a-Data-Engineering-Manager---Rahul-Jain-e1f5nvf youtube: FljnbUQ796w 
diff --git a/_podcast/data-engineering-tools-modern-data-stack.md b/_podcast/data-engineering-tools-modern-data-stack.md index cdde41b0..aea7fc15 100644 --- a/_podcast/data-engineering-tools-modern-data-stack.md +++ b/_podcast/data-engineering-tools-modern-data-stack.md @@ -5,7 +5,7 @@ season: 5 episode: 2 guests: - nataliekwong -image: images/podcast/s05e02-data-engineering-acronyms.jpg +image: images/podcast/data-engineering-tools-modern-data-stack.jpg ids: youtube: t9Z1S3OYnJU anchor: Making-Sense-of-Data-Engineering-Acronyms-and-Buzzwords---Natalie-Kwong-e177303 diff --git a/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md index 56fd9648..c7f4a0c5 100644 --- a/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md +++ b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md @@ -5,7 +5,7 @@ season: 20 episode: 9 guests: - dimitrivisnadi -image: images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg +image: images/podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.jpg ids: anchor: datatalksclub/episodes/Can-You-Quit-Your-Job-and-Still-Succeed-as-a-Data-Freelancer-e360j7e youtube: S93V8RgwBig diff --git a/_podcast/data-governance-data-access-management.md b/_podcast/data-governance-data-access-management.md index 4f82c4da..f116b3a0 100644 --- a/_podcast/data-governance-data-access-management.md +++ b/_podcast/data-governance-data-access-management.md @@ -5,7 +5,7 @@ season: 14 episode: 4 guests: - bartvandekerckhove -image: images/podcast/s14e04-data-access-management.jpg +image: images/podcast/data-governance-data-access-management.jpg ids: anchor: ow/datatalksclub/episodes/Data-Access-Management---Bart-Vandekerckhove-e253r4u youtube: IiPOIiUy5b4 diff --git a/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md 
b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md index 48a845e6..181e0325 100644 --- a/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md +++ b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md @@ -5,7 +5,7 @@ season: 6 episode: 2 guests: - nicksingh -image: images/podcast/s06e02-non-technical-interviews.jpg +image: images/podcast/data-interview-behavioral-and-portfolio-prep-guide.jpg ids: youtube: tRdLVUKU7Bo anchor: Ace-Non-Technical-Data-Science-Interviews---Nick-Singh-e1a5qtd diff --git a/_podcast/data-journalism-python-visualization-storytelling.md b/_podcast/data-journalism-python-visualization-storytelling.md index d2104536..b51074f9 100644 --- a/_podcast/data-journalism-python-visualization-storytelling.md +++ b/_podcast/data-journalism-python-visualization-storytelling.md @@ -5,7 +5,7 @@ season: 11 episode: 8 guests: - angelicaloduca -image: images/podcast/s11e08-technical-writing-and-data-journalism.jpg +image: images/podcast/data-journalism-python-visualization-storytelling.jpg ids: anchor: Technical-Writing-and-Data-Journalism---Angelica-Lo-Duca-e1r7j8k youtube: uO_lk12q02A diff --git a/_podcast/data-leadership-coaching.md b/_podcast/data-leadership-coaching.md index 39e440ab..9538bcd1 100644 --- a/_podcast/data-leadership-coaching.md +++ b/_podcast/data-leadership-coaching.md @@ -5,7 +5,7 @@ season: 18 episode: 1 guests: - terezaiofciu -image: images/podcast/s18e01-inclusive-data-leadership-coaching.jpg +image: images/podcast/data-leadership-coaching.jpg ids: youtube: Z4vOTgzLkJQ links: diff --git a/_podcast/data-led-growth-event-tracking-and-reverse-etl.md b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md index ef835b8a..eda167d6 100644 --- a/_podcast/data-led-growth-event-tracking-and-reverse-etl.md +++ b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md @@ -5,7 +5,7 @@ season: 3 episode: 8 guests: - arpitchoudhury -image: images/podcast/s03e08-data-led-professional.jpg +image: 
images/podcast/data-led-growth-event-tracking-and-reverse-etl.jpg ids: youtube: 8v5KpHWgyYw anchor: Becoming-a-Data-led-Professional---Arpit-Choudhury-e11mkgq diff --git a/_podcast/data-mesh-architecture-decentralized-data-products.md b/_podcast/data-mesh-architecture-decentralized-data-products.md index 27c7095e..4f77c1a0 100644 --- a/_podcast/data-mesh-architecture-decentralized-data-products.md +++ b/_podcast/data-mesh-architecture-decentralized-data-products.md @@ -5,7 +5,7 @@ season: 10 episode: 6 guests: - zhamakdehghani -image: images/podcast/s10e06-data-mesh-101.jpg +image: images/podcast/data-mesh-architecture-decentralized-data-products.jpg ids: anchor: Data-Mesh-101---Zhamak-Dehghani-e1n7vlk youtube: 346N_pCtYZU diff --git a/_podcast/data-privacy-engineering-gdpr-machine-learning.md b/_podcast/data-privacy-engineering-gdpr-machine-learning.md index b289b6fd..934e9de2 100644 --- a/_podcast/data-privacy-engineering-gdpr-machine-learning.md +++ b/_podcast/data-privacy-engineering-gdpr-machine-learning.md @@ -5,7 +5,7 @@ season: 14 episode: 2 guests: - katharinejarmul -image: images/podcast/s14e02-practical-data-privacy.jpg +image: images/podcast/data-privacy-engineering-gdpr-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/Practical-Data-Privacy---Katharine-Jarmul-e23u551 youtube: gbjoFfrm4iw diff --git a/_podcast/data-professionals-business-skills-in-saas.md b/_podcast/data-professionals-business-skills-in-saas.md index 34b10f8e..84717f1f 100644 --- a/_podcast/data-professionals-business-skills-in-saas.md +++ b/_podcast/data-professionals-business-skills-in-saas.md @@ -5,7 +5,7 @@ season: 12 episode: 2 guests: - lorismarini -image: images/podcast/s12e02-business-skills-for-data-professionals.jpg +image: images/podcast/data-professionals-business-skills-in-saas.jpg ids: anchor: Business-Skills-for-Data-Professionals---Loris-Marini-e1s89hu youtube: xMYRUiTu960 diff --git a/_podcast/data-quality-data-observability-data-reliability.md 
b/_podcast/data-quality-data-observability-data-reliability.md index 833e3fab..df44bbcd 100644 --- a/_podcast/data-quality-data-observability-data-reliability.md +++ b/_podcast/data-quality-data-observability-data-reliability.md @@ -5,7 +5,7 @@ season: 3 episode: 3 guests: - barrmoses -image: images/podcast/s03e03-data-observability.jpg +image: images/podcast/data-quality-data-observability-data-reliability.jpg ids: youtube: TrMG1SOqZkQ anchor: Data-Observability---Barr-Moses-evghmh diff --git a/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md index 6f734daa..47d4b8b0 100644 --- a/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md +++ b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md @@ -5,7 +5,7 @@ season: 13 episode: 2 guests: - parvathykrishnan -image: images/podcast/s13e02-analytics-for-better-world.jpg +image: images/podcast/data-science-and-analytics-for-nonprofits-tech-for-good.jpg ids: anchor: Analytics-for-a-Better-World---Parvathy-Krishnan-e1vo27h youtube: b6x5zZ3C6sQ diff --git a/_podcast/data-science-career-abc-framework.md b/_podcast/data-science-career-abc-framework.md index 189e5132..2847dc66 100644 --- a/_podcast/data-science-career-abc-framework.md +++ b/_podcast/data-science-career-abc-framework.md @@ -5,7 +5,7 @@ season: 2 episode: 7 guests: - dannyma -image: images/podcast/s02e07-abc-data-science.jpg +image: images/podcast/data-science-career-abc-framework.jpg ids: youtube: HVQ0DZOQcts anchor: The-ABCs-of-Data-Science---Danny-Ma-er33oa diff --git a/_podcast/data-science-failures-and-mlops-lessons.md b/_podcast/data-science-failures-and-mlops-lessons.md index a59121d5..04c6447a 100644 --- a/_podcast/data-science-failures-and-mlops-lessons.md +++ b/_podcast/data-science-failures-and-mlops-lessons.md @@ -5,7 +5,7 @@ season: 3 episode: 9 guests: - yurykashnitsky -image: 
images/podcast/s03e09-what-data-scientists-dont-mention.jpg +image: images/podcast/data-science-failures-and-mlops-lessons.jpg ids: youtube: c6dK1LWpv4g anchor: What-Data-Scientists-Dont-Mention-in-Their-LinkedIn-Profiles---Yury-Kashnitsky-e125jjl diff --git a/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md index 3dcbacb3..ee90c2da 100644 --- a/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md +++ b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md @@ -5,7 +5,7 @@ season: 10 episode: 1 guests: - christinecepelak -image: images/podcast/s10e01-data-science-for-social-impact.jpg +image: images/podcast/data-science-for-public-policy-ethical-ai-social-impact.jpg ids: anchor: Data-Science-for-Social-Impact---Christine-Cepelak-e1li47e youtube: xWC1HAfekRk diff --git a/_podcast/data-science-interview-and-cv-guide.md b/_podcast/data-science-interview-and-cv-guide.md index b425ca5a..02e171bc 100644 --- a/_podcast/data-science-interview-and-cv-guide.md +++ b/_podcast/data-science-interview-and-cv-guide.md @@ -5,7 +5,7 @@ season: 3 episode: 4 guests: - olegnovikov -image: images/podcast/s03e04-interviewing-300-data-scientists.jpg +image: images/podcast/data-science-interview-and-cv-guide.jpg ids: youtube: AYi7b-8GPm4 anchor: What-I-Learned-After-Interviewing-300-Data-Scientists---Oleg-Novikov-e10ctbs diff --git a/_podcast/data-science-job-red-flags-and-mismatched-roles.md b/_podcast/data-science-job-red-flags-and-mismatched-roles.md index 1a9ec781..388c3347 100644 --- a/_podcast/data-science-job-red-flags-and-mismatched-roles.md +++ b/_podcast/data-science-job-red-flags-and-mismatched-roles.md @@ -5,7 +5,7 @@ season: 10 episode: 2 guests: - terezaiofciu -image: images/podcast/s10e02-decoding-data-science-job-descriptions.jpg +image: images/podcast/data-science-job-red-flags-and-mismatched-roles.jpg ids: anchor: 
Decoding-Data-Science-Job-Descriptions---Tereza-Iofciu-e1m079l youtube: bqxBiIwtmX4 diff --git a/_podcast/data-science-leadership-hiring-mlops.md b/_podcast/data-science-leadership-hiring-mlops.md index 3b20b64b..ad7cc2c3 100644 --- a/_podcast/data-science-leadership-hiring-mlops.md +++ b/_podcast/data-science-leadership-hiring-mlops.md @@ -5,7 +5,7 @@ season: 6 episode: 9 guests: - marianosemelman -image: images/podcast/s06e09-data-science-manager.jpg +image: images/podcast/data-science-leadership-hiring-mlops.jpg ids: youtube: qOLR84-KHoY anchor: Becoming-a-Data-Science-Manager---Mariano-Semelman-e1cbrf7 diff --git a/_podcast/data-science-management-and-agile-machine-learning.md b/_podcast/data-science-management-and-agile-machine-learning.md index 67c6efdc..b065a4c0 100644 --- a/_podcast/data-science-management-and-agile-machine-learning.md +++ b/_podcast/data-science-management-and-agile-machine-learning.md @@ -5,7 +5,7 @@ season: 13 episode: 6 guests: - shirmeirlador -image: images/podcast/s13e06-secret-sauce-of-data-science-management.jpg +image: images/podcast/data-science-management-and-agile-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/The-Secret-Sauce-of-Data-Science-Management---Shir-Meir-Lador-e21cu92 youtube: gcxP0qRO-MY diff --git a/_podcast/data-science-manager-vs-expert-hiring-guide.md b/_podcast/data-science-manager-vs-expert-hiring-guide.md index 29f1571c..be5180ed 100644 --- a/_podcast/data-science-manager-vs-expert-hiring-guide.md +++ b/_podcast/data-science-manager-vs-expert-hiring-guide.md @@ -5,7 +5,7 @@ season: 6 episode: 3 guests: - barbarasobkowiak -image: images/podcast/s06e03-manager-vs-expert.jpg +image: images/podcast/data-science-manager-vs-expert-hiring-guide.jpg ids: youtube: hFmIgaN-F8Y anchor: Data-Science-Manager-vs-Data-Science-Expert---Barbara-Sobkowiak-e1ah3od diff --git a/_podcast/data-science-team-structure-and-org-design.md b/_podcast/data-science-team-structure-and-org-design.md index c0413df0..ff85cf8e 
100644 --- a/_podcast/data-science-team-structure-and-org-design.md +++ b/_podcast/data-science-team-structure-and-org-design.md @@ -5,7 +5,7 @@ season: 9 episode: 7 guests: - lisacohen -image: images/podcast/s09e07-designing-data-science-organization.jpg +image: images/podcast/data-science-team-structure-and-org-design.jpg ids: anchor: Designing-a-Data-Science-Organization---Lisa-Cohen-e1kcm5e youtube: F_rJ4fg5ZEA diff --git a/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md index c12cc513..9615cedf 100644 --- a/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md +++ b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md @@ -5,7 +5,7 @@ season: 12 episode: 5 guests: - paulineclavelloux -image: images/podcast/s12e05-indie-hacking.jpg +image: images/podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.jpg ids: anchor: Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb youtube: KsV_SVXlTo8 diff --git a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md index 9fb06e8e..5388e59f 100644 --- a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md +++ b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md @@ -5,7 +5,7 @@ season: 14 episode: 3 guests: - boyanangelov -image: images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg +image: images/podcast/data-strategy-and-dataops-for-ai-powered-products.jpg ids: anchor: datatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete youtube: jGbfeYdlCiQ diff --git a/_podcast/data-team-roles.md b/_podcast/data-team-roles.md index 04515d84..1562a5c5 100644 --- a/_podcast/data-team-roles.md +++ b/_podcast/data-team-roles.md @@ -5,7 +5,7 @@ season: 1 episode: 1 guests: - alexeygrigorev -image: images/podcast/s01e01-roles.jpg 
+image: images/podcast/data-team-roles.jpg ids: youtube: UukjwSIAnpw anchor: Roles-in-a-data-team---Alexey-Grigorev-emqcft diff --git a/_podcast/data-translator-role-and-data-strategy.md b/_podcast/data-translator-role-and-data-strategy.md index 0c7ebca3..5639b204 100644 --- a/_podcast/data-translator-role-and-data-strategy.md +++ b/_podcast/data-translator-role-and-data-strategy.md @@ -5,7 +5,7 @@ season: 3 episode: 4 guests: - liorbarak -image: images/podcast/s03e04-effective-communication-with-business.jpg +image: images/podcast/data-translator-role-and-data-strategy.jpg ids: youtube: gqroEsTyLD0 anchor: Effective-Communication-with-Business-for-Data-Professionals---Lior-Barak-e1002rm diff --git a/_podcast/dataops-and-gitops-best-practices-for-data-teams.md b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md index 44e8214d..371de265 100644 --- a/_podcast/dataops-and-gitops-best-practices-for-data-teams.md +++ b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md @@ -5,7 +5,7 @@ season: 11 episode: 3 guests: - tomaszhinc -image: images/podcast/s11e03-from-data-science-to-dataops.jpg +image: images/podcast/dataops-and-gitops-best-practices-for-data-teams.jpg ids: anchor: From-Data-Science-to-DataOps---Tomasz-Hinc-e1p7sjb youtube: lem7knxqNzg diff --git a/_podcast/dataops-automation-and-reliable-data-pipelines.md b/_podcast/dataops-automation-and-reliable-data-pipelines.md index 8513d5b7..f2bd2694 100644 --- a/_podcast/dataops-automation-and-reliable-data-pipelines.md +++ b/_podcast/dataops-automation-and-reliable-data-pipelines.md @@ -5,7 +5,7 @@ season: 8 episode: 5 guests: - christopherbergh -image: images/podcast/s08e05-storytime-for-dataops.jpg +image: images/podcast/dataops-automation-and-reliable-data-pipelines.jpg ids: anchor: Storytime-for-DataOps---Christopher-Bergh-e1hgl0m youtube: 0Fx5PCoLkf4 diff --git a/_podcast/dataops-for-data-engineering.md b/_podcast/dataops-for-data-engineering.md index b29e1ef8..1d88d826 100644 --- 
a/_podcast/dataops-for-data-engineering.md +++ b/_podcast/dataops-for-data-engineering.md @@ -6,7 +6,7 @@ season: 18 episode: 9 guests: - christopherbergh -image: images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg +image: images/podcast/dataops-for-data-engineering.jpg ids: anchor: datatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f youtube: HzGpIxV8HtA diff --git a/_podcast/dataops-principles-and-scalable-data-platforms.md b/_podcast/dataops-principles-and-scalable-data-platforms.md index 58923da2..1f414e51 100644 --- a/_podcast/dataops-principles-and-scalable-data-platforms.md +++ b/_podcast/dataops-principles-and-scalable-data-platforms.md @@ -5,7 +5,7 @@ season: 2 episode: 11 guests: - larsalbertsson -image: images/podcast/s02e11-dataops.jpg +image: images/podcast/dataops-principles-and-scalable-data-platforms.jpg ids: youtube: vyF3yGsF6UY anchor: DataOps-101---Lars-Albertsson-ethsp1 diff --git a/_podcast/datatalksclub-building-scaling-data-community.md b/_podcast/datatalksclub-building-scaling-data-community.md index 074bf74f..d469ab3a 100644 --- a/_podcast/datatalksclub-building-scaling-data-community.md +++ b/_podcast/datatalksclub-building-scaling-data-community.md @@ -6,7 +6,7 @@ episode: 1 guests: - eugeneyan - alexeygrigorev -image: images/podcast/s07e01-datatalksclub-behind-the-scenes.jpg +image: images/podcast/datatalksclub-building-scaling-data-community.jpg ids: youtube: IxTyq96juVE anchor: DataTalks-Club-Behind-the-Scenes---Eugene-Yan--Alexey-Grigorev-e1d4567 diff --git a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md index d5d2e79b..6dc26817 100644 --- a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md +++ b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md @@ -7,7 +7,7 @@ episode: 1 
guests: - alexeygrigorev - johannabayer -image: images/podcast/s16e01-datatalks-club-anniversary-interview.jpg +image: images/podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.jpg ids: anchor: datatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo youtube: nCqwZT9zA0M diff --git a/_podcast/datatalksclub-scaling-and-free-courses.md b/_podcast/datatalksclub-scaling-and-free-courses.md index a0eeea96..e56caeed 100644 --- a/_podcast/datatalksclub-scaling-and-free-courses.md +++ b/_podcast/datatalksclub-scaling-and-free-courses.md @@ -6,7 +6,7 @@ season: 19 episode: 3 guests: - alexeygrigorev -image: images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg +image: images/podcast/datatalksclub-scaling-and-free-courses.jpg ids: anchor: datatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 youtube: GHbeXIKnkLQ diff --git a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md index 86aaf2ee..155e4dfa 100644 --- a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md +++ b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md @@ -5,7 +5,7 @@ season: 15 episode: 3 guests: - meryemarik -image: images/podcast/s15e03-llms-for-everyone.jpg +image: images/podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.jpg ids: anchor: datatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf youtube: 6dn6uZFkk04 diff --git a/_podcast/developer-personal-brand-learn-in-public.md b/_podcast/developer-personal-brand-learn-in-public.md index 50a723e1..3666afec 100644 --- a/_podcast/developer-personal-brand-learn-in-public.md +++ b/_podcast/developer-personal-brand-learn-in-public.md @@ -5,7 +5,7 @@ season: 3 episode: 7 guests: - swyx -image: images/podcast/s03e07-market-yourself.jpg 
+image: images/podcast/developer-personal-brand-learn-in-public.jpg ids: youtube: tkBCPqWKCL8 anchor: How-to-Market-Yourself-without-Being-a-Celebrity---Shawn-Swyx-Wang-e11ai8t diff --git a/_podcast/devrel-data-science-open-source-tools.md b/_podcast/devrel-data-science-open-source-tools.md index dfecf9d3..1f4fd893 100644 --- a/_podcast/devrel-data-science-open-source-tools.md +++ b/_podcast/devrel-data-science-open-source-tools.md @@ -5,7 +5,7 @@ season: 2 episode: 2 guests: - elleobrien -image: images/podcast/s02e02-developer-advocacy.jpg +image: images/podcast/devrel-data-science-open-source-tools.jpg ids: youtube: jv5W4jXk4P4 anchor: Developer-Advocacy-for-Data-Science---Elle-OBrien-epcbak diff --git a/_podcast/devrel-open-source-machine-learning.md b/_podcast/devrel-open-source-machine-learning.md index 7455eb0e..baa82839 100644 --- a/_podcast/devrel-open-source-machine-learning.md +++ b/_podcast/devrel-open-source-machine-learning.md @@ -5,7 +5,7 @@ season: 14 episode: 6 guests: - hugobowneanderson -image: images/podcast/s14e06-data-developer-relations.jpg +image: images/podcast/devrel-open-source-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/Data-Developer-Relations---Hugo-Bowne-Anderson-e25q88q youtube: z7BvslwVRbQ diff --git a/_podcast/fairness-in-ai-ml-engineering.md b/_podcast/fairness-in-ai-ml-engineering.md index ce47c6de..8dacb4bf 100644 --- a/_podcast/fairness-in-ai-ml-engineering.md +++ b/_podcast/fairness-in-ai-ml-engineering.md @@ -6,7 +6,7 @@ season: 19 episode: 9 guests: - tamaraatanasoska -image: images/podcast/s19e09-linguistics-and-fairness.jpg +image: images/podcast/fairness-in-ai-ml-engineering.jpg ids: anchor: datatalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 youtube: sXU9vMDBjmk diff --git a/_podcast/feature-engineering-model-monitoring-and-data-governance.md b/_podcast/feature-engineering-model-monitoring-and-data-governance.md index 4883e648..c19062d9 100644 --- 
a/_podcast/feature-engineering-model-monitoring-and-data-governance.md +++ b/_podcast/feature-engineering-model-monitoring-and-data-governance.md @@ -5,7 +5,7 @@ season: 5 episode: 9 guests: - thomives -image: images/podcast/s05e09-business-acumen.jpg +image: images/podcast/feature-engineering-model-monitoring-and-data-governance.jpg ids: youtube: pImYf9ML95Q anchor: Building-Business-Acumen-for-Data-Professionals---Thom-Ives-e19gq91 diff --git a/_podcast/finops-for-data-engineers.md b/_podcast/finops-for-data-engineers.md index 3cb343a4..d2b46ea5 100644 --- a/_podcast/finops-for-data-engineers.md +++ b/_podcast/finops-for-data-engineers.md @@ -5,7 +5,7 @@ season: 20 episode: 6 guests: - eddyzulkifly -image: images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg +image: images/podcast/finops-for-data-engineers.jpg ids: anchor: datatalksclub/episodes/From-Supply-Chain-Management-to-Digital-Warehousing-and-FinOps---Eddy-Zulkifly-e313t7b youtube: 7ePp6wuxM5s diff --git a/_podcast/freelance-data-engineering-pricing-and-clients.md b/_podcast/freelance-data-engineering-pricing-and-clients.md index 890c945e..d3f0180c 100644 --- a/_podcast/freelance-data-engineering-pricing-and-clients.md +++ b/_podcast/freelance-data-engineering-pricing-and-clients.md @@ -5,7 +5,7 @@ season: 9 episode: 4 guests: - adrianbrudaru -image: images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg +image: images/podcast/freelance-data-engineering-pricing-and-clients.jpg ids: anchor: Freelancing-and-Consulting-with-Data-Engineering---Adrian-Brudaru-e1jtkkg youtube: 9DTTrN-khCk diff --git a/_podcast/freelancing-in-machine-learning.md b/_podcast/freelancing-in-machine-learning.md index f5624680..56659d22 100644 --- a/_podcast/freelancing-in-machine-learning.md +++ b/_podcast/freelancing-in-machine-learning.md @@ -5,7 +5,7 @@ season: 4 episode: 8 guests: - mikiobraun -image: images/podcast/s04e08-freelancing.jpg +image: 
images/podcast/freelancing-in-machine-learning.jpg ids: youtube: HfF791e0HR8 anchor: Freelancing-in-Machine-Learning---Mikio-Braun-e166n7r diff --git a/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md index 9e887262..71f2d489 100644 --- a/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md +++ b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md @@ -5,7 +5,7 @@ season: 12 episode: 9 guests: - tatianagabruseva -image: images/podcast/s12e09-staff-ai-engineer.jpg +image: images/podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.jpg ids: anchor: Staff-AI-Engineer---Tatiana-Gabruseva-e1v3on7 youtube: _xr1_xb736E diff --git a/_podcast/from-academic-research-to-data-engineering-freelancing.md b/_podcast/from-academic-research-to-data-engineering-freelancing.md index d1227b3e..efed3052 100644 --- a/_podcast/from-academic-research-to-data-engineering-freelancing.md +++ b/_podcast/from-academic-research-to-data-engineering-freelancing.md @@ -6,7 +6,7 @@ season: 21 episode: 1 guests: - orellgarten -image: images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg +image: images/podcast/from-academic-research-to-data-engineering-freelancing.jpg ids: anchor: datatalksclub/episodes/From-Simulations-to-Freelance-Data-Engineering-Orells-Journey-Out-of-Academia-and-Into-Consulting---Orell-Garten-e369a6b youtube: pkcpH5N-GP8 diff --git a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md index bb55f1d9..c209204b 100644 --- a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md +++ 
b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md @@ -6,7 +6,7 @@ season: 19 episode: 7 guests: - isabellabicalho -image: images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg +image: images/podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.jpg ids: anchor: datatalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura youtube: GifY8Zn-pnU diff --git a/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md index dab4bb1a..abfc5d52 100644 --- a/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md +++ b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md @@ -6,7 +6,7 @@ season: 22 episode: 2 guests: - aishwaryajadhav -image: images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg +image: images/podcast/from-computer-vision-research-to-autonomous-driving-ai.jpg ids: anchor: datatalksclub/episodes/Lessons-from-Applied-AI-Tesla--Waymo--and-Beyond---Aishwarya-Jadhav-e39befu youtube: vK_SxyqIfwk diff --git a/_podcast/from-data-freelancer-to-startup-open-source-products.md b/_podcast/from-data-freelancer-to-startup-open-source-products.md index 3e8dd879..435d01fc 100644 --- a/_podcast/from-data-freelancer-to-startup-open-source-products.md +++ b/_podcast/from-data-freelancer-to-startup-open-source-products.md @@ -5,7 +5,7 @@ season: 17 episode: 1 guests: - adrianbrudaru -image: images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg +image: images/podcast/from-data-freelancer-to-startup-open-source-products.jpg ids: anchor: datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k youtube: vOpEQiCsaLw diff --git 
a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md index c1203b7d..066519b7 100644 --- a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md +++ b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md @@ -6,7 +6,7 @@ season: 19 episode: 8 guests: - agitajaunzeme -image: images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg +image: images/podcast/from-devops-to-data-engineering-automation-open-source-volunteering.jpg ids: anchor: datatalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv youtube: QKWu5-6_6TE diff --git a/_podcast/from-game-ai-to-modern-ai-agents.md b/_podcast/from-game-ai-to-modern-ai-agents.md index adf805c5..a303ba0e 100644 --- a/_podcast/from-game-ai-to-modern-ai-agents.md +++ b/_podcast/from-game-ai-to-modern-ai-agents.md @@ -6,7 +6,7 @@ season: 21 episode: 7 guests: - micheallanham -image: images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg +image: images/podcast/from-game-ai-to-modern-ai-agents.jpg ids: anchor: datatalksclub/episodes/Lessons-from-Two-Decades-of-AI---Micheal-Lanham-e38oarc youtube: DSxqUlumM3A diff --git a/_podcast/from-iot-data-engineering-to-leading-data-architect.md b/_podcast/from-iot-data-engineering-to-leading-data-architect.md index b9cd26f1..7cb8dce2 100644 --- a/_podcast/from-iot-data-engineering-to-leading-data-architect.md +++ b/_podcast/from-iot-data-engineering-to-leading-data-architect.md @@ -5,7 +5,7 @@ season: 15 episode: 8 guests: - loicmagnien -image: images/podcast/s15e08-from-data-manager-to-data-architect.jpg +image: images/podcast/from-iot-data-engineering-to-leading-data-architect.jpg ids: anchor: datatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 youtube: qWG--iYO2uc diff --git 
a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md index aebf3f69..5002806f 100644 --- a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md +++ b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md @@ -6,7 +6,7 @@ season: 19 episode: 5 guests: - anastasiakaravdina -image: images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg +image: images/podcast/from-large-hadron-collider-to-data-science-research-software-engineering.jpg ids: anchor: datatalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth youtube: kV0ZDy2UtJA diff --git a/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md index 5659bd38..fbf2e5ef 100644 --- a/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md +++ b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md @@ -5,7 +5,7 @@ season: 11 episode: 7 guests: - nikolamaksimovic -image: images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg +image: images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg ids: anchor: From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s youtube: GawJ7mG5ElQ diff --git a/_podcast/from-math-graduate-to-data-analytics.md b/_podcast/from-math-graduate-to-data-analytics.md index 8c6c08fa..0a3f1eee 100644 --- a/_podcast/from-math-graduate-to-data-analytics.md +++ b/_podcast/from-math-graduate-to-data-analytics.md @@ -5,7 +5,7 @@ season: 7 episode: 9 guests: - juanpablo -image: images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg +image: images/podcast/from-math-graduate-to-data-analytics.jpg ids: anchor: From-Math-Teacher-to-Analytics-Engineer---Juan-Pablo-e1fplc1 youtube: 
qh6-HDhw2xY diff --git a/_podcast/from-physics-to-computer-vision-career-transition.md b/_podcast/from-physics-to-computer-vision-career-transition.md index fbf41f2f..b6cb54ba 100644 --- a/_podcast/from-physics-to-computer-vision-career-transition.md +++ b/_podcast/from-physics-to-computer-vision-career-transition.md @@ -5,7 +5,7 @@ season: 3 episode: 6 guests: - tatianagabruseva -image: images/podcast/s03e06-from-physics-to-machine-learning.jpg +image: images/podcast/from-physics-to-computer-vision-career-transition.jpg ids: youtube: wJPi6Ip9PX0 anchor: From-Physics-to-Machine-Learning---Tatiana-Gabruseva-e10r4pl diff --git a/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md index c1e009d8..c54e31fb 100644 --- a/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md +++ b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md @@ -6,7 +6,7 @@ season: 21 episode: 5 guests: - danielegbo -image: images/podcast/s21e05-from-astronomy-to-applied-ml.jpg +image: images/podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.jpg ids: anchor: datatalksclub/episodes/From-Astronomy-to-Applied-ML---Daniel-Egbo-e38ha20 youtube: b92gwrsVQtg diff --git a/_podcast/from-semiconductor-data-to-applied-machine-learning.md b/_podcast/from-semiconductor-data-to-applied-machine-learning.md index 1789b6d6..0cd32820 100644 --- a/_podcast/from-semiconductor-data-to-applied-machine-learning.md +++ b/_podcast/from-semiconductor-data-to-applied-machine-learning.md @@ -6,7 +6,7 @@ season: 21 episode: 8 guests: - dashelruizperez -image: images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg +image: images/podcast/from-semiconductor-data-to-applied-machine-learning.jpg ids: anchor: datatalksclub/episodes/From-Semiconductors-to-Machine-Learning-A-Career-in-Data-and-Teaching-e395t53 youtube: B2tzuUg5uZs diff 
--git a/_podcast/from-software-engineer-to-machine-learning.md b/_podcast/from-software-engineer-to-machine-learning.md index 8f79d185..1b4c0b93 100644 --- a/_podcast/from-software-engineer-to-machine-learning.md +++ b/_podcast/from-software-engineer-to-machine-learning.md @@ -5,7 +5,7 @@ season: 4 episode: 1 guests: - svpino -image: images/podcast/s04e01-from-swe-to-ml.jpg +image: images/podcast/from-software-engineer-to-machine-learning.jpg ids: youtube: xVYOdRrN7hw anchor: From-Software-Engineering-to-Machine-Learning---Santiago-Valdarrama-e139s63 diff --git a/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md index 84a900f7..99466af1 100644 --- a/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md +++ b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md @@ -5,7 +5,7 @@ season: 7 episode: 8 guests: - ellenkonig -image: images/podcast/s07e08-from-data-science-to-data-engineering.jpg +image: images/podcast/from-software-engineering-data-science-to-data-engineering-leadership.jpg ids: anchor: From-Data-Science-to-Data-Engineering---Ellen-Knig-e1fgfbm youtube: 3TTu-hYzxeg diff --git a/_podcast/from-software-engineering-to-leading-data-science-teams.md b/_podcast/from-software-engineering-to-leading-data-science-teams.md index 2dbafc14..e75550ad 100644 --- a/_podcast/from-software-engineering-to-leading-data-science-teams.md +++ b/_podcast/from-software-engineering-to-leading-data-science-teams.md @@ -5,7 +5,7 @@ season: 12 episode: 1 guests: - sadatanwar -image: images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg +image: images/podcast/from-software-engineering-to-leading-data-science-teams.jpg ids: anchor: From-Software-Engineer-to-Data-Science-Manager---Sadat-Anwar-e1rqkdf youtube: xyTfqIWeKf8 diff --git 
a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md index c1bad849..88c94674 100644 --- a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md +++ b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md @@ -6,7 +6,7 @@ season: 16 episode: 6 guests: - jackblandin -image: images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg +image: images/podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.jpg ids: anchor: datatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk youtube: su2M058m3Lw diff --git a/_podcast/from-startup-engineering-to-freelance-data-science.md b/_podcast/from-startup-engineering-to-freelance-data-science.md index 8de130ba..65c5de1b 100644 --- a/_podcast/from-startup-engineering-to-freelance-data-science.md +++ b/_podcast/from-startup-engineering-to-freelance-data-science.md @@ -5,7 +5,7 @@ season: 14 episode: 5 guests: - antonisstellas -image: images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg +image: images/podcast/from-startup-engineering-to-freelance-data-science.jpg ids: anchor: ow/datatalksclub/episodes/Lessons-Learned-from-Freelancing-and-Working-in-a-Start-up---Antonis-Stellas-e25g94r youtube: -Gj7SaI-QW4 diff --git a/_podcast/generative-ai-chatbots-in-production-security.md b/_podcast/generative-ai-chatbots-in-production-security.md index fd125454..8dfd70c2 100644 --- a/_podcast/generative-ai-chatbots-in-production-security.md +++ b/_podcast/generative-ai-chatbots-in-production-security.md @@ -6,7 +6,7 @@ season: 19 episode: 6 guests: - mariasukhareva -image: images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg +image: images/podcast/generative-ai-chatbots-in-production-security.jpg ids: anchor: 
datatalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 youtube: bT7-HRNCltk diff --git a/_podcast/get-data-analytics-and-data-engineering-job.md b/_podcast/get-data-analytics-and-data-engineering-job.md index e9412593..e0ab0de5 100644 --- a/_podcast/get-data-analytics-and-data-engineering-job.md +++ b/_podcast/get-data-analytics-and-data-engineering-job.md @@ -5,7 +5,7 @@ season: 8 episode: 9 guests: - gloriaquiceno -image: images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg +image: images/podcast/get-data-analytics-and-data-engineering-job.jpg ids: anchor: From-Academia-to-Data-Analytics-and-Engineering---Gloria-Quiceno-e1ikrd8 youtube: 0wANfIvum4U diff --git a/_podcast/get-data-engineering-job-prep-and-interview.md b/_podcast/get-data-engineering-job-prep-and-interview.md index 08dee9ca..6ea2e9be 100644 --- a/_podcast/get-data-engineering-job-prep-and-interview.md +++ b/_podcast/get-data-engineering-job-prep-and-interview.md @@ -5,7 +5,7 @@ season: 9 episode: 3 guests: - jeffkatz -image: images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg +image: images/podcast/get-data-engineering-job-prep-and-interview.jpg ids: anchor: Getting-a-Data-Engineering-Job-Summary-and-QA---Jeff-Katz-e1jljmd youtube: asnt7xlyZXQ diff --git a/_podcast/get-data-scientist-job.md b/_podcast/get-data-scientist-job.md index 83343494..8762cb00 100644 --- a/_podcast/get-data-scientist-job.md +++ b/_podcast/get-data-scientist-job.md @@ -5,7 +5,7 @@ season: 1 episode: 4 guests: - lukewhipps -image: images/podcast/s01e04-standing-out-as-a-data-scientist.jpg +image: images/podcast/get-data-scientist-job.jpg ids: youtube: Sb4CJlonB3c anchor: Standing-out-as-a-Data-Scientist---Luke-Whipps-envr7e diff --git a/_podcast/get-junior-data-job-and-transferable-skills.md b/_podcast/get-junior-data-job-and-transferable-skills.md index 5ed42e14..fd2bdf0e 100644 --- 
a/_podcast/get-junior-data-job-and-transferable-skills.md +++ b/_podcast/get-junior-data-job-and-transferable-skills.md @@ -5,7 +5,7 @@ season: 7 episode: 4 guests: - lindsaymcquade -image: images/podcast/s07e04-career-coaching.jpg +image: images/podcast/get-junior-data-job-and-transferable-skills.jpg ids: anchor: Career-Coaching---Lindsay-McQuade-e1e8elk youtube: _U8GrYJvmJM diff --git a/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md index be964fcd..0b56a430 100644 --- a/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md +++ b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md @@ -5,7 +5,7 @@ season: 11 episode: 2 guests: - katiebauer -image: images/podcast/s11e02-data-science-career-development.jpg +image: images/podcast/hiring-and-managing-data-science-teams-in-b2b-saas.jpg ids: anchor: Data-Science-Career-Development---Katie-Bauer-e1oq96g youtube: i1NHRroQClQ diff --git a/_podcast/hiring-data-scientists-and-analysts.md b/_podcast/hiring-data-scientists-and-analysts.md index 3fcc352d..653c45a4 100644 --- a/_podcast/hiring-data-scientists-and-analysts.md +++ b/_podcast/hiring-data-scientists-and-analysts.md @@ -5,7 +5,7 @@ season: 7 episode: 2 guests: - alicjanotowska -image: images/podcast/s07e02-recruiting-data-professionals.jpg +image: images/podcast/hiring-data-scientists-and-analysts.jpg ids: youtube: WSMDXsjKYx4 anchor: Recruiting-Data-Professionals---Alicja-Notowska-e1dj2qn diff --git a/_podcast/hiring-for-data-engineering-jobs-in-europe.md b/_podcast/hiring-for-data-engineering-jobs-in-europe.md index 987f9c32..49819543 100644 --- a/_podcast/hiring-for-data-engineering-jobs-in-europe.md +++ b/_podcast/hiring-for-data-engineering-jobs-in-europe.md @@ -5,7 +5,7 @@ season: 8 episode: 6 guests: - nicolasrassam -image: images/podcast/s08e06-recruiting-data-engineers.jpg +image: images/podcast/hiring-for-data-engineering-jobs-in-europe.jpg ids: anchor: 
Recruiting-Data-Engineers---Nicolas-Rassam-e1hnkl1 youtube: hylxiu4VGTo diff --git a/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md index 2bc21ae4..06c6f476 100644 --- a/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md +++ b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md @@ -5,7 +5,7 @@ season: 9 episode: 9 guests: - olgaivina -image: images/podcast/s09e09-hiring-data-science-talent.jpg +image: images/podcast/hiring-for-data-science-jobs-interview-questions-skills.md.jpg ids: anchor: Hiring-Data-Science-Talent---Olga-Ivina-e1l4aku youtube: Af9t9r2b0z0 diff --git a/_podcast/how-to-break-into-data-science.md b/_podcast/how-to-break-into-data-science.md index 2344ce50..aa63439f 100644 --- a/_podcast/how-to-break-into-data-science.md +++ b/_podcast/how-to-break-into-data-science.md @@ -5,7 +5,7 @@ season: 9 episode: 5 guests: - misraturp -image: images/podcast/s09e05-data-scientists-at-work.jpg +image: images/podcast/how-to-break-into-data-science.jpg ids: anchor: Data-Scientists-at-Work---Msra-Turp-e1k7pbn youtube: oUycqtMoYr8 diff --git a/_podcast/how-to-grow-your-ml-engineering-career.md b/_podcast/how-to-grow-your-ml-engineering-career.md index 064728de..04db88fc 100644 --- a/_podcast/how-to-grow-your-ml-engineering-career.md +++ b/_podcast/how-to-grow-your-ml-engineering-career.md @@ -5,7 +5,7 @@ season: 12 episode: 7 guests: - krzysztofszafanek -image: images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg +image: images/podcast/how-to-grow-your-ml-engineering-career.jpg ids: anchor: Navigating-Career-Changes-in-Machine-Learning---Chris-Szafranek-e1ucvn2 youtube: cUxZBXQgZaU diff --git a/_podcast/how-to-stand-out-in-data-science.md b/_podcast/how-to-stand-out-in-data-science.md index aa851e34..62b0a53d 100644 --- a/_podcast/how-to-stand-out-in-data-science.md +++ 
b/_podcast/how-to-stand-out-in-data-science.md @@ -5,7 +5,7 @@ season: 8 episode: 2 guests: - marijnmarkus -image: images/podcast/s08e02-hacking-your-data-career.jpg +image: images/podcast/how-to-stand-out-in-data-science.jpg ids: anchor: Hacking-Your-Data-Career---Marijn-Markus-e1gijep youtube: RhSg8ill1So diff --git a/_podcast/how-to-switch-to-ml-tech-without-experience.md b/_podcast/how-to-switch-to-ml-tech-without-experience.md index 811cbe35..52cad159 100644 --- a/_podcast/how-to-switch-to-ml-tech-without-experience.md +++ b/_podcast/how-to-switch-to-ml-tech-without-experience.md @@ -5,7 +5,7 @@ season: 8 episode: 7 guests: - jessicagreene -image: images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg +image: images/podcast/how-to-switch-to-ml-tech-without-experience.jpg ids: anchor: From-Roasting-Coffee-to-Backend-Development---Jessica-Greene-e1i1ten/a-a7s65oj youtube: BKqmNdxsBko diff --git a/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md index 3fbb05ce..22e79f76 100644 --- a/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md +++ b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md @@ -5,7 +5,7 @@ season: 11 episode: 1 guests: - alvaronavaspeire -image: images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg +image: images/podcast/how-to-transition-into-ml-and-data-engineering-from-qa.jpg ids: anchor: From-Testing-Phones-to-Managing-NLP-Projects---Alvaro-Navas-Peire-e1oj7n8 youtube: -xumbiXOlA8 diff --git a/_podcast/hugging-face-contributions-and-nlp-portfolio.md b/_podcast/hugging-face-contributions-and-nlp-portfolio.md index 76a938a8..4f33ff67 100644 --- a/_podcast/hugging-face-contributions-and-nlp-portfolio.md +++ b/_podcast/hugging-face-contributions-and-nlp-portfolio.md @@ -5,7 +5,7 @@ season: 9 episode: 6 guests: - mervenoyan -image: images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg 
+image: images/podcast/hugging-face-contributions-and-nlp-portfolio.jpg ids: anchor: Developer-Advocacy-Engineer-for-Open-Source---Merve-Noyan-e1kcm3u youtube: SnEYvF-Ztb8 diff --git a/_podcast/human-centered-ai-automatic-speech-recognition.md b/_podcast/human-centered-ai-automatic-speech-recognition.md index 01b9671b..2abf0ff6 100644 --- a/_podcast/human-centered-ai-automatic-speech-recognition.md +++ b/_podcast/human-centered-ai-automatic-speech-recognition.md @@ -5,7 +5,7 @@ season: 19 episode: 2 guests: - katarzynaforemniak -image: images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg +image: images/podcast/human-centered-ai-automatic-speech-recognition.jpg ids: anchor: datatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 youtube: yTZ4cddD7DU diff --git a/_podcast/human-centered-mlops-and-model-monitoring.md b/_podcast/human-centered-mlops-and-model-monitoring.md index 6cf4292a..0bd24562 100644 --- a/_podcast/human-centered-mlops-and-model-monitoring.md +++ b/_podcast/human-centered-mlops-and-model-monitoring.md @@ -5,7 +5,7 @@ season: 4 episode: 6 guests: - linaweichbrodt -image: images/podcast/s04e06-humans-in-the-loop.jpg +image: images/podcast/human-centered-mlops-and-model-monitoring.jpg ids: youtube: o50j_Ndx2Hg anchor: Humans-in-the-Loop---Lina-Weichbrodt-e14npgp diff --git a/_podcast/industrial-data-small-data-production-machine-learning.md b/_podcast/industrial-data-small-data-production-machine-learning.md index cee6ff24..4d754421 100644 --- a/_podcast/industrial-data-small-data-production-machine-learning.md +++ b/_podcast/industrial-data-small-data-production-machine-learning.md @@ -5,7 +5,7 @@ season: 13 episode: 8 guests: - rosonaeldred -image: images/podcast/s13e08-navigating-industrial-data-challenges.jpg +image: images/podcast/industrial-data-small-data-production-machine-learning.jpg ids: anchor: 
ow/datatalksclub/episodes/Navigating-Industrial-Data-Challenges---Rosona-Eldred-e225aam youtube: rwuud5wr3J4 diff --git a/_podcast/interpretable-machine-learning.md b/_podcast/interpretable-machine-learning.md index d2b992d7..5b3f3442 100644 --- a/_podcast/interpretable-machine-learning.md +++ b/_podcast/interpretable-machine-learning.md @@ -4,7 +4,7 @@ season: 16 episode: 7 guests: - christophmolnar -image: images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg +image: images/podcast/interpretable-machine-learning.jpg ids: anchor: datatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 youtube: LBuGzyOkx7c diff --git a/_podcast/investing-in-open-source-developer-tools.md b/_podcast/investing-in-open-source-developer-tools.md index eed452d0..662122aa 100644 --- a/_podcast/investing-in-open-source-developer-tools.md +++ b/_podcast/investing-in-open-source-developer-tools.md @@ -5,7 +5,7 @@ season: 15 episode: 2 guests: - belawiertz -image: images/podcast/s15e02-investing-in-open-source-data-tools.jpg +image: images/podcast/investing-in-open-source-developer-tools.jpg ids: anchor: datatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 youtube: 7Bg1JQLnCao diff --git a/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md index 7e33f68f..0af70e1e 100644 --- a/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md +++ b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md @@ -5,7 +5,7 @@ season: 17 episode: 6 guests: - sarahmestiri -image: images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg +image: images/podcast/job-search-strategy-in-tech-projects-skills-cv-networking.jpg ids: anchor: datatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 youtube: PchwbIs0tOg diff --git 
a/_podcast/kaggle-grandmaster-to-production-ml-and-education.md b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md index 43c47fda..d0654822 100644 --- a/_podcast/kaggle-grandmaster-to-production-ml-and-education.md +++ b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md @@ -6,7 +6,7 @@ season: 20 episode: 2 guests: - alexanderguschin -image: images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg +image: images/podcast/kaggle-grandmaster-to-production-ml-and-education.jpg ids: anchor: datatalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 youtube: NfAJAr7FvyY&t diff --git a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md index 908a1f4c..36d24146 100644 --- a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md +++ b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md @@ -5,7 +5,7 @@ season: 18 episode: 2 guests: - anahitapakiman -image: images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg +image: images/podcast/knowledge-graphs-and-llms-for-automotive-rnd.jpg ids: anchor: datatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 youtube: YncdlUscUOo diff --git a/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md index a6254ca2..294d905f 100644 --- a/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md +++ b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md @@ -5,7 +5,7 @@ season: 5 episode: 8 guests: - caitlinmoorman -image: images/podcast/s05e08-the-last-mile-in-data.jpg +image: images/podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.jpg ids: youtube: HfMpG2zpa2I anchor: Conquering-the-Last-Mile-in-Data---Caitlin-Moorman-e1958c1 diff --git 
a/_podcast/launch-and-build-retail-startup.md b/_podcast/launch-and-build-retail-startup.md index de5c74e7..41594fb3 100644 --- a/_podcast/launch-and-build-retail-startup.md +++ b/_podcast/launch-and-build-retail-startup.md @@ -5,7 +5,7 @@ season: 4 episode: 7 guests: - carminepaolino -image: images/podcast/s04e07-launching-a-startup.jpg +image: images/podcast/launch-and-build-retail-startup.jpg ids: youtube: s-w8_GDgIlU anchor: Launching-a-Startup-From-Idea-to-First-Hire---Carmine-Paolino-e15sk4i diff --git a/_podcast/lean-mlops-for-startups.md b/_podcast/lean-mlops-for-startups.md index da50702a..a89a8049 100644 --- a/_podcast/lean-mlops-for-startups.md +++ b/_podcast/lean-mlops-for-startups.md @@ -6,7 +6,7 @@ season: 20 episode: 4 guests: - nemanjaradojkovic -image: images/podcast/s20e04-mlops-in-corporations-and-startups.jpg +image: images/podcast/lean-mlops-for-startups.jpg ids: anchor: datatalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 youtube: DX9c__a4jzg diff --git a/_podcast/learning-machine-learning-self-taught-bioinformatics.md b/_podcast/learning-machine-learning-self-taught-bioinformatics.md index 04ed2d7b..30fe5135 100644 --- a/_podcast/learning-machine-learning-self-taught-bioinformatics.md +++ b/_podcast/learning-machine-learning-self-taught-bioinformatics.md @@ -5,7 +5,7 @@ season: 13 episode: 7 guests: - aaishamuhammad -image: images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg +image: images/podcast/learning-machine-learning-self-taught-bioinformatics.jpg ids: anchor: ow/datatalksclub/episodes/Mastering-Self-Learning-in-Machine-Learning---Aaisha-Muhammad-e21ud62 youtube: Kc3Puh3UCRQ diff --git a/_podcast/machine-learning-data-science-interview-prep.md b/_podcast/machine-learning-data-science-interview-prep.md index 10685971..63ae191a 100644 --- a/_podcast/machine-learning-data-science-interview-prep.md +++ b/_podcast/machine-learning-data-science-interview-prep.md @@ -5,7 +5,7 @@ season: 12 
episode: 6 guests: - lukewhipps -image: images/podcast/s12e06-preparing-for-data-science-interview.jpg +image: images/podcast/machine-learning-data-science-interview-prep.jpg ids: anchor: Preparing-for-a-Data-Science-Interview---Luke-Whipps-e1tsh5d youtube: NnZjlMowkWA diff --git a/_podcast/machine-learning-decision-optimization.md b/_podcast/machine-learning-decision-optimization.md index da05a916..53af8623 100644 --- a/_podcast/machine-learning-decision-optimization.md +++ b/_podcast/machine-learning-decision-optimization.md @@ -5,7 +5,7 @@ season: 2 episode: 6 guests: - danbecker -image: images/podcast/s02e06-decision-optimization.jpg +image: images/podcast/machine-learning-decision-optimization.jpg ids: youtube: SJuzQ4bcU2c anchor: Translating-ML-Predictions-Into-Better-Real-World-Results-with-Decision-Optimization---Dan-Becker-eqk0b1/a-a4maq87 diff --git a/_podcast/machine-learning-engineering-production-best-practices.md b/_podcast/machine-learning-engineering-production-best-practices.md index 83eb404b..0d8be281 100644 --- a/_podcast/machine-learning-engineering-production-best-practices.md +++ b/_podcast/machine-learning-engineering-production-best-practices.md @@ -5,7 +5,7 @@ season: 4 episode: 5 guests: - benwilson -image: images/podcast/s04e05-running-from-complexity.jpg +image: images/podcast/machine-learning-engineering-production-best-practices.jpg ids: youtube: sMy8NYZnsy8 anchor: Running-from-Complexity---Ben-Wilson-e14np51 diff --git a/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md index b0f697e0..e9cdc16d 100644 --- a/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md +++ b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md @@ -5,7 +5,7 @@ season: 9 episode: 2 guests: - daynancrull -image: images/podcast/s09e02-using-data-for-asteroid-mining.jpg +image: 
images/podcast/machine-learning-for-asteroid-mining-and-water-detection.jpg ids: anchor: Using-Data-for-Asteroid-Mining---Daynan-Crull-e1jbhr0 youtube: YxijEUoDCfw diff --git a/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md index 9fafc3b7..e9567b5b 100644 --- a/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md +++ b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md @@ -5,7 +5,7 @@ season: 9 episode: 1 guests: - juanorduz -image: images/podcast/s09e01-machine-learning-in-marketing.jpg +image: images/podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.jpg ids: anchor: Machine-Learning-in-Marketing---Juan-Orduz-e1j1muj youtube: jsAxUd_bZpw diff --git a/_podcast/machine-learning-system-design-interview.md b/_podcast/machine-learning-system-design-interview.md index 65f4f018..6fd1d4fd 100644 --- a/_podcast/machine-learning-system-design-interview.md +++ b/_podcast/machine-learning-system-design-interview.md @@ -5,7 +5,7 @@ season: 7 episode: 5 guests: - valeriybabushkin -image: images/podcast/s07e05-machine-learning-system-design-interview.jpg +image: images/podcast/machine-learning-system-design-interview.jpg ids: anchor: Machine-Learning-System-Design-Interview---Valerii-Babushkin-e1ej65e youtube: 0RsmRjar66E diff --git a/_podcast/make-money-with-machine-learning-roles-skills.md b/_podcast/make-money-with-machine-learning-roles-skills.md index fda9189f..8bae891f 100644 --- a/_podcast/make-money-with-machine-learning-roles-skills.md +++ b/_podcast/make-money-with-machine-learning-roles-skills.md @@ -5,7 +5,7 @@ season: 2 episode: 9 guests: - vinvashishta -image: images/podcast/s02e09-roles-skills-monetizing-ml.jpg +image: images/podcast/make-money-with-machine-learning-roles-skills.jpg ids: youtube: xCjzA_8S4kI anchor: 
New-Roles-and-Key-Skills-to-Monetize-Machine-Learning---Vin-Vashishta-escer6 diff --git a/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md index 4c5d2428..6b4e7d50 100644 --- a/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md +++ b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md @@ -5,7 +5,7 @@ season: 1 episode: 5 guests: - rahuljain -image: images/podcast/s01e05-mentoring.jpg +image: images/podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.jpg ids: youtube: LQvwTNQbPg4 anchor: Mentoring---Rahul-Jain-eo7cmu diff --git a/_podcast/mindful-data-strategy-for-business-impact.md b/_podcast/mindful-data-strategy-for-business-impact.md index 582717ec..6e9e72bb 100644 --- a/_podcast/mindful-data-strategy-for-business-impact.md +++ b/_podcast/mindful-data-strategy-for-business-impact.md @@ -6,7 +6,7 @@ season: 21 episode: 2 guests: - liorbarak -image: images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg +image: images/podcast/mindful-data-strategy-for-business-impact.jpg ids: anchor: datatalksclub/episodes/How-to-Rebuild-Data-Trust--Mindful-Data-Strategy-and-Maintenance-vs-Innovation---Lior-Barak-e36obcs youtube: B76J4QkZPWs diff --git a/_podcast/ml-engineering-kpis-and-metrics-strategy.md b/_podcast/ml-engineering-kpis-and-metrics-strategy.md index bbe20590..348f4e22 100644 --- a/_podcast/ml-engineering-kpis-and-metrics-strategy.md +++ b/_podcast/ml-engineering-kpis-and-metrics-strategy.md @@ -5,7 +5,7 @@ season: 5 episode: 3 guests: - adamsroka -image: images/podcast/s05e03-metrics-and-kpis.jpg +image: images/podcast/ml-engineering-kpis-and-metrics-strategy.jpg ids: youtube: H4P2RfKvXGs anchor: Defining-Success-Metrics-and-KPIs---Adam-Sroka-e17gfp0 diff --git a/_podcast/ml-product-manager-and-mlops-platform-strategy.md b/_podcast/ml-product-manager-and-mlops-platform-strategy.md index a570f449..9a4e730b 100644 --- 
a/_podcast/ml-product-manager-and-mlops-platform-strategy.md +++ b/_podcast/ml-product-manager-and-mlops-platform-strategy.md @@ -5,7 +5,7 @@ season: 6 episode: 7 guests: - geojolly -image: images/podcast/s06e07-product-management-for-machine-learning.jpg +image: images/podcast/ml-product-manager-and-mlops-platform-strategy.jpg ids: youtube: PjqjPvHliqg anchor: Product-Management-for-Machine-Learning---Geo-Jolly-e1brpvm diff --git a/_podcast/ml-system-design.md b/_podcast/ml-system-design.md index f2b2c265..5b3ea9dc 100644 --- a/_podcast/ml-system-design.md +++ b/_podcast/ml-system-design.md @@ -5,7 +5,7 @@ season: 15 episode: 1 guests: - valeriybabushkin -image: images/podcast/s15e01-why-machine-learning-design-broken.jpg +image: images/podcast/ml-system-design.jpg ids: anchor: datatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o youtube: 6YBMU6475KQ diff --git a/_podcast/mlops-and-ml-engineering-in-finance.md b/_podcast/mlops-and-ml-engineering-in-finance.md index fe677a9e..d1275620 100644 --- a/_podcast/mlops-and-ml-engineering-in-finance.md +++ b/_podcast/mlops-and-ml-engineering-in-finance.md @@ -5,7 +5,7 @@ season: 17 episode: 5 guests: - nemanjaradojkovic -image: images/podcast/s17e05-machine-learning-engineering-in-finance.jpg +image: images/podcast/mlops-and-ml-engineering-in-finance.jpg ids: anchor: datatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 youtube: Nl4aibeFwiI diff --git a/_podcast/mlops-at-scale-reproducibility-adoption.md b/_podcast/mlops-at-scale-reproducibility-adoption.md index ce5076dd..c0e570b6 100644 --- a/_podcast/mlops-at-scale-reproducibility-adoption.md +++ b/_podcast/mlops-at-scale-reproducibility-adoption.md @@ -5,7 +5,7 @@ season: 19 episode: 4 guests: - raphaelhoogvliets -image: images/podcast/s19e04-mlops-as-team.jpg +image: images/podcast/mlops-at-scale-reproducibility-adoption.jpg ids: anchor: 
datatalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr youtube: rMq63r3zi4c diff --git a/_podcast/mlops-community-building-and-meetups.md b/_podcast/mlops-community-building-and-meetups.md index d79eb1ac..59022b9f 100644 --- a/_podcast/mlops-community-building-and-meetups.md +++ b/_podcast/mlops-community-building-and-meetups.md @@ -5,7 +5,7 @@ season: 2 episode: 12 guests: - demetriosbrinkmann -image: images/podcast/s02e12-communities.jpg +image: images/podcast/mlops-community-building-and-meetups.jpg ids: youtube: ByCE1vSrIr8 anchor: Building-Online-Tech-Communities---Demetrios-Brinkmann-eu35fo diff --git a/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md index ed451cdf..cd53e7b5 100644 --- a/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md +++ b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md @@ -5,7 +5,7 @@ season: 2 episode: 5 guests: - willempienaar -image: images/podcast/s02e05-feature-stores.jpg +image: images/podcast/mlops-feature-stores-feature-stores-feast-tecton.jpg ids: youtube: FQYTb4uWljQ anchor: Feature-Stores-Cutting-through-the-Hype---Willem-Pienaar-ept6m8/a-a4hlg3r diff --git a/_podcast/mlops-kubeflow-model-monitoring.md b/_podcast/mlops-kubeflow-model-monitoring.md index a4701d73..579a708c 100644 --- a/_podcast/mlops-kubeflow-model-monitoring.md +++ b/_podcast/mlops-kubeflow-model-monitoring.md @@ -5,7 +5,7 @@ season: 2 episode: 4 guests: - theofilospapapanagiotou -image: images/podcast/s02e04-mlops.jpg +image: images/podcast/mlops-kubeflow-model-monitoring.jpg ids: youtube: -i0fVp0ntYA anchor: The-Rise-of-MLOps---Theofilos-Papapanagiotou-ept67o diff --git a/_podcast/mlops-model-monitoring-data-observability.md b/_podcast/mlops-model-monitoring-data-observability.md index 5680e3cb..b1e2adc0 100644 --- a/_podcast/mlops-model-monitoring-data-observability.md +++ b/_podcast/mlops-model-monitoring-data-observability.md @@ -5,7 
+5,7 @@ season: 10 episode: 3 guests: - dannyleybzon -image: images/podcast/s10e03-mlops-architect.jpg +image: images/podcast/mlops-model-monitoring-data-observability.jpg ids: anchor: MLOps-Architect---Danny-Leybzon-e1m81iu youtube: p1gVaS4Zx5M diff --git a/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md index bb73f0a4..28e79ff4 100644 --- a/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md +++ b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md @@ -5,7 +5,7 @@ season: 14 episode: 7 guests: - santonatuli -image: images/podcast/s14e07-from-mlops-to-dataops.jpg +image: images/podcast/modern-data-pipelines-orchestration-ingestion-modeling.jpg ids: anchor: ow/datatalksclub/episodes/From-MLOps-to-DataOps---Santona-Tuli-e25vb0q youtube: kSTfhQ_SZgc diff --git a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md index 12ff0762..d84f692c 100644 --- a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md +++ b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md @@ -6,7 +6,7 @@ season: 17 episode: 2 guests: - atitaarora -image: images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg +image: images/podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.jpg ids: anchor: datatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps youtube: _fbe1QyJ1PY diff --git a/_podcast/nlp-dataset-creation-annotation-tools-workflows.md b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md index d7d64782..06385664 100644 --- a/_podcast/nlp-dataset-creation-annotation-tools-workflows.md +++ b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md @@ -5,7 +5,7 @@ season: 10 episode: 7 guests: - 
christiannswart -image: images/podcast/s10e07-dataset-creation-and-curation.jpg +image: images/podcast/nlp-dataset-creation-annotation-tools-workflows.jpg ids: anchor: Dataset-Creation-and-Curation---Christiaan-Swart-e1nd1f6 youtube: QggWydGrWoo diff --git a/_podcast/nlp-team-hiring-and-production-mlops.md b/_podcast/nlp-team-hiring-and-production-mlops.md index 4742799a..bab136f1 100644 --- a/_podcast/nlp-team-hiring-and-production-mlops.md +++ b/_podcast/nlp-team-hiring-and-production-mlops.md @@ -5,7 +5,7 @@ season: 6 episode: 8 guests: - ivanbilan -image: images/podcast/s06e08-nlp-teams.jpg +image: images/podcast/nlp-team-hiring-and-production-mlops.jpg ids: youtube: RJEf6mzxh1w anchor: Leading-NLP-Teams---Ivan-Bilan-e1c4929 diff --git a/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md index bee42e58..f7fa4f8a 100644 --- a/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md +++ b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md @@ -5,7 +5,7 @@ season: 21 episode: 3 guests: - pastorsoto -image: images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg +image: images/podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.jpg ids: anchor: datatalksclub/episodes/From-Medicine-to-Machine-Learning-How-Public-Learning-Turned-into-a-Career---Pastor-Soto-e376e66 youtube: 5km62e4nDaw diff --git a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md index 55c797f4..7fb7cb43 100644 --- a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md +++ b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md @@ -5,7 +5,7 @@ season: 17 episode: 7 guests: - saraelateif -image: 
images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg +image: images/podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.jpg ids: anchor: datatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan youtube: aHdaIwOEI8Q diff --git a/_podcast/open-source-ml-contributions.md b/_podcast/open-source-ml-contributions.md index e80de2c9..a08e78ae 100644 --- a/_podcast/open-source-ml-contributions.md +++ b/_podcast/open-source-ml-contributions.md @@ -5,7 +5,7 @@ season: 2 episode: 3 guests: - vincentwarmerdam -image: images/podcast/s02e03-open-source.jpg +image: images/podcast/open-source-ml-contributions.jpg ids: youtube: IxV9EH-tphQ anchor: Getting-Started-with-Open-Source---Vincent-Warmerdam-epk60j diff --git a/_podcast/open-source-ml-tools-strategy-and-business-models.md b/_podcast/open-source-ml-tools-strategy-and-business-models.md index 7bdd5cf9..a6ebb830 100644 --- a/_podcast/open-source-ml-tools-strategy-and-business-models.md +++ b/_podcast/open-source-ml-tools-strategy-and-business-models.md @@ -5,7 +5,7 @@ season: 18 episode: 4 guests: - vincentwarmerdam -image: images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg +image: images/podcast/open-source-ml-tools-strategy-and-business-models.jpg ids: anchor: datatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs youtube: UPlIETGwTg8 diff --git a/_podcast/open-source-turned-into-career-and-startup-creation.md b/_podcast/open-source-turned-into-career-and-startup-creation.md index 87d03dc1..93a4c2ad 100644 --- a/_podcast/open-source-turned-into-career-and-startup-creation.md +++ b/_podcast/open-source-turned-into-career-and-startup-creation.md @@ -5,7 +5,7 @@ season: 9 episode: 8 guests: - willmcgugan -image: images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg +image: images/podcast/open-source-turned-into-career-and-startup-creation.jpg ids: anchor: 
From-Open-Source-Maintainer-to-Founder---Will-McGugan-e1kqtu5 youtube: bwfR9dyxf1M diff --git a/_podcast/personal-brand-for-data-professionals.md b/_podcast/personal-brand-for-data-professionals.md index 9ae16329..29b2b153 100644 --- a/_podcast/personal-brand-for-data-professionals.md +++ b/_podcast/personal-brand-for-data-professionals.md @@ -5,7 +5,7 @@ season: 2 episode: 8 guests: - admondleekinlim -image: images/podcast/s02e08-personal-branding.jpg +image: images/podcast/personal-brand-for-data-professionals.jpg ids: youtube: tQRQnz_aHYQ anchor: Personal-Branding---Admond-Lee-Kin-Lim-ern77e diff --git a/_podcast/postdoc-to-data-science-lead-career-transition.md b/_podcast/postdoc-to-data-science-lead-career-transition.md index d8cd95bb..8c325482 100644 --- a/_podcast/postdoc-to-data-science-lead-career-transition.md +++ b/_podcast/postdoc-to-data-science-lead-career-transition.md @@ -5,7 +5,7 @@ season: 6 episode: 6 guests: - cjjenkins -image: images/podcast/s06e06-from-academia-to-industry.jpg +image: images/podcast/postdoc-to-data-science-lead-career-transition.jpg ids: youtube: m4F651BpUFk anchor: Moving-from-Academia-to-Industry---CJ-Jenkins-e1bh84o diff --git a/_podcast/practical-devrel-demofirst-education-and-open-source.md b/_podcast/practical-devrel-demofirst-education-and-open-source.md index 8f510039..bf65b9d8 100644 --- a/_podcast/practical-devrel-demofirst-education-and-open-source.md +++ b/_podcast/practical-devrel-demofirst-education-and-open-source.md @@ -6,7 +6,7 @@ season: 20 episode: 8 guests: - willrussell -image: images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg +image: images/podcast/practical-devrel-demofirst-education-and-open-source.jpg ids: anchor: datatalksclub/episodes/From-Hackathons-to-Developer-Advocacy---Will-Russel-e339a5f youtube: vXbMUfHE1OE diff --git a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md index 
8db1d103..097402a1 100644 --- a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md +++ b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md @@ -5,7 +5,7 @@ season: 16 episode: 5 guests: - verenaweber -image: images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg +image: images/podcast/practical-generative-ai-consulting-from-expertise-to-impact.jpg ids: anchor: datatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr youtube: 4RargY8iOaE diff --git a/_podcast/practical-llm-engineering-and-rag.md b/_podcast/practical-llm-engineering-and-rag.md index 68b85b86..ce9ba068 100644 --- a/_podcast/practical-llm-engineering-and-rag.md +++ b/_podcast/practical-llm-engineering-and-rag.md @@ -5,7 +5,7 @@ season: 22 episode: 4 guests: - hugobowneanderson -image: images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg +image: images/podcast/practical-llm-engineering-and-rag.jpg ids: anchor: datatalksclub/episodes/How-to-Build-and-Evaluate-AI-systems-in-the-Age-of-LLMs---Hugo-Bowne-Anderson-e39vt24 youtube: eC3RNuI6ow0 diff --git a/_podcast/practical-llm-use-cases-and-product-patterns.md b/_podcast/practical-llm-use-cases-and-product-patterns.md index 93eddf03..63e6c8db 100644 --- a/_podcast/practical-llm-use-cases-and-product-patterns.md +++ b/_podcast/practical-llm-use-cases-and-product-patterns.md @@ -5,7 +5,7 @@ season: 15 episode: 4 guests: - sandrakublik -image: images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg +image: images/podcast/practical-llm-use-cases-and-product-patterns.jpg ids: anchor: datatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 youtube: bM6AR4A-f98 diff --git a/_podcast/pragmatic-and-standardized-mlops.md b/_podcast/pragmatic-and-standardized-mlops.md index 8e0c76fc..aeb4e8fd 100644 --- a/_podcast/pragmatic-and-standardized-mlops.md +++ 
b/_podcast/pragmatic-and-standardized-mlops.md @@ -5,7 +5,7 @@ season: 15 episode: 7 guests: - mariavechtomova -image: images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg +image: images/podcast/pragmatic-and-standardized-mlops.jpg ids: anchor: datatalksclub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv youtube: q3DTR3Od1MA diff --git a/_podcast/product-designer-to-data-product-manager.md b/_podcast/product-designer-to-data-product-manager.md index e2d56b90..90800e9f 100644 --- a/_podcast/product-designer-to-data-product-manager.md +++ b/_podcast/product-designer-to-data-product-manager.md @@ -5,7 +5,7 @@ season: 6 episode: 4 guests: - saramenefee -image: images/podcast/s06e04-becoming-a-data-product-manager.jpg +image: images/podcast/product-designer-to-data-product-manager.jpg ids: youtube: nt__pVuuC-k anchor: Becoming-a-Data-Product-Manager---Sara-Menefee-e1arc4a diff --git a/_podcast/production-ml-mlops-and-data-team-building.md b/_podcast/production-ml-mlops-and-data-team-building.md index 46a7c71f..b984253d 100644 --- a/_podcast/production-ml-mlops-and-data-team-building.md +++ b/_podcast/production-ml-mlops-and-data-team-building.md @@ -5,7 +5,7 @@ season: 5 episode: 7 guests: - rishabhbhargava -image: images/podcast/s05e07-ml-vs-analytics.jpg +image: images/podcast/production-ml-mlops-and-data-team-building.jpg ids: youtube: rMRUa8WxDz4 anchor: Similarities-and-Differences-between-ML-and-Analytics---Rishabh-Bhargava-e18rcam diff --git a/_podcast/production-ml-pipelines-with-aws-and-kafka.md b/_podcast/production-ml-pipelines-with-aws-and-kafka.md index 84611467..9a5c421d 100644 --- a/_podcast/production-ml-pipelines-with-aws-and-kafka.md +++ b/_podcast/production-ml-pipelines-with-aws-and-kafka.md @@ -5,7 +5,7 @@ season: 4 episode: 2 guests: - andreaskretz -image: images/podcast/s04e02-build-your-own-data-pipeline.jpg +image: images/podcast/production-ml-pipelines-with-aws-and-kafka.jpg ids: youtube: IrZPAG6OBqo anchor: 
Build-Your-Own-Data-Pipeline---Andreas-Kretz-e139se1 diff --git a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md index d3df4041..0b7efa54 100644 --- a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md +++ b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md @@ -5,7 +5,7 @@ season: 17 episode: 8 guests: - reemmahmoud -image: images/podcast/s17e08-building-machine-learning-products.jpg +image: images/podcast/production-ml-search-vector-search-embeddings-hybrid search.jpg ids: anchor: datatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd youtube: m45tNY-8gY8 diff --git a/_podcast/production-ready-ai-engineering.md b/_podcast/production-ready-ai-engineering.md index 70b052e3..e6dda75d 100644 --- a/_podcast/production-ready-ai-engineering.md +++ b/_podcast/production-ready-ai-engineering.md @@ -5,7 +5,7 @@ season: 20 episode: 5 guests: - bartoszmikulski -image: images/podcast/s20e05-data-intensive-ai.jpg +image: images/podcast/production-ready-ai-engineering.jpg ids: anchor: datatalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi youtube: BP6w_vKySN0 diff --git a/_podcast/project-manager-to-data-scientist.md b/_podcast/project-manager-to-data-scientist.md index f06d7516..07109378 100644 --- a/_podcast/project-manager-to-data-scientist.md +++ b/_podcast/project-manager-to-data-scientist.md @@ -5,7 +5,7 @@ season: 3 episode: 1 guests: - ksenialegostay -image: images/podcast/s03e01-from-pm-to-ds.jpg +image: images/podcast/project-manager-to-data-scientist.jpg ids: youtube: rBKezdb9jEc anchor: Transitioning-from-Project-Management-to-Data-Science---Ksenia-Legostay-euig2a diff --git a/_podcast/public-speaking-for-data-scientists.md b/_podcast/public-speaking-for-data-scientists.md index d6df1bf7..68b46e36 100644 --- a/_podcast/public-speaking-for-data-scientists.md +++ 
b/_podcast/public-speaking-for-data-scientists.md @@ -5,7 +5,7 @@ season: 2 episode: 10 guests: - bentaylor -image: images/podcast/s02e10-public-speaking.jpg +image: images/podcast/public-speaking-for-data-scientists.jpg ids: youtube: wOFvlR9UBxI anchor: The-Essentials-of-Public-Speaking-for-Career-in-Data-Science---Ben-Taylor-et0m4p diff --git a/_podcast/remote-data-engineering-work-and-building-iot-platforms.md b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md index e2a27586..638077c1 100644 --- a/_podcast/remote-data-engineering-work-and-building-iot-platforms.md +++ b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md @@ -5,7 +5,7 @@ season: 15 episode: 5 guests: - josemaria -image: images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg +image: images/podcast/remote-data-engineering-work-and-building-iot-platforms.jpg ids: anchor: datatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c youtube: UX7UShEioKc diff --git a/_podcast/research-to-production-ml-systems-roadmap.md b/_podcast/research-to-production-ml-systems-roadmap.md index 2c342d98..eceae549 100644 --- a/_podcast/research-to-production-ml-systems-roadmap.md +++ b/_podcast/research-to-production-ml-systems-roadmap.md @@ -5,7 +5,7 @@ season: 5 episode: 5 guests: - mihaileric -image: images/podcast/s05e05-researchers-vs-engineers.jpg +image: images/podcast/research-to-production-ml-systems-roadmap.jpg ids: youtube: d9xVXqKq3sU anchor: What-Researchers-and-Engineers-Can-Learn-from-Each-Other---Mihail-Eric-e1854bj diff --git a/_podcast/responsible-explainable-ai-bias-detection.md b/_podcast/responsible-explainable-ai-bias-detection.md index ea7915de..84c85a56 100644 --- a/_podcast/responsible-explainable-ai-bias-detection.md +++ b/_podcast/responsible-explainable-ai-bias-detection.md @@ -5,7 +5,7 @@ season: 10 episode: 9 guests: - supreetkaur -image: images/podcast/s10e09-responsible-and-explainable-ai.jpg 
+image: images/podcast/responsible-explainable-ai-bias-detection.jpg ids: anchor: Responsible-and-Explainable-AI---Supreet-Kaur-e1o6mgj youtube: 8Eb5mG-pC3o diff --git a/_podcast/scaling-data-engineering-teams-self-service-platforms.md b/_podcast/scaling-data-engineering-teams-self-service-platforms.md index 39d97a5e..2ff58494 100644 --- a/_podcast/scaling-data-engineering-teams-self-service-platforms.md +++ b/_podcast/scaling-data-engineering-teams-self-service-platforms.md @@ -5,7 +5,7 @@ season: 10 episode: 5 guests: - mehdiouazza -image: images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg +image: images/podcast/scaling-data-engineering-teams-self-service-platforms.jpg ids: anchor: Growing-Data-Engineering-Team-in-a-Scale-Up---Mehdi-OUAZZA-e1mq8et youtube: acJ6sVqKOUk diff --git a/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md b/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md index 2c061955..c403ce58 100644 --- a/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md +++ b/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md @@ -5,7 +5,7 @@ season: 10 episode: 4 guests: - alexanderhendorf -image: images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg +image: images/podcast/scaling-enterprise-ai-mlops-data-first-strategy.jpg ids: anchor: Lessons-Learned-About-Data--AI-at-Enterprises---Alexander-Hendorf-e1milm0/a-a8d08ua youtube: Vms29u9xC3k diff --git a/_podcast/software-engineering-for-machine-learning.md b/_podcast/software-engineering-for-machine-learning.md index 5675d637..fd9eb03d 100644 --- a/_podcast/software-engineering-for-machine-learning.md +++ b/_podcast/software-engineering-for-machine-learning.md @@ -5,7 +5,7 @@ season: 13 episode: 5 guests: - nadianahar -image: images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg +image: images/podcast/software-engineering-for-machine-learning.jpg ids: anchor: 
ow/datatalksclub/episodes/SE4ML---Software-Engineering-for-Machine-Learning---Nadia-Nahar-e20svmn youtube: 35Ch8xL2SA8 diff --git a/_podcast/solopreneur-data-scientist.md b/_podcast/solopreneur-data-scientist.md index 89b6d660..ad717ac2 100644 --- a/_podcast/solopreneur-data-scientist.md +++ b/_podcast/solopreneur-data-scientist.md @@ -5,7 +5,7 @@ season: 5 episode: 4 guests: - mariannadiachuk -image: images/podcast/s05e04-introducing-data-science-in-startups.jpg +image: images/podcast/solopreneur-data-scientist.jpg ids: youtube: KMSE9GkU2mE anchor: Introducing-Data-Science-in-Startups---Marianna-Diachuk-e17rc4i diff --git a/_podcast/solopreneur-developer-and-data-professional.md b/_podcast/solopreneur-developer-and-data-professional.md index 869089d5..77a443ea 100644 --- a/_podcast/solopreneur-developer-and-data-professional.md +++ b/_podcast/solopreneur-developer-and-data-professional.md @@ -5,7 +5,7 @@ season: 6 episode: 1 guests: - noahgift -image: images/podcast/s06e01-solopreneur.jpg +image: images/podcast/solopreneur-developer-and-data-professional.jpg ids: youtube: gCLUY37HGtw anchor: Becoming-a-Solopreneur-in-Data---Noah-Gift-e19gqbr diff --git a/_podcast/teaching-mentoring-data-analytics-fintech.md b/_podcast/teaching-mentoring-data-analytics-fintech.md index 179f5347..9382b6a1 100644 --- a/_podcast/teaching-mentoring-data-analytics-fintech.md +++ b/_podcast/teaching-mentoring-data-analytics-fintech.md @@ -5,7 +5,7 @@ season: 11 episode: 9 guests: - irinabrudaru -image: images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg +image: images/podcast/teaching-mentoring-data-analytics-fintech.jpg ids: anchor: Teaching-and-Mentoring-in-Data-Analytics---Irina-Brudaru-e1rihm1 youtube: saaRRzgHsmE diff --git a/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md index b7b9722d..ea0d60e6 100644 --- 
a/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md +++ b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md @@ -5,7 +5,7 @@ season: 12 episode: 4 guests: - johannabayer -image: images/podcast/s12e04-doing-software-engineering-in-academia.jpg +image: images/podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.jpg ids: anchor: Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb youtube: K0PdQITQzVQ diff --git a/_podcast/technical-writing-for-data-scientists.md b/_podcast/technical-writing-for-data-scientists.md index e3534c41..719cad2e 100644 --- a/_podcast/technical-writing-for-data-scientists.md +++ b/_podcast/technical-writing-for-data-scientists.md @@ -5,7 +5,7 @@ season: 2 episode: 1 guests: - eugeneyan -image: images/podcast/s02e01-writing.jpg +image: images/podcast/technical-writing-for-data-scientists.jpg ids: youtube: vXWGd7olv3c anchor: The-Importance-of-Writing-in-a-Tech-Career---Eugene-Yan-ep17du diff --git a/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md index 79b026e4..c32809fe 100644 --- a/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md +++ b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md @@ -5,7 +5,7 @@ season: 21 episode: 9 guests: - abouzarabbaspour -image: images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg +image: images/podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.jpg ids: anchor: datatalksclub/episodes/From-Theme-Parks-to-Tesla-Building-Data-Products-That-Work-e395qme youtube: gXvVMvhfrIY diff --git a/_podcast/trends-in-modern-data-engineering.md b/_podcast/trends-in-modern-data-engineering.md index d1f442be..1b5435c9 100644 --- a/_podcast/trends-in-modern-data-engineering.md +++ 
b/_podcast/trends-in-modern-data-engineering.md @@ -5,7 +5,7 @@ season: 20 episode: 3 guests: - adrianbrudaru -image: images/podcast/s20e03-trends-in-data-engineering.jpg +image: images/podcast/trends-in-modern-data-engineering.jpg ids: anchor: datatalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae youtube: AlCFKbFIEM8 diff --git a/_podcast/urban-data-science.md b/_podcast/urban-data-science.md index 9a72efc3..2367e004 100644 --- a/_podcast/urban-data-science.md +++ b/_podcast/urban-data-science.md @@ -5,7 +5,7 @@ season: 19 episode: 1 guests: - rachellim -image: images/podcast/s19e01-using-data-to-create-liveable-cities.jpg +image: images/podcast/urban-data-science.jpg ids: anchor: datatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup youtube: VXQIGHUWeL0 diff --git a/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md index 2b0132a5..cc0e988c 100644 --- a/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md +++ b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md @@ -5,7 +5,7 @@ season: 8 episode: 1 guests: - meoramer -image: images/podcast/s08e01-visualising-machine-learning.jpg +image: images/podcast/visualizing-machine-learning-concepts-to-explain-ml.jpg ids: anchor: Visualising-Machine-Learning---Meor-Amer-e1g7iri youtube: OuCuk-7RHjM diff --git a/images/podcast/s07e06-ab-testing.jpg b/images/podcast/ab-testing-and-product-experimentation.jpg similarity index 100% rename from images/podcast/s07e06-ab-testing.jpg rename to images/podcast/ab-testing-and-product-experimentation.jpg diff --git a/images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg b/images/podcast/ai-in-healthcare-and-digital-therapeutics.jpg similarity index 100% rename from images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg rename to images/podcast/ai-in-healthcare-and-digital-therapeutics.jpg diff 
--git a/images/podcast/s20e01-trends-in-ai-infrastructure.jpg b/images/podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.jpg similarity index 100% rename from images/podcast/s20e01-trends-in-ai-infrastructure.jpg rename to images/podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.jpg diff --git a/images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg b/images/podcast/ai-ml-product-design-and-experimentation.jpg similarity index 100% rename from images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg rename to images/podcast/ai-ml-product-design-and-experimentation.jpg diff --git a/images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg b/images/podcast/algorithmic-trading-with-python-and-machine-learning.jpg similarity index 100% rename from images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg rename to images/podcast/algorithmic-trading-with-python-and-machine-learning.jpg diff --git a/images/podcast/s05e01-mastering-algorithms-and-data-structures.jpg b/images/podcast/algorithms-data-structures-for-engineers.jpg similarity index 100% rename from images/podcast/s05e01-mastering-algorithms-and-data-structures.jpg rename to images/podcast/algorithms-data-structures-for-engineers.jpg diff --git a/images/podcast/s03e11-analytics-engineer.jpg b/images/podcast/analytics-engineer-skills-tools.jpg similarity index 100% rename from images/podcast/s03e11-analytics-engineer.jpg rename to images/podcast/analytics-engineer-skills-tools.jpg diff --git a/images/podcast/s03e02-from-analytics-to-data-science.jpg b/images/podcast/analytics-to-data-science-with-kaggle-portfolio.jpg similarity index 100% rename from images/podcast/s03e02-from-analytics-to-data-science.jpg rename to images/podcast/analytics-to-data-science-with-kaggle-portfolio.jpg diff --git a/images/podcast/s20e07-build-strong-career-in-data.jpg 
b/images/podcast/applied-llm-research-and-career-growth-in-practice.jpg similarity index 100% rename from images/podcast/s20e07-build-strong-career-in-data.jpg rename to images/podcast/applied-llm-research-and-career-growth-in-practice.jpg diff --git a/images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg b/images/podcast/bayesian-modeling-workflows-and-tools.jpg similarity index 100% rename from images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg rename to images/podcast/bayesian-modeling-workflows-and-tools.jpg diff --git a/images/podcast/s16e09-become-data-freelancer.jpg b/images/podcast/becoming-data-freelancer.jpg similarity index 100% rename from images/podcast/s16e09-become-data-freelancer.jpg rename to images/podcast/becoming-data-freelancer.jpg diff --git a/images/podcast/s06e05-post-doctoral-research.jpg b/images/podcast/big-data-analytics-and-postdoc-research.jpg similarity index 100% rename from images/podcast/s06e05-post-doctoral-research.jpg rename to images/podcast/big-data-analytics-and-postdoc-research.jpg diff --git a/images/podcast/s04e03-big-data-engineer-vs-data-scientist.jpg b/images/podcast/big-data-engineer-vs-data-scientist.jpg similarity index 100% rename from images/podcast/s04e03-big-data-engineer-vs-data-scientist.jpg rename to images/podcast/big-data-engineer-vs-data-scientist.jpg diff --git a/images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg b/images/podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.jpg similarity index 100% rename from images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg rename to images/podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.jpg diff --git a/images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg b/images/podcast/bioinformatics-worflows-tools-and-data-science.jpg similarity index 100% rename from images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg 
rename to images/podcast/bioinformatics-worflows-tools-and-data-science.jpg diff --git a/images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg b/images/podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.jpg similarity index 100% rename from images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg rename to images/podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.jpg diff --git a/images/podcast/s16e08-ai-for-digital-health.jpg b/images/podcast/building-ai-digital-health-startups.jpg similarity index 100% rename from images/podcast/s16e08-ai-for-digital-health.jpg rename to images/podcast/building-ai-digital-health-startups.jpg diff --git a/images/podcast/s07e03-product-management-essentials.jpg b/images/podcast/building-and-scaling-ai-data-products-with-mlops.jpg similarity index 100% rename from images/podcast/s07e03-product-management-essentials.jpg rename to images/podcast/building-and-scaling-ai-data-products-with-mlops.jpg diff --git a/images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg b/images/podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.jpg similarity index 100% rename from images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg rename to images/podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.jpg diff --git a/images/podcast/s11e05-building-data-science-practice.jpg b/images/podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.jpg similarity index 100% rename from images/podcast/s11e05-building-data-science-practice.jpg rename to images/podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.jpg diff --git a/images/podcast/s05e06-building-and-leading-data-teams.jpg b/images/podcast/building-and-scaling-data-team.jpg similarity index 100% rename from images/podcast/s05e06-building-and-leading-data-teams.jpg rename to 
images/podcast/building-and-scaling-data-team.jpg diff --git a/images/podcast/s11e06-product-owners-in-data-science.jpg b/images/podcast/building-data-products-product-owner-vs-product-manager.jpg similarity index 100% rename from images/podcast/s11e06-product-owners-in-data-science.jpg rename to images/podcast/building-data-products-product-owner-vs-product-manager.jpg diff --git a/images/podcast/s10e08-leading-data-research.jpg b/images/podcast/building-data-science-programs-and-democratizing-high-performance-computing.jpg similarity index 100% rename from images/podcast/s10e08-leading-data-research.jpg rename to images/podcast/building-data-science-programs-and-democratizing-high-performance-computing.jpg diff --git a/images/podcast/s01e03-building-ds-team.jpg b/images/podcast/building-data-team.jpg similarity index 100% rename from images/podcast/s01e03-building-ds-team.jpg rename to images/podcast/building-data-team.jpg diff --git a/images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg b/images/podcast/building-domestic-risk-assessment-tool.jpg similarity index 100% rename from images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg rename to images/podcast/building-domestic-risk-assessment-tool.jpg diff --git a/images/podcast/s14e09-interpretable-ai-and-ml.jpg b/images/podcast/building-explainable-and-actionable-ai-ml-systems.jpg similarity index 100% rename from images/podcast/s14e09-interpretable-ai-and-ml.jpg rename to images/podcast/building-explainable-and-actionable-ai-ml-systems.jpg diff --git a/images/podcast/s16e02-bridging-data-science-and-healthcare.jpg b/images/podcast/building-healthcare-machine-learning-systems.jpg similarity index 100% rename from images/podcast/s16e02-bridging-data-science-and-healthcare.jpg rename to images/podcast/building-healthcare-machine-learning-systems.jpg diff --git a/images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg 
b/images/podcast/building-ml-communities-diversity-and-career-growth.jpg similarity index 100% rename from images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg rename to images/podcast/building-ml-communities-diversity-and-career-growth.jpg diff --git a/images/podcast/s04e04-ml-startup.jpg b/images/podcast/building-mlops-startup.jpg similarity index 100% rename from images/podcast/s04e04-ml-startup.jpg rename to images/podcast/building-mlops-startup.jpg diff --git a/images/podcast/s11e04-large-scale-entity-resolution.jpg b/images/podcast/building-open-source-data-product-for-identity-resolution.jpg similarity index 100% rename from images/podcast/s11e04-large-scale-entity-resolution.jpg rename to images/podcast/building-open-source-data-product-for-identity-resolution.jpg diff --git a/images/podcast/s13e09-building-open-source-nlp-tool.jpg b/images/podcast/building-open-source-nlp-tool.jpg similarity index 100% rename from images/podcast/s13e09-building-open-source-nlp-tool.jpg rename to images/podcast/building-open-source-nlp-tool.jpg diff --git a/images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg b/images/podcast/building-production-ml-platform-and-mlops-team.jpg similarity index 100% rename from images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg rename to images/podcast/building-production-ml-platform-and-mlops-team.jpg diff --git a/images/podcast/s17e09-building-production-search-systems.jpg b/images/podcast/building-production-search-systems.jpg similarity index 100% rename from images/podcast/s17e09-building-production-search-systems.jpg rename to images/podcast/building-production-search-systems.jpg diff --git a/images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg b/images/podcast/building-scalable-and-reliable-machine-learning-systems.jpg similarity index 100% rename from 
images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg rename to images/podcast/building-scalable-and-reliable-machine-learning-systems.jpg diff --git a/images/podcast/s15e06-democratizing-causality.jpg b/images/podcast/causal-inference-for-machine-learning.jpg similarity index 100% rename from images/podcast/s15e06-democratizing-causality.jpg rename to images/podcast/causal-inference-for-machine-learning.jpg diff --git a/images/podcast/s04e09-chief-data-officer.jpg b/images/podcast/chief-data-officer-data-strategy-and-org-design.jpg similarity index 100% rename from images/podcast/s04e09-chief-data-officer.jpg rename to images/podcast/chief-data-officer-data-strategy-and-org-design.jpg diff --git a/images/podcast/s03e10-data-governance.jpg b/images/podcast/cloud-data-governance.jpg similarity index 100% rename from images/podcast/s03e10-data-governance.jpg rename to images/podcast/cloud-data-governance.jpg diff --git a/images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg b/images/podcast/community-building-and-teaching-in-ai-tech.jpg similarity index 100% rename from images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg rename to images/podcast/community-building-and-teaching-in-ai-tech.jpg diff --git a/images/podcast/s01e02-processes.jpg b/images/podcast/crisp-dm.jpg similarity index 100% rename from images/podcast/s01e02-processes.jpg rename to images/podcast/crisp-dm.jpg diff --git a/images/podcast/s12e03-data-centric-ai.jpg b/images/podcast/data-centric.jpg similarity index 100% rename from images/podcast/s12e03-data-centric-ai.jpg rename to images/podcast/data-centric.jpg diff --git a/images/podcast/s13e04-starting-consultancy-in-data-space.jpg b/images/podcast/data-consulting-business-pricing-and-client-acquisition.jpg similarity index 100% rename from images/podcast/s13e04-starting-consultancy-in-data-space.jpg rename to images/podcast/data-consulting-business-pricing-and-client-acquisition.jpg diff 
--git a/images/podcast/s08e08-teaching-data-engineers.jpg b/images/podcast/data-engineering-career-path-and-skills.jpg similarity index 100% rename from images/podcast/s08e08-teaching-data-engineers.jpg rename to images/podcast/data-engineering-career-path-and-skills.jpg diff --git a/images/podcast/s07e07-becoming-a-data-engineering-manager.jpg b/images/podcast/data-engineering-leadership-and-modern-data-platforms.jpg similarity index 100% rename from images/podcast/s07e07-becoming-a-data-engineering-manager.jpg rename to images/podcast/data-engineering-leadership-and-modern-data-platforms.jpg diff --git a/images/podcast/s05e02-data-engineering-acronyms.jpg b/images/podcast/data-engineering-tools-modern-data-stack.jpg similarity index 100% rename from images/podcast/s05e02-data-engineering-acronyms.jpg rename to images/podcast/data-engineering-tools-modern-data-stack.jpg diff --git a/images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg b/images/podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.jpg similarity index 100% rename from images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg rename to images/podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.jpg diff --git a/images/podcast/s14e04-data-access-management.jpg b/images/podcast/data-governance-data-access-management.jpg similarity index 100% rename from images/podcast/s14e04-data-access-management.jpg rename to images/podcast/data-governance-data-access-management.jpg diff --git a/images/podcast/s06e02-non-technical-interviews.jpg b/images/podcast/data-interview-behavioral-and-portfolio-prep-guide.jpg similarity index 100% rename from images/podcast/s06e02-non-technical-interviews.jpg rename to images/podcast/data-interview-behavioral-and-portfolio-prep-guide.jpg diff --git a/images/podcast/s11e08-technical-writing-and-data-journalism.jpg b/images/podcast/data-journalism-python-visualization-storytelling.jpg similarity index 
100% rename from images/podcast/s11e08-technical-writing-and-data-journalism.jpg rename to images/podcast/data-journalism-python-visualization-storytelling.jpg diff --git a/images/podcast/s18e01-inclusive-data-leadership-coaching.jpg b/images/podcast/data-leadership-coaching.jpg similarity index 100% rename from images/podcast/s18e01-inclusive-data-leadership-coaching.jpg rename to images/podcast/data-leadership-coaching.jpg diff --git a/images/podcast/s03e08-data-led-professional.jpg b/images/podcast/data-led-growth-event-tracking-and-reverse-etl.jpg similarity index 100% rename from images/podcast/s03e08-data-led-professional.jpg rename to images/podcast/data-led-growth-event-tracking-and-reverse-etl.jpg diff --git a/images/podcast/s10e06-data-mesh-101.jpg b/images/podcast/data-mesh-architecture-decentralized-data-products.jpg similarity index 100% rename from images/podcast/s10e06-data-mesh-101.jpg rename to images/podcast/data-mesh-architecture-decentralized-data-products.jpg diff --git a/images/podcast/s14e02-practical-data-privacy.jpg b/images/podcast/data-privacy-engineering-gdpr-machine-learning.jpg similarity index 100% rename from images/podcast/s14e02-practical-data-privacy.jpg rename to images/podcast/data-privacy-engineering-gdpr-machine-learning.jpg diff --git a/images/podcast/s12e02-business-skills-for-data-professionals.jpg b/images/podcast/data-professionals-business-skills-in-saas.jpg similarity index 100% rename from images/podcast/s12e02-business-skills-for-data-professionals.jpg rename to images/podcast/data-professionals-business-skills-in-saas.jpg diff --git a/images/podcast/s03e03-data-observability.jpg b/images/podcast/data-quality-data-observability-data-reliability.jpg similarity index 100% rename from images/podcast/s03e03-data-observability.jpg rename to images/podcast/data-quality-data-observability-data-reliability.jpg diff --git a/images/podcast/s13e02-analytics-for-better-world.jpg 
b/images/podcast/data-science-and-analytics-for-nonprofits-tech-for-good.jpg similarity index 100% rename from images/podcast/s13e02-analytics-for-better-world.jpg rename to images/podcast/data-science-and-analytics-for-nonprofits-tech-for-good.jpg diff --git a/images/podcast/s02e07-abc-data-science.jpg b/images/podcast/data-science-career-abc-framework.jpg similarity index 100% rename from images/podcast/s02e07-abc-data-science.jpg rename to images/podcast/data-science-career-abc-framework.jpg diff --git a/images/podcast/s03e09-what-data-scientists-dont-mention.jpg b/images/podcast/data-science-failures-and-mlops-lessons.jpg similarity index 100% rename from images/podcast/s03e09-what-data-scientists-dont-mention.jpg rename to images/podcast/data-science-failures-and-mlops-lessons.jpg diff --git a/images/podcast/s10e01-data-science-for-social-impact.jpg b/images/podcast/data-science-for-public-policy-ethical-ai-social-impact.jpg similarity index 100% rename from images/podcast/s10e01-data-science-for-social-impact.jpg rename to images/podcast/data-science-for-public-policy-ethical-ai-social-impact.jpg diff --git a/images/podcast/s03e04-interviewing-300-data-scientists.jpg b/images/podcast/data-science-interview-and-cv-guide.jpg similarity index 100% rename from images/podcast/s03e04-interviewing-300-data-scientists.jpg rename to images/podcast/data-science-interview-and-cv-guide.jpg diff --git a/images/podcast/s10e02-decoding-data-science-job-descriptions.jpg b/images/podcast/data-science-job-red-flags-and-mismatched-roles.jpg similarity index 100% rename from images/podcast/s10e02-decoding-data-science-job-descriptions.jpg rename to images/podcast/data-science-job-red-flags-and-mismatched-roles.jpg diff --git a/images/podcast/s06e09-data-science-manager.jpg b/images/podcast/data-science-leadership-hiring-mlops.jpg similarity index 100% rename from images/podcast/s06e09-data-science-manager.jpg rename to images/podcast/data-science-leadership-hiring-mlops.jpg diff 
--git a/images/podcast/s13e06-secret-sauce-of-data-science-management.jpg b/images/podcast/data-science-management-and-agile-machine-learning.jpg similarity index 100% rename from images/podcast/s13e06-secret-sauce-of-data-science-management.jpg rename to images/podcast/data-science-management-and-agile-machine-learning.jpg diff --git a/images/podcast/s06e03-manager-vs-expert.jpg b/images/podcast/data-science-manager-vs-expert-hiring-guide.jpg similarity index 100% rename from images/podcast/s06e03-manager-vs-expert.jpg rename to images/podcast/data-science-manager-vs-expert-hiring-guide.jpg diff --git a/images/podcast/s09e07-designing-data-science-organization.jpg b/images/podcast/data-science-team-structure-and-org-design.jpg similarity index 100% rename from images/podcast/s09e07-designing-data-science-organization.jpg rename to images/podcast/data-science-team-structure-and-org-design.jpg diff --git a/images/podcast/s12e05-indie-hacking.jpg b/images/podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.jpg similarity index 100% rename from images/podcast/s12e05-indie-hacking.jpg rename to images/podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.jpg diff --git a/images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg b/images/podcast/data-strategy-and-dataops-for-ai-powered-products.jpg similarity index 100% rename from images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg rename to images/podcast/data-strategy-and-dataops-for-ai-powered-products.jpg diff --git a/images/podcast/s01e01-roles.jpg b/images/podcast/data-team-roles.jpg similarity index 100% rename from images/podcast/s01e01-roles.jpg rename to images/podcast/data-team-roles.jpg diff --git a/images/podcast/s03e04-effective-communication-with-business.jpg b/images/podcast/data-translator-role-and-data-strategy.jpg similarity index 100% rename from images/podcast/s03e04-effective-communication-with-business.jpg rename to 
images/podcast/data-translator-role-and-data-strategy.jpg diff --git a/images/podcast/s11e03-from-data-science-to-dataops.jpg b/images/podcast/dataops-and-gitops-best-practices-for-data-teams.jpg similarity index 100% rename from images/podcast/s11e03-from-data-science-to-dataops.jpg rename to images/podcast/dataops-and-gitops-best-practices-for-data-teams.jpg diff --git a/images/podcast/s08e05-storytime-for-dataops.jpg b/images/podcast/dataops-automation-and-reliable-data-pipelines.jpg similarity index 100% rename from images/podcast/s08e05-storytime-for-dataops.jpg rename to images/podcast/dataops-automation-and-reliable-data-pipelines.jpg diff --git a/images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg b/images/podcast/dataops-for-data-engineering.jpg similarity index 100% rename from images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg rename to images/podcast/dataops-for-data-engineering.jpg diff --git a/images/podcast/s02e11-dataops.jpg b/images/podcast/dataops-principles-and-scalable-data-platforms.jpg similarity index 100% rename from images/podcast/s02e11-dataops.jpg rename to images/podcast/dataops-principles-and-scalable-data-platforms.jpg diff --git a/images/podcast/s07e01-datatalksclub-behind-the-scenes.jpg b/images/podcast/datatalksclub-building-scaling-data-community.jpg similarity index 100% rename from images/podcast/s07e01-datatalksclub-behind-the-scenes.jpg rename to images/podcast/datatalksclub-building-scaling-data-community.jpg diff --git a/images/podcast/s16e01-datatalks-club-anniversary-interview.jpg b/images/podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.jpg similarity index 100% rename from images/podcast/s16e01-datatalks-club-anniversary-interview.jpg rename to images/podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.jpg diff --git a/images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg 
b/images/podcast/datatalksclub-scaling-and-free-courses.jpg similarity index 100% rename from images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg rename to images/podcast/datatalksclub-scaling-and-free-courses.jpg diff --git a/images/podcast/s15e03-llms-for-everyone.jpg b/images/podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.jpg similarity index 100% rename from images/podcast/s15e03-llms-for-everyone.jpg rename to images/podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.jpg diff --git a/images/podcast/s03e07-market-yourself.jpg b/images/podcast/developer-personal-brand-learn-in-public.jpg similarity index 100% rename from images/podcast/s03e07-market-yourself.jpg rename to images/podcast/developer-personal-brand-learn-in-public.jpg diff --git a/images/podcast/s02e02-developer-advocacy.jpg b/images/podcast/devrel-data-science-open-source-tools.jpg similarity index 100% rename from images/podcast/s02e02-developer-advocacy.jpg rename to images/podcast/devrel-data-science-open-source-tools.jpg diff --git a/images/podcast/s14e06-data-developer-relations.jpg b/images/podcast/devrel-open-source-machine-learning.jpg similarity index 100% rename from images/podcast/s14e06-data-developer-relations.jpg rename to images/podcast/devrel-open-source-machine-learning.jpg diff --git a/images/podcast/s19e09-linguistics-and-fairness.jpg b/images/podcast/fairness-in-ai-ml-engineering.jpg similarity index 100% rename from images/podcast/s19e09-linguistics-and-fairness.jpg rename to images/podcast/fairness-in-ai-ml-engineering.jpg diff --git a/images/podcast/s05e09-business-acumen.jpg b/images/podcast/feature-engineering-model-monitoring-and-data-governance.jpg similarity index 100% rename from images/podcast/s05e09-business-acumen.jpg rename to images/podcast/feature-engineering-model-monitoring-and-data-governance.jpg diff --git a/images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg 
b/images/podcast/finops-for-data-engineers.jpg similarity index 100% rename from images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg rename to images/podcast/finops-for-data-engineers.jpg diff --git a/images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg b/images/podcast/freelance-data-engineering-pricing-and-clients.jpg similarity index 100% rename from images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg rename to images/podcast/freelance-data-engineering-pricing-and-clients.jpg diff --git a/images/podcast/s04e08-freelancing.jpg b/images/podcast/freelancing-in-machine-learning.jpg similarity index 100% rename from images/podcast/s04e08-freelancing.jpg rename to images/podcast/freelancing-in-machine-learning.jpg diff --git a/images/podcast/s12e09-staff-ai-engineer.jpg b/images/podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.jpg similarity index 100% rename from images/podcast/s12e09-staff-ai-engineer.jpg rename to images/podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.jpg diff --git a/images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg b/images/podcast/from-academic-research-to-data-engineering-freelancing.jpg similarity index 100% rename from images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg rename to images/podcast/from-academic-research-to-data-engineering-freelancing.jpg diff --git a/images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg b/images/podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.jpg similarity index 100% rename from images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg rename to images/podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.jpg diff --git 
a/images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg b/images/podcast/from-computer-vision-research-to-autonomous-driving-ai.jpg similarity index 100% rename from images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg rename to images/podcast/from-computer-vision-research-to-autonomous-driving-ai.jpg diff --git a/images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg b/images/podcast/from-data-freelancer-to-startup-open-source-products.jpg similarity index 100% rename from images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg rename to images/podcast/from-data-freelancer-to-startup-open-source-products.jpg diff --git a/images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg b/images/podcast/from-devops-to-data-engineering-automation-open-source-volunteering.jpg similarity index 100% rename from images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg rename to images/podcast/from-devops-to-data-engineering-automation-open-source-volunteering.jpg diff --git a/images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg b/images/podcast/from-game-ai-to-modern-ai-agents.jpg similarity index 100% rename from images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg rename to images/podcast/from-game-ai-to-modern-ai-agents.jpg diff --git a/images/podcast/s15e08-from-data-manager-to-data-architect.jpg b/images/podcast/from-iot-data-engineering-to-leading-data-architect.jpg similarity index 100% rename from images/podcast/s15e08-from-data-manager-to-data-architect.jpg rename to images/podcast/from-iot-data-engineering-to-leading-data-architect.jpg diff --git a/images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg b/images/podcast/from-large-hadron-collider-to-data-science-research-software-engineering.jpg similarity index 100% rename from images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg 
rename to images/podcast/from-large-hadron-collider-to-data-science-research-software-engineering.jpg diff --git a/images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg b/images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg similarity index 100% rename from images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg rename to images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg diff --git a/images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg b/images/podcast/from-math-graduate-to-data-analytics.jpg similarity index 100% rename from images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg rename to images/podcast/from-math-graduate-to-data-analytics.jpg diff --git a/images/podcast/s03e06-from-physics-to-machine-learning.jpg b/images/podcast/from-physics-to-computer-vision-career-transition.jpg similarity index 100% rename from images/podcast/s03e06-from-physics-to-machine-learning.jpg rename to images/podcast/from-physics-to-computer-vision-career-transition.jpg diff --git a/images/podcast/s21e05-from-astronomy-to-applied-ml.jpg b/images/podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.jpg similarity index 100% rename from images/podcast/s21e05-from-astronomy-to-applied-ml.jpg rename to images/podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.jpg diff --git a/images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg b/images/podcast/from-semiconductor-data-to-applied-machine-learning.jpg similarity index 100% rename from images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg rename to images/podcast/from-semiconductor-data-to-applied-machine-learning.jpg diff --git a/images/podcast/s04e01-from-swe-to-ml.jpg b/images/podcast/from-software-engineer-to-machine-learning.jpg similarity index 100% rename from 
images/podcast/s04e01-from-swe-to-ml.jpg rename to images/podcast/from-software-engineer-to-machine-learning.jpg diff --git a/images/podcast/s07e08-from-data-science-to-data-engineering.jpg b/images/podcast/from-software-engineering-data-science-to-data-engineering-leadership.jpg similarity index 100% rename from images/podcast/s07e08-from-data-science-to-data-engineering.jpg rename to images/podcast/from-software-engineering-data-science-to-data-engineering-leadership.jpg diff --git a/images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg b/images/podcast/from-software-engineering-to-leading-data-science-teams.jpg similarity index 100% rename from images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg rename to images/podcast/from-software-engineering-to-leading-data-science-teams.jpg diff --git a/images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg b/images/podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.jpg similarity index 100% rename from images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg rename to images/podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.jpg diff --git a/images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg b/images/podcast/from-startup-engineering-to-freelance-data-science.jpg similarity index 100% rename from images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg rename to images/podcast/from-startup-engineering-to-freelance-data-science.jpg diff --git a/images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg b/images/podcast/generative-ai-chatbots-in-production-security.jpg similarity index 100% rename from images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg rename to images/podcast/generative-ai-chatbots-in-production-security.jpg diff --git 
a/images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg b/images/podcast/get-data-analytics-and-data-engineering-job.jpg similarity index 100% rename from images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg rename to images/podcast/get-data-analytics-and-data-engineering-job.jpg diff --git a/images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg b/images/podcast/get-data-engineering-job-prep-and-interview.jpg similarity index 100% rename from images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg rename to images/podcast/get-data-engineering-job-prep-and-interview.jpg diff --git a/images/podcast/s01e04-standing-out-as-a-data-scientist.jpg b/images/podcast/get-data-scientist-job.jpg similarity index 100% rename from images/podcast/s01e04-standing-out-as-a-data-scientist.jpg rename to images/podcast/get-data-scientist-job.jpg diff --git a/images/podcast/s07e04-career-coaching.jpg b/images/podcast/get-junior-data-job-and-transferable-skills.jpg similarity index 100% rename from images/podcast/s07e04-career-coaching.jpg rename to images/podcast/get-junior-data-job-and-transferable-skills.jpg diff --git a/images/podcast/s11e02-data-science-career-development.jpg b/images/podcast/hiring-and-managing-data-science-teams-in-b2b-saas.jpg similarity index 100% rename from images/podcast/s11e02-data-science-career-development.jpg rename to images/podcast/hiring-and-managing-data-science-teams-in-b2b-saas.jpg diff --git a/images/podcast/s07e02-recruiting-data-professionals.jpg b/images/podcast/hiring-data-scientists-and-analysts.jpg similarity index 100% rename from images/podcast/s07e02-recruiting-data-professionals.jpg rename to images/podcast/hiring-data-scientists-and-analysts.jpg diff --git a/images/podcast/s08e06-recruiting-data-engineers.jpg b/images/podcast/hiring-for-data-engineering-jobs-in-europe.jpg similarity index 100% rename from images/podcast/s08e06-recruiting-data-engineers.jpg 
rename to images/podcast/hiring-for-data-engineering-jobs-in-europe.jpg diff --git a/images/podcast/s09e09-hiring-data-science-talent.jpg b/images/podcast/hiring-for-data-science-jobs-interview-questions-skills.md.jpg similarity index 100% rename from images/podcast/s09e09-hiring-data-science-talent.jpg rename to images/podcast/hiring-for-data-science-jobs-interview-questions-skills.md.jpg diff --git a/images/podcast/s09e05-data-scientists-at-work.jpg b/images/podcast/how-to-break-into-data-science.jpg similarity index 100% rename from images/podcast/s09e05-data-scientists-at-work.jpg rename to images/podcast/how-to-break-into-data-science.jpg diff --git a/images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg b/images/podcast/how-to-grow-your-ml-engineering-career.jpg similarity index 100% rename from images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg rename to images/podcast/how-to-grow-your-ml-engineering-career.jpg diff --git a/images/podcast/s08e02-hacking-your-data-career.jpg b/images/podcast/how-to-stand-out-in-data-science.jpg similarity index 100% rename from images/podcast/s08e02-hacking-your-data-career.jpg rename to images/podcast/how-to-stand-out-in-data-science.jpg diff --git a/images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg b/images/podcast/how-to-switch-to-ml-tech-without-experience.jpg similarity index 100% rename from images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg rename to images/podcast/how-to-switch-to-ml-tech-without-experience.jpg diff --git a/images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg b/images/podcast/how-to-transition-into-ml-and-data-engineering-from-qa.jpg similarity index 100% rename from images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg rename to images/podcast/how-to-transition-into-ml-and-data-engineering-from-qa.jpg diff --git a/images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg 
b/images/podcast/hugging-face-contributions-and-nlp-portfolio.jpg similarity index 100% rename from images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg rename to images/podcast/hugging-face-contributions-and-nlp-portfolio.jpg diff --git a/images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg b/images/podcast/human-centered-ai-automatic-speech-recognition.jpg similarity index 100% rename from images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg rename to images/podcast/human-centered-ai-automatic-speech-recognition.jpg diff --git a/images/podcast/s04e06-humans-in-the-loop.jpg b/images/podcast/human-centered-mlops-and-model-monitoring.jpg similarity index 100% rename from images/podcast/s04e06-humans-in-the-loop.jpg rename to images/podcast/human-centered-mlops-and-model-monitoring.jpg diff --git a/images/podcast/s13e08-navigating-industrial-data-challenges.jpg b/images/podcast/industrial-data-small-data-production-machine-learning.jpg similarity index 100% rename from images/podcast/s13e08-navigating-industrial-data-challenges.jpg rename to images/podcast/industrial-data-small-data-production-machine-learning.jpg diff --git a/images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg b/images/podcast/interpretable-machine-learning.jpg similarity index 100% rename from images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg rename to images/podcast/interpretable-machine-learning.jpg diff --git a/images/podcast/s15e02-investing-in-open-source-data-tools.jpg b/images/podcast/investing-in-open-source-developer-tools.jpg similarity index 100% rename from images/podcast/s15e02-investing-in-open-source-data-tools.jpg rename to images/podcast/investing-in-open-source-developer-tools.jpg diff --git a/images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg b/images/podcast/job-search-strategy-in-tech-projects-skills-cv-networking.jpg similarity index 100% 
rename from images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg rename to images/podcast/job-search-strategy-in-tech-projects-skills-cv-networking.jpg diff --git a/images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg b/images/podcast/kaggle-grandmaster-to-production-ml-and-education.jpg similarity index 100% rename from images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg rename to images/podcast/kaggle-grandmaster-to-production-ml-and-education.jpg diff --git a/images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg b/images/podcast/knowledge-graphs-and-llms-for-automotive-rnd.jpg similarity index 100% rename from images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg rename to images/podcast/knowledge-graphs-and-llms-for-automotive-rnd.jpg diff --git a/images/podcast/s05e08-the-last-mile-in-data.jpg b/images/podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.jpg similarity index 100% rename from images/podcast/s05e08-the-last-mile-in-data.jpg rename to images/podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.jpg diff --git a/images/podcast/s04e07-launching-a-startup.jpg b/images/podcast/launch-and-build-retail-startup.jpg similarity index 100% rename from images/podcast/s04e07-launching-a-startup.jpg rename to images/podcast/launch-and-build-retail-startup.jpg diff --git a/images/podcast/s20e04-mlops-in-corporations-and-startups.jpg b/images/podcast/lean-mlops-for-startups.jpg similarity index 100% rename from images/podcast/s20e04-mlops-in-corporations-and-startups.jpg rename to images/podcast/lean-mlops-for-startups.jpg diff --git a/images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg b/images/podcast/learning-machine-learning-self-taught-bioinformatics.jpg similarity index 100% rename from images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg rename to 
images/podcast/learning-machine-learning-self-taught-bioinformatics.jpg diff --git a/images/podcast/s12e06-preparing-for-data-science-interview.jpg b/images/podcast/machine-learning-data-science-interview-prep.jpg similarity index 100% rename from images/podcast/s12e06-preparing-for-data-science-interview.jpg rename to images/podcast/machine-learning-data-science-interview-prep.jpg diff --git a/images/podcast/s02e06-decision-optimization.jpg b/images/podcast/machine-learning-decision-optimization.jpg similarity index 100% rename from images/podcast/s02e06-decision-optimization.jpg rename to images/podcast/machine-learning-decision-optimization.jpg diff --git a/images/podcast/s04e05-running-from-complexity.jpg b/images/podcast/machine-learning-engineering-production-best-practices.jpg similarity index 100% rename from images/podcast/s04e05-running-from-complexity.jpg rename to images/podcast/machine-learning-engineering-production-best-practices.jpg diff --git a/images/podcast/s09e02-using-data-for-asteroid-mining.jpg b/images/podcast/machine-learning-for-asteroid-mining-and-water-detection.jpg similarity index 100% rename from images/podcast/s09e02-using-data-for-asteroid-mining.jpg rename to images/podcast/machine-learning-for-asteroid-mining-and-water-detection.jpg diff --git a/images/podcast/s09e01-machine-learning-in-marketing.jpg b/images/podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.jpg similarity index 100% rename from images/podcast/s09e01-machine-learning-in-marketing.jpg rename to images/podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.jpg diff --git a/images/podcast/s07e05-machine-learning-system-design-interview.jpg b/images/podcast/machine-learning-system-design-interview.jpg similarity index 100% rename from images/podcast/s07e05-machine-learning-system-design-interview.jpg rename to images/podcast/machine-learning-system-design-interview.jpg diff --git 
a/images/podcast/s02e09-roles-skills-monetizing-ml.jpg b/images/podcast/make-money-with-machine-learning-roles-skills.jpg similarity index 100% rename from images/podcast/s02e09-roles-skills-monetizing-ml.jpg rename to images/podcast/make-money-with-machine-learning-roles-skills.jpg diff --git a/images/podcast/s01e05-mentoring.jpg b/images/podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.jpg similarity index 100% rename from images/podcast/s01e05-mentoring.jpg rename to images/podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.jpg diff --git a/images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg b/images/podcast/mindful-data-strategy-for-business-impact.jpg similarity index 100% rename from images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg rename to images/podcast/mindful-data-strategy-for-business-impact.jpg diff --git a/images/podcast/s05e03-metrics-and-kpis.jpg b/images/podcast/ml-engineering-kpis-and-metrics-strategy.jpg similarity index 100% rename from images/podcast/s05e03-metrics-and-kpis.jpg rename to images/podcast/ml-engineering-kpis-and-metrics-strategy.jpg diff --git a/images/podcast/s06e07-product-management-for-machine-learning.jpg b/images/podcast/ml-product-manager-and-mlops-platform-strategy.jpg similarity index 100% rename from images/podcast/s06e07-product-management-for-machine-learning.jpg rename to images/podcast/ml-product-manager-and-mlops-platform-strategy.jpg diff --git a/images/podcast/s15e01-why-machine-learning-design-broken.jpg b/images/podcast/ml-system-design.jpg similarity index 100% rename from images/podcast/s15e01-why-machine-learning-design-broken.jpg rename to images/podcast/ml-system-design.jpg diff --git a/images/podcast/s17e05-machine-learning-engineering-in-finance.jpg b/images/podcast/mlops-and-ml-engineering-in-finance.jpg similarity index 100% rename from images/podcast/s17e05-machine-learning-engineering-in-finance.jpg rename to 
images/podcast/mlops-and-ml-engineering-in-finance.jpg diff --git a/images/podcast/s19e04-mlops-as-team.jpg b/images/podcast/mlops-at-scale-reproducibility-adoption.jpg similarity index 100% rename from images/podcast/s19e04-mlops-as-team.jpg rename to images/podcast/mlops-at-scale-reproducibility-adoption.jpg diff --git a/images/podcast/s02e12-communities.jpg b/images/podcast/mlops-community-building-and-meetups.jpg similarity index 100% rename from images/podcast/s02e12-communities.jpg rename to images/podcast/mlops-community-building-and-meetups.jpg diff --git a/images/podcast/s02e05-feature-stores.jpg b/images/podcast/mlops-feature-stores-feature-stores-feast-tecton.jpg similarity index 100% rename from images/podcast/s02e05-feature-stores.jpg rename to images/podcast/mlops-feature-stores-feature-stores-feast-tecton.jpg diff --git a/images/podcast/s02e04-mlops.jpg b/images/podcast/mlops-kubeflow-model-monitoring.jpg similarity index 100% rename from images/podcast/s02e04-mlops.jpg rename to images/podcast/mlops-kubeflow-model-monitoring.jpg diff --git a/images/podcast/s10e03-mlops-architect.jpg b/images/podcast/mlops-model-monitoring-data-observability.jpg similarity index 100% rename from images/podcast/s10e03-mlops-architect.jpg rename to images/podcast/mlops-model-monitoring-data-observability.jpg diff --git a/images/podcast/s14e07-from-mlops-to-dataops.jpg b/images/podcast/modern-data-pipelines-orchestration-ingestion-modeling.jpg similarity index 100% rename from images/podcast/s14e07-from-mlops-to-dataops.jpg rename to images/podcast/modern-data-pipelines-orchestration-ingestion-modeling.jpg diff --git a/images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg b/images/podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.jpg similarity index 100% rename from images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg rename to 
images/podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.jpg diff --git a/images/podcast/s10e07-dataset-creation-and-curation.jpg b/images/podcast/nlp-dataset-creation-annotation-tools-workflows.jpg similarity index 100% rename from images/podcast/s10e07-dataset-creation-and-curation.jpg rename to images/podcast/nlp-dataset-creation-annotation-tools-workflows.jpg diff --git a/images/podcast/s06e08-nlp-teams.jpg b/images/podcast/nlp-team-hiring-and-production-mlops.jpg similarity index 100% rename from images/podcast/s06e08-nlp-teams.jpg rename to images/podcast/nlp-team-hiring-and-production-mlops.jpg diff --git a/images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg b/images/podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.jpg similarity index 100% rename from images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg rename to images/podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.jpg diff --git a/images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg b/images/podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.jpg similarity index 100% rename from images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg rename to images/podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.jpg diff --git a/images/podcast/s02e03-open-source.jpg b/images/podcast/open-source-ml-contributions.jpg similarity index 100% rename from images/podcast/s02e03-open-source.jpg rename to images/podcast/open-source-ml-contributions.jpg diff --git a/images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg b/images/podcast/open-source-ml-tools-strategy-and-business-models.jpg similarity index 100% rename from images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg rename to 
images/podcast/open-source-ml-tools-strategy-and-business-models.jpg diff --git a/images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg b/images/podcast/open-source-turned-into-career-and-startup-creation.jpg similarity index 100% rename from images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg rename to images/podcast/open-source-turned-into-career-and-startup-creation.jpg diff --git a/images/podcast/s02e08-personal-branding.jpg b/images/podcast/personal-brand-for-data-professionals.jpg similarity index 100% rename from images/podcast/s02e08-personal-branding.jpg rename to images/podcast/personal-brand-for-data-professionals.jpg diff --git a/images/podcast/s06e06-from-academia-to-industry.jpg b/images/podcast/postdoc-to-data-science-lead-career-transition.jpg similarity index 100% rename from images/podcast/s06e06-from-academia-to-industry.jpg rename to images/podcast/postdoc-to-data-science-lead-career-transition.jpg diff --git a/images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg b/images/podcast/practical-devrel-demofirst-education-and-open-source.jpg similarity index 100% rename from images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg rename to images/podcast/practical-devrel-demofirst-education-and-open-source.jpg diff --git a/images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg b/images/podcast/practical-generative-ai-consulting-from-expertise-to-impact.jpg similarity index 100% rename from images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg rename to images/podcast/practical-generative-ai-consulting-from-expertise-to-impact.jpg diff --git a/images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg b/images/podcast/practical-llm-engineering-and-rag.jpg similarity index 100% rename from images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg rename to 
images/podcast/practical-llm-engineering-and-rag.jpg diff --git a/images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg b/images/podcast/practical-llm-use-cases-and-product-patterns.jpg similarity index 100% rename from images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg rename to images/podcast/practical-llm-use-cases-and-product-patterns.jpg diff --git a/images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg b/images/podcast/pragmatic-and-standardized-mlops.jpg similarity index 100% rename from images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg rename to images/podcast/pragmatic-and-standardized-mlops.jpg diff --git a/images/podcast/s06e04-becoming-a-data-product-manager.jpg b/images/podcast/product-designer-to-data-product-manager.jpg similarity index 100% rename from images/podcast/s06e04-becoming-a-data-product-manager.jpg rename to images/podcast/product-designer-to-data-product-manager.jpg diff --git a/images/podcast/s05e07-ml-vs-analytics.jpg b/images/podcast/production-ml-mlops-and-data-team-building.jpg similarity index 100% rename from images/podcast/s05e07-ml-vs-analytics.jpg rename to images/podcast/production-ml-mlops-and-data-team-building.jpg diff --git a/images/podcast/s04e02-build-your-own-data-pipeline.jpg b/images/podcast/production-ml-pipelines-with-aws-and-kafka.jpg similarity index 100% rename from images/podcast/s04e02-build-your-own-data-pipeline.jpg rename to images/podcast/production-ml-pipelines-with-aws-and-kafka.jpg diff --git a/images/podcast/s17e08-building-machine-learning-products.jpg b/images/podcast/production-ml-search-vector-search-embeddings-hybrid search.jpg similarity index 100% rename from images/podcast/s17e08-building-machine-learning-products.jpg rename to images/podcast/production-ml-search-vector-search-embeddings-hybrid search.jpg diff --git a/images/podcast/s20e05-data-intensive-ai.jpg b/images/podcast/production-ready-ai-engineering.jpg similarity index 100% rename from 
images/podcast/s20e05-data-intensive-ai.jpg rename to images/podcast/production-ready-ai-engineering.jpg diff --git a/images/podcast/s03e01-from-pm-to-ds.jpg b/images/podcast/project-manager-to-data-scientist.jpg similarity index 100% rename from images/podcast/s03e01-from-pm-to-ds.jpg rename to images/podcast/project-manager-to-data-scientist.jpg diff --git a/images/podcast/s02e10-public-speaking.jpg b/images/podcast/public-speaking-for-data-scientists.jpg similarity index 100% rename from images/podcast/s02e10-public-speaking.jpg rename to images/podcast/public-speaking-for-data-scientists.jpg diff --git a/images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg b/images/podcast/remote-data-engineering-work-and-building-iot-platforms.jpg similarity index 100% rename from images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg rename to images/podcast/remote-data-engineering-work-and-building-iot-platforms.jpg diff --git a/images/podcast/s05e05-researchers-vs-engineers.jpg b/images/podcast/research-to-production-ml-systems-roadmap.jpg similarity index 100% rename from images/podcast/s05e05-researchers-vs-engineers.jpg rename to images/podcast/research-to-production-ml-systems-roadmap.jpg diff --git a/images/podcast/s10e09-responsible-and-explainable-ai.jpg b/images/podcast/responsible-explainable-ai-bias-detection.jpg similarity index 100% rename from images/podcast/s10e09-responsible-and-explainable-ai.jpg rename to images/podcast/responsible-explainable-ai-bias-detection.jpg diff --git a/images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg b/images/podcast/scaling-data-engineering-teams-self-service-platforms.jpg similarity index 100% rename from images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg rename to images/podcast/scaling-data-engineering-teams-self-service-platforms.jpg diff --git a/images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg 
b/images/podcast/scaling-enterprise-ai-mlops-data-first-strategy.jpg similarity index 100% rename from images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg rename to images/podcast/scaling-enterprise-ai-mlops-data-first-strategy.jpg diff --git a/images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg b/images/podcast/software-engineering-for-machine-learning.jpg similarity index 100% rename from images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg rename to images/podcast/software-engineering-for-machine-learning.jpg diff --git a/images/podcast/s05e04-introducing-data-science-in-startups.jpg b/images/podcast/solopreneur-data-scientist.jpg similarity index 100% rename from images/podcast/s05e04-introducing-data-science-in-startups.jpg rename to images/podcast/solopreneur-data-scientist.jpg diff --git a/images/podcast/s06e01-solopreneur.jpg b/images/podcast/solopreneur-developer-and-data-professional.jpg similarity index 100% rename from images/podcast/s06e01-solopreneur.jpg rename to images/podcast/solopreneur-developer-and-data-professional.jpg diff --git a/images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg b/images/podcast/teaching-mentoring-data-analytics-fintech.jpg similarity index 100% rename from images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg rename to images/podcast/teaching-mentoring-data-analytics-fintech.jpg diff --git a/images/podcast/s12e04-doing-software-engineering-in-academia.jpg b/images/podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.jpg similarity index 100% rename from images/podcast/s12e04-doing-software-engineering-in-academia.jpg rename to images/podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.jpg diff --git a/images/podcast/s02e01-writing.jpg b/images/podcast/technical-writing-for-data-scientists.jpg similarity index 100% rename from images/podcast/s02e01-writing.jpg rename 
to images/podcast/technical-writing-for-data-scientists.jpg diff --git a/images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg b/images/podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.jpg similarity index 100% rename from images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg rename to images/podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.jpg diff --git a/images/podcast/s20e03-trends-in-data-engineering.jpg b/images/podcast/trends-in-modern-data-engineering.jpg similarity index 100% rename from images/podcast/s20e03-trends-in-data-engineering.jpg rename to images/podcast/trends-in-modern-data-engineering.jpg diff --git a/images/podcast/s19e01-using-data-to-create-liveable-cities.jpg b/images/podcast/urban-data-science.jpg similarity index 100% rename from images/podcast/s19e01-using-data-to-create-liveable-cities.jpg rename to images/podcast/urban-data-science.jpg diff --git a/images/podcast/s08e01-visualising-machine-learning.jpg b/images/podcast/visualizing-machine-learning-concepts-to-explain-ml.jpg similarity index 100% rename from images/podcast/s08e01-visualising-machine-learning.jpg rename to images/podcast/visualizing-machine-learning-concepts-to-explain-ml.jpg diff --git a/podcast-errors-found.md b/podcast-errors-found.md deleted file mode 100644 index 4a6c7313..00000000 --- a/podcast-errors-found.md +++ /dev/null @@ -1,242 +0,0 @@ -# Podcast Files - Errors Found - -## Summary - -A comprehensive analysis of all podcast markdown files in `_podcast/` directory revealed several categories of errors affecting multiple files. - ---- - -## 1. 
✅ FIXED: Truncated Anchor IDs (264 instances across 187 files) - -**Status:** Already corrected in your recent edits - -**Issue:** The `ids.anchor` field had truncated values missing the proper prefix: -- `atatalksclub` → should be `datatalksclub` (missing 'd') -- `atalksclub` → should be `datatalksclub` (missing 'dat') -- `lub/episodes/` → should be `datatalksclub/episodes/` - -**Example:** -```yaml -# INCORRECT -ids: - anchor: atatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi - -# CORRECT -ids: - anchor: datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi -``` - ---- - -## 2. 🔴 Special Dash Characters (187 files affected) - -**Issue:** Non-ASCII dash characters used throughout files: -- `‑` (U+2011: Non-breaking hyphen) -- `—` (U+2014: Em dash) -- `–` (U+2013: En dash) - -**Impact:** -- May cause encoding issues -- Inconsistent with standard ASCII hyphen `-` -- Can break parsing or search functionality - -**Most affected files:** -- `algorithmic-trading-with-python-and-machine-learning.md` (123 instances) -- `from-data-freelancer-to-startup-open-source-products.md` (136 instances) -- `from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md` (134 instances) -- `building-data-team.md` (104 instances) - -**Example locations in ai-for-ecology-biodiversity-and-conservation.md:** -- Line 22: `Berger‑Wolf` (should be `Berger-Wolf`) -- Line 63: `Photo‑ID` (should be `Photo-ID`) -- Throughout intro text - -**Recommendation:** Replace all special dashes with standard ASCII hyphen `-` - ---- - -## 3. 🔴 YAML Escaped Quotes (130 files) - -**Issue:** Doubled single quotes `''` used for escaping in YAML strings - -**Example:** -```yaml -context: 'AI''s most important role...' -``` - -**Current Status:** This is actually **valid YAML syntax** for escaping single quotes within single-quoted strings. However, it may look confusing. - -**Alternative approaches:** -1. 
Use double quotes: `"AI's most important role"` -2. Keep as-is (valid YAML) -3. Use multiline literal blocks - -**Verdict:** Not necessarily an error, but could be standardized for consistency. - ---- - -## 4. 🔴 QuotableClips with Same Start/End Offset (15 files) - -**Issue:** Final quotableClip entries have `startOffset` equal to `endOffset`, creating zero-duration clips - -**Affected files:** -1. `ai-for-ecology-biodiversity-and-conservation.md` - 'Episode Closing: Key Takeaways and Next Steps' (3720) -2. `open-source-ml-contributions.md` - 'Episode Wrap-Up and Final Advice' (2280) -3. `technical-writing-for-data-scientists.md` - 'Podcast Wrap-Up and Resources' (3630) -4. `mindful-data-strategy-for-business-impact.md` - 'Episode Outro and Hummus Banter' (3965) -5. `nlp-dataset-creation-annotation-tools-workflows.md` - 'Contact & Resources' (3820) -6. `personal-brand-for-data-professionals.md` - 'Episode Close and Links' (3030) -7. `building-domestic-risk-assessment-tool.md` - 'Episode Wrap-Up' (3840) -8. `mlops-feature-stores-feature-stores-feast-tecton.md` - 'Episode Close' (3450) -9. `data-freelancing-career-strategy-market-demand-and-client-acquisition.md` - 'Episode Wrap-up' (3929) -10. `from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md` - 'Episode Wrap-Up' (3822) -... and 5 more - -**Example from current file (lines 131-134):** -```yaml -- name: 'Episode Closing: Key Takeaways and Next Steps' - startOffset: 3720 - url: https://www.youtube.com/watch?v=30tTrozbAkg&t=3720 - endOffset: 3720 # ⚠️ Same as startOffset! -``` - -**Recommendation:** Either: -- Set endOffset to actual video duration -- Remove these zero-duration closing clips -- Add a small duration (e.g., +60 seconds) - ---- - -## 5. 🟡 Missing Duration Field (12 files) - -**Issue:** `duration:` field missing from frontmatter (should be in ISO 8601 format like `PT01H02M30S`) - -**Affected files:** -1. 
**ai-for-ecology-biodiversity-and-conservation.md** ⬅️ **CURRENT FILE** -2. open-source-ml-contributions.md -3. technical-writing-for-data-scientists.md -4. personal-brand-for-data-professionals.md -5. building-domestic-risk-assessment-tool.md -6. crisp-dm.md -7. building-data-products-lead-data-scientist.md -8. mlops-feature-stores-feature-stores-feast-tecton.md -9. from-marketing-to-product-owner-in-search.md -10. machine-learning-decision-optimization.md -11. data-team-roles.md -12. mentoring-in-tech-how-to-find-and-become-a-mentor.md.md - -**Impact:** May affect schema markup, SEO, and podcast platforms - -**How to calculate:** Based on the highest `endOffset` value (in seconds), convert to `PT[H]H[M]M[S]S` format - -**For current file:** -- Highest endOffset: 3720 seconds = 62 minutes = 1 hour 2 minutes -- Should add: `duration: PT01H02M00S` - ---- - -## 6. 🟡 Missing Topics Field (63 files) - -**Issue:** `topics:` field missing from frontmatter - -**Impact:** Reduced discoverability, categorization, and filtering capabilities - -**Sample of affected files:** -- algorithmic-trading-with-python-and-machine-learning.md -- ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md -- mindful-data-strategy-for-business-impact.md -- fairness-in-ai-ml-engineering.md -- modern-search-systems-vector-databases-llms-semantic-retrieval.md -... and 58 more - -**Recommendation:** Add relevant topic tags like: -```yaml -topics: -- data science -- machine learning -- mlops -- career -``` - ---- - -## 7. 🔴 TODO Placeholders (4 files) - -**Issue:** Unfinished placeholder values still present - -**Affected files:** -1. technical-writing-for-data-scientists.md -2. crisp-dm.md -3. data-team-roles.md -4. mentoring-in-tech-how-to-find-and-become-a-mentor.md.md - -**Common locations:** -```yaml -links: - spotify: TODO - apple: TODO -``` - -**Recommendation:** Find correct URLs and replace TODO values - ---- - -## 8. 🟢 Description Length (Looks Good!) 
- -**Status:** Most descriptions are within the recommended 140-155 character range - -**Current file description:** -> "Discover AI-driven computer vision and remote sensing strategies to scale biodiversity monitoring, improve species ID, and inform conservation policy." - -**Length:** 150 characters ✓ (optimal per your memory guidelines) - ---- - -## Priority Recommendations - -### High Priority (Data Quality Issues) -1. **Fix zero-duration quotableClips** (15 files) - Set proper endOffset values -2. **Remove TODO placeholders** (4 files) - Add real URLs or remove fields -3. **Replace special dashes** (187 files) - Use standard ASCII `-` for consistency - -### Medium Priority (Missing Metadata) -4. **Add duration field** (12 files including current) - Calculate from endOffset -5. **Add topics field** (63 files) - Improve categorization - -### Low Priority (Style/Consistency) -6. **Standardize YAML quote escaping** (130 files) - Optional, current syntax is valid - ---- - -## Files Needing Most Attention - -Based on multiple issues: - -1. **ai-for-ecology-biodiversity-and-conservation.md** (current file) - - ✅ Anchor ID fixed - - 🔴 13 special dashes - - 🔴 Zero-duration closing clip - - 🟡 Missing duration field - - 🟡 Doubled quotes in context - -2. **technical-writing-for-data-scientists.md** - - 🔴 TODO placeholders - - 🔴 Zero-duration clip - - 🟡 Missing duration - - 🟡 Missing topics - -3. **crisp-dm.md** & **data-team-roles.md** - - 🔴 TODO placeholders - - 🟡 Missing duration - - 🟡 Missing topics - ---- - -## Automated Fix Suggestions - -You could create scripts to: -1. Replace all `‑–—` with `-` across all files -2. Calculate and add `duration:` based on max `endOffset` -3. Fix zero-duration clips by adding 60-120 seconds to final clips -4. 
Find TODO placeholders and flag for manual review - diff --git a/scripts/podcast-rename-mapping.md b/scripts/podcast-rename-mapping.md new file mode 100644 index 00000000..0c46c1c0 --- /dev/null +++ b/scripts/podcast-rename-mapping.md @@ -0,0 +1,192 @@ +# Podcast File Rename Mapping +This table documents all podcast file renames in the current PR. + +| Old Name | New Name | +|----------|----------| +| `s07e06-ab-testing.md` | `ab-testing-and-product-experimentation.md` | +| `s18e03-ai-for-ecology-biodiversity-and-conservation.md` | `ai-for-ecology-biodiversity-and-conservation.md` | +| `s08e04-machine-learning-and-personalization-in-healthcare.md` | `ai-in-healthcare-and-digital-therapeutics.md` | +| `s20e01-trends-in-ai-infrastructure.md` | `ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md` | +| `s08e03-innovation-and-design-for-machine-learning.md` | `ai-ml-product-design-and-experimentation.md` | +| `s17e03-stock-market-analysis-with-python-and-machine-learning.md` | `algorithmic-trading-with-python-and-machine-learning.md` | +| `s05e01-mastering-algorithms-and-data-structures.md` | `algorithms-data-structures-for-engineers.md` | +| `s03e11-analytics-engineer.md` | `analytics-engineer-skills-tools.md` | +| `s03e02-from-analytics-to-data-science.md` | `analytics-to-data-science-with-kaggle-portfolio.md` | +| `s20e07-build-strong-career-in-data.md` | `applied-llm-research-and-career-growth-in-practice.md` | +| `s17e04-bayesian-modeling-and-probabilistic-programming.md` | `bayesian-modeling-workflows-and-tools.md` | +| `s16e09-become-data-freelancer.md` | `becoming-data-freelancer.md` | +| `s06e05-post-doctoral-research.md` | `big-data-analytics-and-postdoc-research.md` | +| `s04e03-big-data-engineer-vs-data-scientist.md` | `big-data-engineer-vs-data-scientist.md` | +| `s13e03-biohacking-for-data-scientists-and-ml-engineers.md` | `biohacking-productivity-for-data-scientists-and-ml-engineers.md` | +| 
`s22e03-from-biotechnology-to-bioinformatics-software.md` | `bioinformatics-worflows-tools-and-data-science.md` | +| `s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md` | `building-agentic-ai-engineering-tooling-retrieval-evaluation.md` | +| `s16e08-ai-for-digital-health.md` | `building-ai-digital-health-startups.md` | +| `s07e03-product-management-essentials.md` | `building-and-scaling-ai-data-products-with-mlops.md` | +| `s15e09-data-engineering-for-fraud-prevention.md` | `building-and-scaling-data-engineering-systems-for-fraud-detection.md` | +| `s11e05-building-data-science-practice.md` | `building-and-scaling-data-science-practice-industrial-ai-mlops.md` | +| `s05e06-building-and-leading-data-teams.md` | `building-and-scaling-data-team.md` | +| `s11e06-product-owners-in-data-science.md` | `building-data-products-product-owner-vs-product-manager.md` | +| `s10e08-leading-data-research.md` | `building-data-science-programs-and-democratizing-high-performance-computing.md` | +| `s01e03-building-ds-team.md` | `building-data-team.md` | +| `s18e07-building-domestic-risk-assessment-tool.md` | `building-domestic-risk-assessment-tool.md` | +| `s14e09-interpretable-ai-and-ml.md` | `building-explainable-and-actionable-ai-ml-systems.md` | +| `s16e02-bridging-data-science-and-healthcare.md` | `building-healthcare-machine-learning-systems.md` | +| `s13e01-accelerating-adoption-of-ai-through-diversity.md` | `building-ml-communities-diversity-and-career-growth.md` | +| `s04e04-ml-startup.md` | `building-mlops-startup.md` | +| `s11e04-large-scale-entity-resolution.md` | `building-open-source-data-product-for-identity-resolution.md` | +| `s13e09-building-open-source-nlp-tool.md` | `building-open-source-nlp-tool.md` | +| `s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md` | `building-production-ml-platform-and-mlops-team.md` | +| `s17e09-building-production-search-systems.md` | `building-production-search-systems.md` | +| 
`s14e01-building-scalable-and-reliable-machine-learning-systems.md` | `building-scalable-and-reliable-machine-learning-systems.md` | +| `s15e06-democratizing-causality.md` | `causal-inference-for-machine-learning.md` | +| `s04e09-chief-data-officer.md` | `chief-data-officer-data-strategy-and-org-design.md` | +| `s03e10-data-governance.md` | `cloud-data-governance.md` | +| `s18e05-community-building-and-teaching-in-ai-tech.md` | `community-building-and-teaching-in-ai-tech.md` | +| `s01e02-processes.md` | `crisp-dm.md` | +| `s12e03-data-centric-ai.md` | `data-centric.md` | +| `s13e04-starting-consultancy-in-data-space.md` | `data-consulting-business-pricing-and-client-acquisition.md` | +| `s08e08-teaching-data-engineers.md` | `data-engineering-career-path-and-skills.md` | +| `s07e07-becoming-a-data-engineering-manager.md` | `data-engineering-leadership-and-modern-data-platforms.md` | +| `s05e02-data-engineering-acronyms.md` | `data-engineering-tools-modern-data-stack.md` | +| `s20e09-taking-your-freelance-career-to-next-level.md` | `data-freelancing-career-strategy-market-demand-and-client-acquisition.md` | +| `s14e04-data-access-management.md` | `data-governance-data-access-management.md` | +| `s06e02-non-technical-interviews.md` | `data-interview-behavioral-and-portfolio-prep-guide.md` | +| `s11e08-technical-writing-and-data-journalism.md` | `data-journalism-python-visualization-storytelling.md` | +| `s18e01-inclusive-data-leadership-coaching.md` | `data-leadership-coaching.md` | +| `s03e08-data-led-professional.md` | `data-led-growth-event-tracking-and-reverse-etl.md` | +| `s10e06-data-mesh-101.md` | `data-mesh-architecture-decentralized-data-products.md` | +| `s14e02-practical-data-privacy.md` | `data-privacy-engineering-gdpr-machine-learning.md` | +| `s12e02-business-skills-for-data-professionals.md` | `data-professionals-business-skills-in-saas.md` | +| `s03e03-data-observability.md` | `data-quality-data-observability-data-reliability.md` | +| 
`s13e02-analytics-for-better-world.md` | `data-science-and-analytics-for-nonprofits-tech-for-good.md` | +| `s02e07-abc-data-science.md` | `data-science-career-abc-framework.md` | +| `s03e09-what-data-scientists-dont-mention.md` | `data-science-failures-and-mlops-lessons.md` | +| `s10e01-data-science-for-social-impact.md` | `data-science-for-public-policy-ethical-ai-social-impact.md` | +| `s03e04-interviewing-300-data-scientists.md` | `data-science-interview-and-cv-guide.md` | +| `s10e02-decoding-data-science-job-descriptions.md` | `data-science-job-red-flags-and-mismatched-roles.md` | +| `s06e09-data-science-manager.md` | `data-science-leadership-hiring-mlops.md` | +| `s13e06-secret-sauce-of-data-science-management.md` | `data-science-management-and-agile-machine-learning.md` | +| `s06e03-manager-vs-expert.md` | `data-science-manager-vs-expert-hiring-guide.md` | +| `s09e07-designing-data-science-organization.md` | `data-science-team-structure-and-org-design.md` | +| `s12e05-indie-hacking.md` | `data-scientist-and-indie-hacker-bootstrapping-side-projects.md` | +| `s14e03-data-strategy-key-principles-and-best-practices.md` | `data-strategy-and-dataops-for-ai-powered-products.md` | +| `s01e01-roles.md` | `data-team-roles.md` | +| `s03e04-effective-communication-with-business.md` | `data-translator-role-and-data-strategy.md` | +| `s11e03-from-data-science-to-dataops.md` | `dataops-and-gitops-best-practices-for-data-teams.md` | +| `s08e05-storytime-for-dataops.md` | `dataops-automation-and-reliable-data-pipelines.md` | +| `s18e09-dataops-observability-and-cure-for-data-team-blues.md` | `dataops-for-data-engineering.md` | +| `s02e11-dataops.md` | `dataops-principles-and-scalable-data-platforms.md` | +| `s07e01-datatalksclub-behind-the-scenes.md` | `datatalksclub-building-scaling-data-community.md` | +| `s16e01-datatalks-club-anniversary-interview.md` | `datatalksclub-building-sustainable-data-community-3-years-anniversary.md` | +| 
`s19e03-datatalks-club-anniversary-podcast.md` | `datatalksclub-scaling-and-free-courses.md` | +| `s15e03-llms-for-everyone.md` | `deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md` | +| `s03e07-market-yourself.md` | `developer-personal-brand-learn-in-public.md` | +| `s02e02-developer-advocacy.md` | `devrel-data-science-open-source-tools.md` | +| `s14e06-data-developer-relations.md` | `devrel-open-source-machine-learning.md` | +| `s19e09-linguistics-and-fairness.md` | `fairness-in-ai-ml-engineering.md` | +| `s05e09-business-acumen.md` | `feature-engineering-model-monitoring-and-data-governance.md` | +| `s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md` | `finops-for-data-engineers.md` | +| `s09e04-freelancing-and-consulting-with-data-engineering.md` | `freelance-data-engineering-pricing-and-clients.md` | +| `s04e08-freelancing.md` | `freelancing-in-machine-learning.md` | +| `s12e09-staff-ai-engineer.md` | `from-academia-to-staff-ai-engineer-interviews-and-career-growth.md` | +| `s21e01-from-simulation-algorithms-to-production-grade-data-systems.md` | `from-academic-research-to-data-engineering-freelancing.md` | +| `s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md` | `from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md` | +| `s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md` | `from-computer-vision-research-to-autonomous-driving-ai.md` | +| `s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md` | `from-data-freelancer-to-startup-open-source-products.md` | +| `s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md` | `from-devops-to-data-engineering-automation-open-source-volunteering.md` | +| `s21e07-lessons-from-two-decades-of-ai.md` | `from-game-ai-to-modern-ai-agents.md` | +| `s15e08-from-data-manager-to-data-architect.md` | `from-iot-data-engineering-to-leading-data-architect.md` | +| 
`s19e05-large-hadron-collider-and-mentorship.md` | `from-large-hadron-collider-to-data-science-research-software-engineering.md` | +| `s11e07-from-digital-marketing-to-analytics-engineering.md` | `from-marketing-to-analytics-engineering-sql-dbt-career-switch.md` | +| `s07e09-from-math-teacher-to-analytics-engineer.md` | `from-math-graduate-to-data-analytics.md` | +| `s03e06-from-physics-to-machine-learning.md` | `from-physics-to-computer-vision-career-transition.md` | +| `s21e05-from-astronomy-to-applied-ml.md` | `from-radio-astronomy-to-machine-learning-and-data-engineering.md` | +| `s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md` | `from-semiconductor-data-to-applied-machine-learning.md` | +| `s04e01-from-swe-to-ml.md` | `from-software-engineer-to-machine-learning.md` | +| `s07e08-from-data-science-to-data-engineering.md` | `from-software-engineering-data-science-to-data-engineering-leadership.md` | +| `s12e01-from-software-engineer-to-data-science-manager.md` | `from-software-engineering-to-leading-data-science-teams.md` | +| `s16e06-unwritten-rules-for-success-in-machine-learning.md` | `from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md` | +| `s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md` | `from-startup-engineering-to-freelance-data-science.md` | +| `s19e06-ai-in-industry-trust-return-on-investment-and-future.md` | `generative-ai-chatbots-in-production-security.md` | +| `s08e09-from-academia-to-data-analytics-and-engineering.md` | `get-data-analytics-and-data-engineering-job.md` | +| `s09e03-getting-data-engineering-job-(summary-and-q&a).md` | `get-data-engineering-job-prep-and-interview.md` | +| `s01e04-standing-out-as-a-data-scientist.md` | `get-data-scientist-job.md` | +| `s07e04-career-coaching.md` | `get-junior-data-job-and-transferable-skills.md` | +| `s11e02-data-science-career-development.md` | `hiring-and-managing-data-science-teams-in-b2b-saas.md` | +| 
`s07e02-recruiting-data-professionals.md` | `hiring-data-scientists-and-analysts.md` | +| `s08e06-recruiting-data-engineers.md` | `hiring-for-data-engineering-jobs-in-europe.md` | +| `s09e09-hiring-data-science-talent.md` | `hiring-for-data-science-jobs-interview-questions-skills.md.md` | +| `s09e05-data-scientists-at-work.md` | `how-to-break-into-data-science.md` | +| `s12e07-navigating-career-changes-in-machine-learning.md` | `how-to-grow-your-ml-engineering-career.md` | +| `s08e02-hacking-your-data-career.md` | `how-to-stand-out-in-data-science.md` | +| `s08e07-from-roasting-coffee-to-backend-development.md` | `how-to-switch-to-ml-tech-without-experience.md` | +| `s11e01-from-testing-phones-to-managing-nlp-projects.md` | `how-to-transition-into-ml-and-data-engineering-from-qa.md` | +| `s09e06-developer-advocacy-engineer-for-open-source.md` | `hugging-face-contributions-and-nlp-portfolio.md` | +| `s19e02-human-centered-ai-for-disordered-speech-recognition.md` | `human-centered-ai-automatic-speech-recognition.md` | +| `s04e06-humans-in-the-loop.md` | `human-centered-mlops-and-model-monitoring.md` | +| `s13e08-navigating-industrial-data-challenges.md` | `industrial-data-small-data-production-machine-learning.md` | +| `s16e07-cracking-code-machine-learning-made-understandable.md` | `interpretable-machine-learning.md` | +| `s15e02-investing-in-open-source-data-tools.md` | `investing-in-open-source-developer-tools.md` | +| `s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md` | `job-search-strategy-in-tech-projects-skills-cv-networking.md` | +| `s20e02-competitive-machine-learning-and-teaching.md` | `kaggle-grandmaster-to-production-ml-and-education.md` | +| `s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md` | `knowledge-graphs-and-llms-for-automotive-rnd.md` | +| `s05e08-the-last-mile-in-data.md` | `last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md` | +| `s04e07-launching-a-startup.md` | `launch-and-build-retail-startup.md` | 
+| `s20e04-mlops-in-corporations-and-startups.md` | `lean-mlops-for-startups.md` | +| `s13e07-mastering-self-learning-in-machine-learning.md` | `learning-machine-learning-self-taught-bioinformatics.md` | +| `s12e06-preparing-for-data-science-interview.md` | `machine-learning-data-science-interview-prep.md` | +| `s02e06-decision-optimization.md` | `machine-learning-decision-optimization.md` | +| `s04e05-running-from-complexity.md` | `machine-learning-engineering-production-best-practices.md` | +| `s09e02-using-data-for-asteroid-mining.md` | `machine-learning-for-asteroid-mining-and-water-detection.md` | +| `s09e01-machine-learning-in-marketing.md` | `machine-learning-in-marketing-attribution-marketing-mix-modeling.md` | +| `s07e05-machine-learning-system-design-interview.md` | `machine-learning-system-design-interview.md` | +| `s02e09-roles-skills-monetizing-ml.md` | `make-money-with-machine-learning-roles-skills.md` | +| `s01e05-mentoring.md` | `mentoring-in-tech-how-to-find-and-become-a-mentor.md.md` | +| `s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md` | `mindful-data-strategy-for-business-impact.md` | +| `s05e03-metrics-and-kpis.md` | `ml-engineering-kpis-and-metrics-strategy.md` | +| `s06e07-product-management-for-machine-learning.md` | `ml-product-manager-and-mlops-platform-strategy.md` | +| `s15e01-why-machine-learning-design-broken.md` | `ml-system-design.md` | +| `s17e05-machine-learning-engineering-in-finance.md` | `mlops-and-ml-engineering-in-finance.md` | +| `s19e04-mlops-as-team.md` | `mlops-at-scale-reproducibility-adoption.md` | +| `s02e12-communities.md` | `mlops-community-building-and-meetups.md` | +| `s02e05-feature-stores.md` | `mlops-feature-stores-feature-stores-feast-tecton.md` | +| `s02e04-mlops.md` | `mlops-kubeflow-model-monitoring.md` | +| `s10e03-mlops-architect.md` | `mlops-model-monitoring-data-observability.md` | +| `s14e07-from-mlops-to-dataops.md` | `modern-data-pipelines-orchestration-ingestion-modeling.md` | +| 
`s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md` | `modern-search-systems-vector-databases-llms-semantic-retrieval.md` | +| `s10e07-dataset-creation-and-curation.md` | `nlp-dataset-creation-annotation-tools-workflows.md` | +| `s06e08-nlp-teams.md` | `nlp-team-hiring-and-production-mlops.md` | +| `s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md` | `nonlinear-path-to-machine-learning-freelancing-and-public-learning.md` | +| `s17e07-make-impact-through-volunteering-open-source-work.md` | `open-source-and-volunteering-in-ai-for-data-ml-career-growth.md` | +| `s02e03-open-source.md` | `open-source-ml-contributions.md` | +| `s18e04-working-in-open-source-probabl-ai-and-sklearn.md` | `open-source-ml-tools-strategy-and-business-models.md` | +| `s09e08-from-open-source-maintainer-to-founder.md` | `open-source-turned-into-career-and-startup-creation.md` | +| `s02e08-personal-branding.md` | `personal-brand-for-data-professionals.md` | +| `s06e06-from-academia-to-industry.md` | `postdoc-to-data-science-lead-career-transition.md` | +| `s20e08-from-hackathons-to-developer-advocacy.md` | `practical-devrel-demofirst-education-and-open-source.md` | +| `s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md` | `practical-generative-ai-consulting-from-expertise-to-impact.md` | +| `s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md` | `practical-llm-engineering-and-rag.md` | +| `s15e04-good-bad-and-ugly-of-gpt.md` | `practical-llm-use-cases-and-product-patterns.md` | +| `s15e07-pragmatic-and-standardized-mlops.md` | `pragmatic-and-standardized-mlops.md` | +| `s06e04-becoming-a-data-product-manager.md` | `product-designer-to-data-product-manager.md` | +| `s05e07-ml-vs-analytics.md` | `production-ml-mlops-and-data-team-building.md` | +| `s04e02-build-your-own-data-pipeline.md` | `production-ml-pipelines-with-aws-and-kafka.md` | +| 
`s17e08-building-machine-learning-products.md` | `production-ml-search-vector-search-embeddings-hybrid search.md` | +| `s20e05-data-intensive-ai.md` | `production-ready-ai-engineering.md` | +| `s03e01-from-pm-to-ds.md` | `project-manager-to-data-scientist.md` | +| `s02e10-public-speaking.md` | `public-speaking-for-data-scientists.md` | +| `s15e05-mastering-data-engineering-as-remote-worker.md` | `remote-data-engineering-work-and-building-iot-platforms.md` | +| `s05e05-researchers-vs-engineers.md` | `research-to-production-ml-systems-roadmap.md` | +| `s10e09-responsible-and-explainable-ai.md` | `responsible-explainable-ai-bias-detection.md` | +| `s10e05-growing-data-engineering-team-in-scale-up.md` | `scaling-data-engineering-teams-self-service-platforms.md` | +| `s10e04-lessons-learned-about-data-&-ai-at-enterprises.md` | `scaling-enterprise-ai-mlops-data-first-strategy.md` | +| `s13e05-se4ml-software-engineering-for-machine-learning.md` | `software-engineering-for-machine-learning.md` | +| `s05e04-introducing-data-science-in-startups.md` | `solopreneur-data-scientist.md` | +| `s06e01-solopreneur.md` | `solopreneur-developer-and-data-professional.md` | +| `s11e09-teaching-and-mentoring-in-data-analytics.md` | `teaching-mentoring-data-analytics-fintech.md` | +| `s12e04-doing-software-engineering-in-academia.md` | `teaching-reproducible-research-and-open-science-coding-practices-for-academia.md` | +| `s02e01-writing.md` | `technical-writing-for-data-scientists.md` | +| `s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md` | `theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md` | +| `s20e03-trends-in-data-engineering.md` | `trends-in-modern-data-engineering.md` | +| `s19e01-using-data-to-create-liveable-cities.md` | `urban-data-science.md` | +| `s08e01-visualising-machine-learning.md` | `visualizing-machine-learning-concepts-to-explain-ml.md` | diff --git a/scripts/podcasts.txt b/scripts/podcasts.txt deleted file mode 100644 index 
a5b8f663..00000000 --- a/scripts/podcasts.txt +++ /dev/null @@ -1,21 +0,0 @@ -_podcast/s03e04-interviewing-300-data-scientists.md -_podcast/s03e07-market-yourself.md -_podcast/s04e08-freelancing.md -_podcast/s05e02-data-engineering-acronyms.md -_podcast/s06e01-solopreneur.md -_podcast/s06e02-non-technical-interviews.md -_podcast/s07e05-machine-learning-system-design-interview.md -_podcast/s07e08-from-data-science-to-data-engineering.md -_podcast/s08e02-hacking-your-data-career.md -_podcast/s08e06-recruiting-data-engineers.md -_podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).md -_podcast/s09e04-freelancing-and-consulting-with-data-engineering.md -_podcast/s10e02-decoding-data-science-job-descriptions.md -_podcast/s12e05-indie-hacking.md -_podcast/s13e04-starting-consultancy-in-data-space.md -_podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md -_podcast/s15e02-investing-in-open-source-data-tools.md -_podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md -_podcast/s16e09-become-data-freelancer.md -_podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md -_podcast/s20e09-taking-your-freelance-career-to-next-level.md \ No newline at end of file diff --git a/scripts/podcasts2.txt b/scripts/podcasts2.txt deleted file mode 100644 index b88d209b..00000000 --- a/scripts/podcasts2.txt +++ /dev/null @@ -1,189 +0,0 @@ -https://datatalks.club/podcast/s01e01-roles.html -https://datatalks.club/podcast/s01e02-processes.html -https://datatalks.club/podcast/s01e03-building-ds-team.html -https://datatalks.club/podcast/s01e04-standing-out-as-a-data-scientist.html -https://datatalks.club/podcast/s01e05-mentoring.html -https://datatalks.club/podcast/s02e01-writing.html -https://datatalks.club/podcast/s02e02-developer-advocacy.html -https://datatalks.club/podcast/s02e03-open-source.html -https://datatalks.club/podcast/s02e04-mlops.html -https://datatalks.club/podcast/s02e05-feature-stores.html 
-https://datatalks.club/podcast/s02e06-decision-optimization.html -https://datatalks.club/podcast/s02e07-abc-data-science.html -https://datatalks.club/podcast/s02e08-personal-branding.html -https://datatalks.club/podcast/s02e09-roles-skills-monetizing-ml.html -https://datatalks.club/podcast/s02e10-public-speaking.html -https://datatalks.club/podcast/s02e11-dataops.html -https://datatalks.club/podcast/s02e12-communities.html -https://datatalks.club/podcast/s03e01-from-pm-to-ds.html -https://datatalks.club/podcast/s03e02-from-analytics-to-data-science.html -https://datatalks.club/podcast/s03e03-data-observability.html -https://datatalks.club/podcast/s03e04-effective-communication-with-business.html -https://datatalks.club/podcast/s03e04-interviewing-300-data-scientists.html -https://datatalks.club/podcast/s03e06-from-physics-to-machine-learning.html -https://datatalks.club/podcast/s03e07-market-yourself.html -https://datatalks.club/podcast/s03e08-data-led-professional.html -https://datatalks.club/podcast/s03e09-what-data-scientists-dont-mention.html -https://datatalks.club/podcast/s03e10-data-governance.html -https://datatalks.club/podcast/s03e11-analytics-engineer.html -https://datatalks.club/podcast/s04e01-from-swe-to-ml.html -https://datatalks.club/podcast/s04e02-build-your-own-data-pipeline.html -https://datatalks.club/podcast/s04e03-big-data-engineer-vs-data-scientist.html -https://datatalks.club/podcast/s04e04-ml-startup.html -https://datatalks.club/podcast/s04e05-running-from-complexity.html -https://datatalks.club/podcast/s04e06-humans-in-the-loop.html -https://datatalks.club/podcast/s04e07-launching-a-startup.html -https://datatalks.club/podcast/s04e08-freelancing.html -https://datatalks.club/podcast/s04e09-chief-data-officer.html -https://datatalks.club/podcast/s05e01-mastering-algorithms-and-data-structures.html -https://datatalks.club/podcast/s05e02-data-engineering-acronyms.html -https://datatalks.club/podcast/s05e03-metrics-and-kpis.html 
-https://datatalks.club/podcast/s05e04-introducing-data-science-in-startups.html -https://datatalks.club/podcast/s05e05-researchers-vs-engineers.html -https://datatalks.club/podcast/s05e06-building-and-leading-data-teams.html -https://datatalks.club/podcast/s05e07-ml-vs-analytics.html -https://datatalks.club/podcast/s05e08-the-last-mile-in-data.html -https://datatalks.club/podcast/s05e09-business-acumen.html -https://datatalks.club/podcast/s06e01-solopreneur.html -https://datatalks.club/podcast/s06e02-non-technical-interviews.html -https://datatalks.club/podcast/s06e03-manager-vs-expert.html -https://datatalks.club/podcast/s06e04-becoming-a-data-product-manager.html -https://datatalks.club/podcast/s06e05-post-doctoral-research.html -https://datatalks.club/podcast/s06e06-from-academia-to-industry.html -https://datatalks.club/podcast/s06e07-product-management-for-machine-learning.html -https://datatalks.club/podcast/s06e08-nlp-teams.html -https://datatalks.club/podcast/s06e09-data-science-manager.html -https://datatalks.club/podcast/s07e01-datatalksclub-behind-the-scenes.html -https://datatalks.club/podcast/s07e02-recruiting-data-professionals.html -https://datatalks.club/podcast/s07e03-product-management-essentials.html -https://datatalks.club/podcast/s07e04-career-coaching.html -https://datatalks.club/podcast/s07e05-machine-learning-system-design-interview.html -https://datatalks.club/podcast/s07e06-ab-testing.html -https://datatalks.club/podcast/s07e07-becoming-a-data-engineering-manager.html -https://datatalks.club/podcast/s07e08-from-data-science-to-data-engineering.html -https://datatalks.club/podcast/s07e09-from-math-teacher-to-analytics-engineer.html -https://datatalks.club/podcast/s08e01-visualising-machine-learning.html -https://datatalks.club/podcast/s08e02-hacking-your-data-career.html -https://datatalks.club/podcast/s08e03-innovation-and-design-for-machine-learning.html 
-https://datatalks.club/podcast/s08e04-machine-learning-and-personalization-in-healthcare.html -https://datatalks.club/podcast/s08e05-storytime-for-dataops.html -https://datatalks.club/podcast/s08e06-recruiting-data-engineers.html -https://datatalks.club/podcast/s08e07-from-roasting-coffee-to-backend-development.html -https://datatalks.club/podcast/s08e08-teaching-data-engineers.html -https://datatalks.club/podcast/s08e09-from-academia-to-data-analytics-and-engineering.html -https://datatalks.club/podcast/s09e01-machine-learning-in-marketing.html -https://datatalks.club/podcast/s09e02-using-data-for-asteroid-mining.html -https://datatalks.club/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).html -https://datatalks.club/podcast/s09e04-freelancing-and-consulting-with-data-engineering.html -https://datatalks.club/podcast/s09e05-data-scientists-at-work.html -https://datatalks.club/podcast/s09e06-developer-advocacy-engineer-for-open-source.html -https://datatalks.club/podcast/s09e07-designing-data-science-organization.html -https://datatalks.club/podcast/s09e08-from-open-source-maintainer-to-founder.html -https://datatalks.club/podcast/s09e09-hiring-data-science-talent.html -https://datatalks.club/podcast/s10e01-data-science-for-social-impact.html -https://datatalks.club/podcast/s10e02-decoding-data-science-job-descriptions.html -https://datatalks.club/podcast/s10e03-mlops-architect.html -https://datatalks.club/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.html -https://datatalks.club/podcast/s10e05-growing-data-engineering-team-in-scale-up.html -https://datatalks.club/podcast/s10e06-data-mesh-101.html -https://datatalks.club/podcast/s10e07-dataset-creation-and-curation.html -https://datatalks.club/podcast/s10e08-leading-data-research.html -https://datatalks.club/podcast/s10e09-responsible-and-explainable-ai.html -https://datatalks.club/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.html 
-https://datatalks.club/podcast/s11e02-data-science-career-development.html -https://datatalks.club/podcast/s11e03-from-data-science-to-dataops.html -https://datatalks.club/podcast/s11e04-large-scale-entity-resolution.html -https://datatalks.club/podcast/s11e05-building-data-science-practice.html -https://datatalks.club/podcast/s11e06-product-owners-in-data-science.html -https://datatalks.club/podcast/s11e07-from-digital-marketing-to-analytics-engineering.html -https://datatalks.club/podcast/s11e08-technical-writing-and-data-journalism.html -https://datatalks.club/podcast/s11e09-teaching-and-mentoring-in-data-analytics.html -https://datatalks.club/podcast/s12e01-from-software-engineer-to-data-science-manager.html -https://datatalks.club/podcast/s12e02-business-skills-for-data-professionals.html -https://datatalks.club/podcast/s12e03-data-centric-ai.html -https://datatalks.club/podcast/s12e04-doing-software-engineering-in-academia.html -https://datatalks.club/podcast/s12e05-indie-hacking.html -https://datatalks.club/podcast/s12e06-preparing-for-data-science-interview.html -https://datatalks.club/podcast/s12e07-navigating-career-changes-in-machine-learning.html -https://datatalks.club/podcast/s12e09-staff-ai-engineer.html -https://datatalks.club/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.html -https://datatalks.club/podcast/s13e02-analytics-for-better-world.html -https://datatalks.club/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.html -https://datatalks.club/podcast/s13e04-starting-consultancy-in-data-space.html -https://datatalks.club/podcast/s13e05-se4ml-software-engineering-for-machine-learning.html -https://datatalks.club/podcast/s13e06-secret-sauce-of-data-science-management.html -https://datatalks.club/podcast/s13e07-mastering-self-learning-in-machine-learning.html -https://datatalks.club/podcast/s13e08-navigating-industrial-data-challenges.html -https://datatalks.club/podcast/s13e09-building-open-source-nlp-tool.html 
-https://datatalks.club/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.html -https://datatalks.club/podcast/s14e02-practical-data-privacy.html -https://datatalks.club/podcast/s14e03-data-strategy-key-principles-and-best-practices.html -https://datatalks.club/podcast/s14e04-data-access-management.html -https://datatalks.club/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.html -https://datatalks.club/podcast/s14e06-data-developer-relations.html -https://datatalks.club/podcast/s14e07-from-mlops-to-dataops.html -https://datatalks.club/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.html -https://datatalks.club/podcast/s14e09-interpretable-ai-and-ml.html -https://datatalks.club/podcast/s15e01-why-machine-learning-design-broken.html -https://datatalks.club/podcast/s15e02-investing-in-open-source-data-tools.html -https://datatalks.club/podcast/s15e03-llms-for-everyone.html -https://datatalks.club/podcast/s15e04-good-bad-and-ugly-of-gpt.html -https://datatalks.club/podcast/s15e05-mastering-data-engineering-as-remote-worker.html -https://datatalks.club/podcast/s15e06-democratizing-causality.html -https://datatalks.club/podcast/s15e07-pragmatic-and-standardized-mlops.html -https://datatalks.club/podcast/s15e08-from-data-manager-to-data-architect.html -https://datatalks.club/podcast/s15e09-data-engineering-for-fraud-prevention.html -https://datatalks.club/podcast/s16e01-datatalks-club-anniversary-interview.html -https://datatalks.club/podcast/s16e02-bridging-data-science-and-healthcare.html -https://datatalks.club/podcast/s16e03-collaborative-data-science-in-business.html -https://datatalks.club/podcast/s16e04-from-marketing-to-product-owner-in-search.html -https://datatalks.club/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.html -https://datatalks.club/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.html 
-https://datatalks.club/podcast/s16e07-cracking-code-machine-learning-made-understandable.html -https://datatalks.club/podcast/s16e08-ai-for-digital-health.html -https://datatalks.club/podcast/s16e09-become-data-freelancer.html -https://datatalks.club/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.html -https://datatalks.club/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.html -https://datatalks.club/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.html -https://datatalks.club/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.html -https://datatalks.club/podcast/s17e05-machine-learning-engineering-in-finance.html -https://datatalks.club/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.html -https://datatalks.club/podcast/s17e07-make-impact-through-volunteering-open-source-work.html -https://datatalks.club/podcast/s17e08-building-machine-learning-products.html -https://datatalks.club/podcast/s17e09-building-production-search-systems.html -https://datatalks.club/podcast/s18e01-inclusive-data-leadership-coaching.html -https://datatalks.club/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.html -https://datatalks.club/podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.html -https://datatalks.club/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.html -https://datatalks.club/podcast/s18e05-community-building-and-teaching-in-ai-tech.html -https://datatalks.club/podcast/s18e07-building-domestic-risk-assessment-tool.html -https://datatalks.club/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.html -https://datatalks.club/podcast/s19e01-using-data-to-create-liveable-cities.html -https://datatalks.club/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.html -https://datatalks.club/podcast/s19e03-datatalks-club-anniversary-podcast.html 
-https://datatalks.club/podcast/s19e04-mlops-as-team.html -https://datatalks.club/podcast/s19e05-large-hadron-collider-and-mentorship.html -https://datatalks.club/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.html -https://datatalks.club/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.html -https://datatalks.club/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.html -https://datatalks.club/podcast/s19e09-linguistics-and-fairness.html -https://datatalks.club/podcast/s20e01-trends-in-ai-infrastructure.html -https://datatalks.club/podcast/s20e02-competitive-machine-learning-and-teaching.html -https://datatalks.club/podcast/s20e03-trends-in-data-engineering.html -https://datatalks.club/podcast/s20e04-mlops-in-corporations-and-startups.html -https://datatalks.club/podcast/s20e05-data-intensive-ai.html -https://datatalks.club/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.html -https://datatalks.club/podcast/s20e07-build-strong-career-in-data.html -https://datatalks.club/podcast/s20e08-from-hackathons-to-developer-advocacy.html -https://datatalks.club/podcast/s20e09-taking-your-freelance-career-to-next-level.html -https://datatalks.club/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.html -https://datatalks.club/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.html -https://datatalks.club/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.html -https://datatalks.club/podcast/s21e05-from-astronomy-to-applied-ml.html -https://datatalks.club/podcast/s21e07-lessons-from-two-decades-of-ai.html -https://datatalks.club/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.html -https://datatalks.club/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.html 
-https://datatalks.club/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.html -https://datatalks.club/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.html -https://datatalks.club/podcast/s22e03-from-biotechnology-to-bioinformatics-software.html -https://datatalks.club/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.html diff --git a/scripts/rename_podcast_images.py b/scripts/rename_podcast_images.py new file mode 100755 index 00000000..b6dcd580 --- /dev/null +++ b/scripts/rename_podcast_images.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +""" +Script to rename podcast images according to updated podcast filenames. + +This script: +1. Reads the podcast rename mapping from scripts/podcast-rename-mapping.md +2. Renames image files in images/podcast/ directory +3. Updates image references in podcast markdown files +""" + +import re +import sys +from pathlib import Path +from typing import Dict, Tuple + + +def parse_mapping_file(mapping_file: Path) -> Dict[str, str]: + """ + Parse the markdown mapping file and extract old -> new name mappings. + + Returns a dictionary mapping old names (without .md) to new names (without .md). + Note: Some new names may contain .md in the middle (e.g., "name.md.md" becomes "name.md"). 
+ """ + mappings = {} + + if not mapping_file.exists(): + print(f"Error: Mapping file not found: {mapping_file}", file=sys.stderr) + sys.exit(1) + + with open(mapping_file, 'r', encoding='utf-8') as f: + for line in f: + # Match markdown table rows: | `old-name.md` | `new-name.md` | + # The regex captures everything before the final .md + match = re.match(r'^\|\s*`([^`]+)\.md`\s*\|\s*`([^`]+)\.md`\s*\|', line) + if match: + old_name = match.group(1) + new_name = match.group(2) # This may contain .md in the middle + mappings[old_name] = new_name + + return mappings + + +def rename_image_file(old_path: Path, new_path: Path, dry_run: bool = False) -> bool: + """Rename an image file.""" + if not old_path.exists(): + print(f"Warning: Image file not found: {old_path}") + return False + + if new_path.exists(): + print(f"Warning: Target image file already exists: {new_path}") + return False + + if dry_run: + print(f"Would rename: {old_path} -> {new_path}") + return True + + try: + old_path.rename(new_path) + print(f"Renamed: {old_path.name} -> {new_path.name}") + return True + except Exception as e: + print(f"Error renaming {old_path}: {e}", file=sys.stderr) + return False + + +def update_podcast_file_references( + podcast_file: Path, + old_image_name: str, + new_image_name: str, + dry_run: bool = False +) -> bool: + """Update image references in a podcast markdown file.""" + if not podcast_file.exists(): + return False + + try: + with open(podcast_file, 'r', encoding='utf-8') as f: + content = f.read() + + # Pattern to match image references like: + # image: images/podcast/old-name.jpg + old_pattern = f'images/podcast/{old_image_name}.jpg' + new_pattern = f'images/podcast/{new_image_name}.jpg' + + if old_pattern in content: + new_content = content.replace(old_pattern, new_pattern) + + if dry_run: + print(f"Would update image reference in: {podcast_file.name}") + print(f" {old_pattern} -> {new_pattern}") + else: + with open(podcast_file, 'w', encoding='utf-8') as f: + 
f.write(new_content) + print(f"Updated image reference in: {podcast_file.name}") + return True + + return False + except Exception as e: + print(f"Error updating {podcast_file}: {e}", file=sys.stderr) + return False + + +def main(): + """Main function to rename podcast images.""" + import argparse + + parser = argparse.ArgumentParser( + description='Rename podcast images according to updated podcast filenames' + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='Show what would be done without making changes' + ) + parser.add_argument( + '--mapping-file', + type=Path, + default=Path(__file__).parent / 'podcast-rename-mapping.md', + help='Path to the mapping file (default: scripts/podcast-rename-mapping.md)' + ) + parser.add_argument( + '--images-dir', + type=Path, + default=Path(__file__).parent.parent / 'images' / 'podcast', + help='Path to podcast images directory (default: images/podcast)' + ) + parser.add_argument( + '--podcast-dir', + type=Path, + default=Path(__file__).parent.parent / '_podcast', + help='Path to podcast markdown files directory (default: _podcast)' + ) + + args = parser.parse_args() + + # Get project root + project_root = Path(__file__).parent.parent + + # Parse mappings + print(f"Reading mappings from: {args.mapping_file}") + mappings = parse_mapping_file(args.mapping_file) + print(f"Found {len(mappings)} mappings\n") + + if not mappings: + print("No mappings found. 
Exiting.") + sys.exit(1) + + # Track statistics + images_renamed = 0 + images_not_found = 0 + references_updated = 0 + + # Process each mapping + for old_name, new_name in sorted(mappings.items()): + print(f"\nProcessing: {old_name} -> {new_name}") + + # Rename image file + old_image_path = args.images_dir / f"{old_name}.jpg" + new_image_path = args.images_dir / f"{new_name}.jpg" + + if rename_image_file(old_image_path, new_image_path, args.dry_run): + images_renamed += 1 + else: + images_not_found += 1 + + # Update references in podcast markdown file + podcast_file = args.podcast_dir / f"{new_name}.md" + if update_podcast_file_references(podcast_file, old_name, new_name, args.dry_run): + references_updated += 1 + + # Print summary + print("\n" + "=" * 60) + print("Summary:") + print(f" Images renamed: {images_renamed}") + print(f" Images not found: {images_not_found}") + print(f" References updated: {references_updated}") + if args.dry_run: + print("\n (Dry run - no files were actually changed)") + print("=" * 60) + + +if __name__ == '__main__': + main() + diff --git a/scripts/timestamps.txt b/scripts/timestamps.txt deleted file mode 100644 index 057ed756..00000000 --- a/scripts/timestamps.txt +++ /dev/null @@ -1,20 +0,0 @@ -00:00:00 Episode Introduction -00:01:59 Career Journey: Academia to Industry -00:06:13 Consulting Focus: ML in Production & Organizational Advice -00:07:53 Transition to Freelancing: First Clients & Momentum -00:10:08 Client Acquisition: Network, Word-of-Mouth & Personal Branding -00:15:28 Networking Tactics: Coffee Chats, LinkedIn & Lunchclub -00:19:09 Intro Calls & Pre-sales: Building Trust Before Billing -00:21:37 Problem Discovery: Needs vs. 
Prescribed Solutions -00:22:18 Proposal Best Practices: Written Summaries & Scope Alignment -00:23:52 Pricing Strategies: Hourly, Fixed-Price, and Value-Based Trade-offs -00:31:52 Financial Planning: Vacation, Risk Buffer, and Expected Income -00:33:58 Workload Management: Capacity, Calendars, and Burnout Prevention -00:36:11 Specialization & Productizing Consulting for Predictability -00:41:19 Freelance Trade-offs: Freedom, Overhead, and Side Projects -00:45:15 Scaling Paths: Building an Agency, a Product, or Returning to Employment -00:48:27 Deliverables: Mentoring, Workshops, Prototypes, and Team Outcomes -00:53:30 Admin & Insurance: German Registration, Taxes, and Liability Coverage -00:58:59 Global Market Dynamics: Remote Work, Competition, and Differentiation -01:01:02 Starter Advice: Try Freelancing with a Safety Net -01:03:12 Wrap-up & Contact: Where to Find Mikio From c257f178578cb43177cf1206a020ea1e51ba11bd Mon Sep 17 00:00:00 2001 From: kavaivaleri Date: Tue, 18 Nov 2025 17:08:39 +0100 Subject: [PATCH 6/9] Correct sorting --- Gemfile.lock | 4 +- _config.yml | 1 + _layouts/podcast.html | 165 +- ...lding-data-products-lead-data-scientist.md | 1396 ++++++++++++++++- ...om-marketing-to-product-owner-in-search.md | 1095 ++++++++++++- podcast.md | 11 +- 6 files changed, 2578 insertions(+), 94 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 3fae0114..10ee5728 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -36,6 +36,8 @@ GEM logger faraday-net_http (3.4.1) net-http (>= 0.5.0) + faraday-retry (2.3.2) + faraday (~> 2.0) ffi (1.17.2) forwardable-extended (2.6.0) gemoji (4.1.0) @@ -272,8 +274,8 @@ PLATFORMS x64-mingw32 DEPENDENCIES + faraday-retry github-pages - jekyll jekyll-feed (~> 0.12) jekyll-theme-cayman tzinfo (~> 1.2) diff --git a/_config.yml b/_config.yml index 619f8849..48b538b6 100644 --- a/_config.yml +++ b/_config.yml @@ -19,6 +19,7 @@ exclude: - Pipfile.lock - Makefile - .gitignore + - sh-thd-* collections: diff --git 
a/_layouts/podcast.html b/_layouts/podcast.html index be24e64e..2ee2fc90 100644 --- a/_layouts/podcast.html +++ b/_layouts/podcast.html @@ -338,10 +338,6 @@

Listen to or watch on your favorite platform

Spotify
{% endif %} - - Anchor icon -
Anchor
-
@@ -368,7 +364,60 @@

Show Notes

{{ content }}
+ + +
+

Timestamps

+ {% if page.transcript %} +

Click any timestamp to jump to that moment in the video

+
+
    + {% assign found_header = false %} + {% assign header_text = "" %} + {% for line in page.transcript %} + {% if line.header %} + {% assign found_header = true %} + {% assign header_text = line.header %} + {% elsif found_header and line.sec %} +
  • + + {{ line.time }} + {{ header_text }} + +
  • + {% assign found_header = false %} + {% endif %} + {% endfor %} +
+
+ {% else %} +

Timestamps coming soon...

+ {% endif %} +
+ + {% if page.transcript %} +
+

Transcript

+
+

+ The transcripts are edited for clarity, sometimes with AI. + If you notice any incorrect information, + let us know. +

+ + {% for line in page.transcript %} + {% if line.header %} +

{{ line.header }}

+ {% else %} +

{{ line.who }}: {{ line.line }}{% if line.sec %} ({{ line.time }}){% endif %}

+ {% endif %} + {% endfor %} +
+
+ {% endif %} + + diff --git a/_podcast/building-data-products-lead-data-scientist.md b/_podcast/building-data-products-lead-data-scientist.md index 18bb96ec..61590c7f 100644 --- a/_podcast/building-data-products-lead-data-scientist.md +++ b/_podcast/building-data-products-lead-data-scientist.md @@ -1,6 +1,1398 @@ --- -description: Discover actionable podcast strategy and growth tactics to boost audience, - optimize episodes, and convert listeners into customers with measurable results. +title: "Building Data Products at Scale: Intake, A/B Testing, and MLOps in a Marketing Organization" +short: Collaborative Data Science in Business +season: 16 +episode: 3 +guests: +- ioannismesionis +image: images/podcast/s16e03-collaborative-data-science-in-business.jpg +ids: + anchor: atatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c + youtube: 1pExOVuCF8Q +links: + anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c + apple: https://podcasts.apple.com/us/podcast/collaborative-data-science-in-business-ioannis-mesionis/id1541710331?i=1000632860980 + spotify: https://open.spotify.com/episode/46DN6rAlufvvXaqdOomoTe?si=OMPDN8m5QZWsc5kJY8IcAA + youtube: https://www.youtube.com/watch?v=1pExOVuCF8Q + +description: Discover MLOps tactics to prioritize data products, run A/B testing and enable model monitoring for faster validation, reliable rollouts and stakeholder buy-in +intro: How do you prioritize data product work, validate models in production, and keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a practical data product operating model for tackling those challenges.

Drawing on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains a four‑phase funnel with a "single front door" intake, a Definition of Done template with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR feasibility. He breaks down when to treat work as analytics vs. research, how R&D sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies for production rollout as MLOps capabilities evolve. Technical tooling and monitoring get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection — plus pragmatic dashboarding and alerting patterns. Listeners will come away with actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder engagement, and the estimation and cadence practices that make ML teams productive +dateadded: 2023-10-29 + +duration: PT01H14S + +quotableClips: +- name: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) + startOffset: 100 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=100 + endOffset: 154 +- name: Career origin & early projects (mathematics degree, master's, internship model) + startOffset: 154 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=154 + endOffset: 443 +- name: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' + startOffset: 443 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=443 + endOffset: 512 +- name: 'Stakeholder collaboration: weekly embedded meetings and observation' + startOffset: 512 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=512 + endOffset: 675 +- name: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' + startOffset: 675 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=675 + endOffset: 840 +- name: 'Operating model for data products: four-phase funnel and accountability' + startOffset: 840 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=840 + endOffset: 923 +- name: 
'Project intake & prioritization: "single front door" and cross-functional + kickoff' + startOffset: 923 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=923 + endOffset: 1057 +- name: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' + startOffset: 1057 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1057 + endOffset: 1254 +- name: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' + startOffset: 1254 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1254 + endOffset: 1272 +- name: 'Data science vs analytics: choosing technical approach and leads' + startOffset: 1272 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1272 + endOffset: 1368 +- name: 'Research & development: modeling work, sprint planning and Kanban usage' + startOffset: 1368 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1368 + endOffset: 1517 +- name: 'Pilot & A/B testing: validating models against baseline KPIs and feedback + loops' + startOffset: 1517 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1517 + endOffset: 1645 +- name: 'Production rollout: spectrum of production and evolving MLOps capabilities' + startOffset: 1645 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1645 + endOffset: 1698 +- name: 'Organizational structure: domain-focused lead data scientists (scheduling, + ops, pricing)' + startOffset: 1698 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1698 + endOffset: 1821 +- name: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' + startOffset: 1821 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1821 + endOffset: 2138 +- name: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' + startOffset: 2138 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2138 + endOffset: 2297 +- name: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' + startOffset: 2297 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2297 + 
endOffset: 2449 +- name: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' + startOffset: 2449 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2449 + endOffset: 2493 +- name: 'Communicating technical results: simplifying concepts for non‑technical audiences' + startOffset: 2493 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2493 + endOffset: 2710 +- name: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a helper' + startOffset: 2710 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2710 + endOffset: 2918 +- name: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' + startOffset: 2918 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2918 + endOffset: 2950 +- name: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' + startOffset: 2950 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2950 + endOffset: 3213 +- name: 'Model monitoring with Evidently: drift detection and integration plans' + startOffset: 3213 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3213 + endOffset: 3311 +- name: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' + startOffset: 3311 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3311 + endOffset: 3429 +- name: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' + startOffset: 3429 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3429 + endOffset: 3660 +- name: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' + startOffset: 3660 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3660 + endOffset: 3614 + +transcript: +- header: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) +- line: This week, we'll talk about collaborative data science in business. We have + a special guest today, Ioannis. Ioannis is a lead data scientist at EasyJet, if + you’ve heard about this airline – I certainly have because I used it a couple + of times. 
In his role, he works on creating data products and solving business + problems. He also leads the EasyJet MLOps team. Ioannis is also one of the graduates + of our MLOps Zoomcamp. I was quite surprised that he actually took it – with his + experience, he should have been one of the instructors. [Ioannis chuckles] But + I'm pretty happy that you, Ioannis, did take the course because otherwise, we + wouldn't be talking now otherwise. Welcome! + sec: 100 + time: '1:40' + who: Alexey +- line: Yeah. Thanks for having me and for the introduction. It's been a pleasure. + sec: 150 + time: '2:30' + who: Ioannis +- header: Career origin & early projects (mathematics degree, master's, internship + model) +- line: Before we go into our main topic of business and data science, let's start + with your background. Can you tell us about your career journey so far? + sec: 154 + time: '2:34' + who: Alexey +- line: Yeah, absolutely. Education-wise, I have a bachelor’s in mathematics and a + postgraduate in data science from Essex University. It's been fun because I wasn't + always planning to become a data scientist. Essentially, I'm Greek and this is + important, because in Greece, usually when you have a bachelor’s in mathematics, + there are not many things that you can do with this degree. You either become + a teacher – which is, although exciting, wasn't something that I wanted to pursue + – or you find a way to mix it with some other things. After I finished my Bachelor’s, + I was thinking about financial mathematics or actuarial mathematics. I didn't + know what to do. + sec: 165 + time: '2:45' + who: Ioannis +- line: Luckily, I got introduced to the notion of data science by watching Netflix + – actually, the famous Sherlock Series. There was a moment when Sherlock and John + Watson were on-screen, and John Watson was impressed by Sherlock’s decision-making + skills. 
I remember he asked him, “How do you make decisions that fast and so accurately?” + And Sherlock replied, “You see, but you do not observe.” So that was John's problem. + That really sat well with me, and I was thinking, “I want to improve my decision-making + skills.” And this is how I started Googling around “decision-making, inference” + and all this kind of stuff. I came across data science as a profession. That was + back in 2016, I think. So yeah, I did a master’s in data science from Essex University, + followed by a three-month internship, where I was able to develop a machine learning + model to predict children who are being abused in their current environment. That + was great because it showed me the power that lies behind data science and machine + learning in general. I knew that this was what I wanted to do. + sec: 165 + time: '2:45' + who: Ioannis +- line: After the internship, I had a four-month experience working as a data scientist + consultant at a company named AKKA Technologies in Geneva, Switzerland. After + four months, I decided to move back to the UK, where I started working as a data + scientist for EasyJet, where I'm still working. I started as a graduate data scientist, + got promoted to senior data scientist, and right now, I'm still a lead data scientist, + working with business stakeholders and trying to transform EasyJet to become the + world's most data-driven airline. Yeah, that's pretty much me. + sec: 165 + time: '2:45' + who: Ioannis +- line: Do you get a discount at EasyJet if you want to go somewhere? + sec: 321 + time: '5:21' + who: Alexey +- line: '[chuckles] I think that''s one of the best perks that we have. [chuckles] + Yeah, the truth is that we do and it''s an excellent discount. I use it all the + time to travel to different European cities. It''s been great.'
+ sec: 325 + time: '5:25' + who: Ioannis +- line: Because EasyJet is… when it comes to Berlin, I don't know about the other + cities and I'm based in Berlin – it's one of the airlines I usually use when I + want to go somewhere. + sec: 340 + time: '5:40' + who: Alexey +- line: I'm happy to hear that we're doing something good, then. [chuckles] + sec: 353 + time: '5:53' + who: Ioannis +- line: Well, in terms of coverage, it's probably one of the best ones – at least + going to Italy or some other countries. Funny that you… [cross-talk] It’s funny + that you mentioned the Sherlock TV show. Have you seen…? There is another different + TV show (an American one) called Numbers. Have you seen that one? + sec: 356 + time: '5:56' + who: Alexey +- line: Oh, that's interesting. Not really. But noted. + sec: 381 + time: '6:21' + who: Ioannis +- line: It's about a mathematician who uses his skills to solve crimes. They use statistics + and data science. Well, I wouldn't call it “data science” in the sense that you + and I mean it. But still, it's quite close. + sec: 385 + time: '6:25' + who: Alexey +- line: I'm always excited to hear about these use cases where data science is being + used for good, like the project that you just mentioned – to solve crimes or the + internship that I did. I think it's great to show how data science can serve + people rather than be there to replace people’s jobs, which is one of the things + you hear from time to time. + sec: 408 + time: '6:48' + who: Ioannis +- line: Yeah, so it's called Numbers. And I think the E is spelled with a 3. So, it's + like Numb3rs. + sec: 431 + time: '7:11' + who: Alexey +- line: I think it rings a bell. + sec: 440 + time: '7:20' + who: Ioannis +- header: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' +- line: Yeah. Anyways, what do you do as a lead data scientist?
+ sec: 443 + time: '7:23' + who: Alexey +- line: Currently, my role as a lead data scientist is a partnership with the business + stakeholders from Digital Customer and Marketing. These are the departments that + I oversee from the data science and analytics perspective. I try to understand + their pain points and translate them into data products and data solutions that + go into production and solve whatever problem we encounter at the time. You can + think of my role as having accountability for the projects to ensure that they + reach production and, of course, we meet the financial benefits that have been + agreed upon at the beginning of every financial year. + sec: 448 + time: '7:28' + who: Ioannis +- line: In practice, what do you mean when you say that you “partner with business + stakeholders from Digital Marketing”? What does it look like in practice? Is it + you proactively reaching out to them saying, “Hey, can we talk?” Or do they reach + out to you? Or is it a combination of both? What does this collaboration look + like in your case? + sec: 489 + time: '8:09' + who: Alexey +- header: 'Stakeholder collaboration: weekly embedded meetings and observation' +- line: It's a great question. Usually, one of the things that I love about EasyJet + is that it's a really friendly environment. You can think of it as me having a + close collaboration in terms of meetings, sitting with them during the business + days, and trying to understand what decisions they have to make on a daily basis + and then trying to understand, from their perspective, what their strategies are + and what their vision is for their department, and understand how data science + can support reaching their vision. This is how it looks on a day-to-day basis + – meetings and meetups, etc. + sec: 512 + time: '8:32' + who: Ioannis +- line: So they have their usual day-to-day meetings, and you’re like, “Hey, can I + join you? 
I just want to observe what you do.” + sec: 552 + time: '9:12' + who: Alexey +- line: Kind of, yes. We have a recurring meeting where we discuss what they're doing, + brainstorm together to have – let's call it a framework, where we discuss their + day-to-day job and what they're trying to improve and see how I can support them + with data science. + sec: 561 + time: '9:21' + who: Ioannis +- line: So you have a monthly meeting or something like that? + sec: 582 + time: '9:42' + who: Alexey +- line: Even more frequent – weekly, actually. + sec: 585 + time: '9:45' + who: Ioannis +- line: Weekly, okay. [Ioannis chuckles] There are some leaders from these departments, + and you talk to them saying, “Hey, what’s up? What are the current problems you + have? How's it going with the previous projects we implemented for you?” And things + like that. Right? + sec: 589 + time: '9:49' + who: Alexey +- line: Absolutely. The way I frame it is – I think of the heads of the different + departments, from Digital Customer and Marketing as being my best friends in the + working environment and try to understand how I can be supportive and how I can + help them. + sec: 609 + time: '10:09' + who: Ioannis +- line: So how can you be supportive? + sec: 624 + time: '10:24' + who: Alexey +- line: '[chuckles] Exactly!' + sec: 626 + time: '10:26' + who: Ioannis +- line: What does it look like? + sec: 629 + time: '10:29' + who: Alexey +- line: Usually, it involves me getting enough business knowledge. If we talk about + the Digital [department], it involves me understanding how, let's say, how the + PPC advertisements work or how the SEO organic results work, and trying to understand + what their aim is – which metrics they're interested in and what they do on a + day-to-day basis. Then I see, “You know what? 
If we had a predictive model that + could do X, Y, and Z, would that benefit you?” And then we have this kind of discussion + that would essentially create some clarity on the business problem that we will + then try to tackle. + sec: 633 + time: '10:33' + who: Ioannis +- header: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' +- line: I’ve heard the term “digital department” [from you] many times but to be honest, + I have no idea what it actually means. It probably means different things at different + companies, right? [Ioannis agrees] because different companies need to do different + things. In your case, you mentioned PPC advertisement – I don't know what PPC + is – Pay Per Click, right? + sec: 675 + time: '11:15' + who: Alexey +- line: Exactly. Pay Per Click. + sec: 697 + time: '11:37' + who: Ioannis +- line: So the digital department is also some marketing stuff, right? + sec: 699 + time: '11:39' + who: Alexey +- line: Exactly. Pay per click, if you think about it, these are the sponsor ads that + you see on Google. If you go on Google, and you type “flights from London Gatwick + to Berlin,” let's say, and you press “enter,” you see the 10 results that appear + on the first page of Google. What you can see there first are usually the sponsored + ads. These are the pay-per-click ads, as they’re known. The reason they're called + “pay per click” is because there is an incurred cost every time a person clicks + on that specific ad. We're trying to, in a way, optimize sponsored ads that appear + on top. And we do the same thing for SEO results – we tag the organic URLs that + appear which are usually below the sponsor ads. In a way, it’s an optimization + that we're trying to do, so that the flights that we want to promote always appear + on top and then, hence we can improve the conversion rate. 
+ sec: 703 + time: '11:43' + who: Ioannis +- line: The other day, I was checking the cost per click in Google for keywords like + “MLOps,” or “MLOps courses”. [Ioannis chuckles] Sometimes, for more niche words, + it's like three euros per click, and then for more broad ones, it's like four + or five, which was like, “Wow, is it that expensive?” + sec: 767 + time: '12:47' + who: Alexey +- line: Yeah, yeah. [chuckles] You have to bid on the right keywords, and then become + relevant and all this kind of stuff that is happening in Google behind the curtains. + sec: 794 + time: '13:14' + who: Ioannis +- line: For you, as a lead data scientist, you need to figure out what these people + talk about, like, “What does PPC mean?” “What do people care about?” “What is + optimization?” And then, with this knowledge that you can extract from them (learn + from them) you then go and share this knowledge with the data science team and + you say, “Okay, these are the problems that these departments are struggling with. + Let's think about how we can help them.” Right? [Ioannis agrees] And then you + translate the problems into the language of data science and then, together with + the team, you work on solving this. Right? + sec: 802 + time: '13:22' + who: Alexey +- line: Exactly. Yeah, absolutely. + sec: 838 + time: '13:58' + who: Ioannis +- header: 'Operating model for data products: four-phase funnel and accountability' +- line: In addition to communicating with stakeholders, I think you mentioned other + things – you make sure that projects reach production. What does that mean for + you? Okay, you first talked with the stakeholders, you understood that these are + the pain points they have – what happens next? What do you do next as the lead + data scientist? 
+ sec: 840 + time: '14:00' + who: Alexey +- line: As soon as I have the problem statement defined, we have an operating model + within EasyJet that really helps us to understand, first of all, what the different + steps are that we have to take to ensure that this resolution of the problem will + reach production, and then we make sure that we adhere to all these different + steps. There's a sequence that we follow. As a lead data scientist, I am accountable + for ensuring that all of these processes are being followed. We make sure that + when the data product reaches production, it will have the impact that was expected. + And yeah, that's pretty much it in terms of my role. I can talk a little bit more + about the framework if you want me to. + sec: 863 + time: '14:23' + who: Ioannis +- line: That’s quite interesting. What are these steps and what is this operating + model? + sec: 917 + time: '15:17' + who: Alexey +- header: 'Project intake & prioritization: "single front door" and cross-functional + kickoff' +- line: Yes, the operating model that we have, I think is one of the best things that + we have created in EasyJet. I had a speech about that at the MLOps Summit. The + operating model consists of different stages – I think it's four phases, if you + will, that highlight all the different steps that we need to take to ensure that + the model will reach production. The first thing is to get clarity on the problem + statement, and this is pretty much my role. We like to call this a “single front + door,” where we take a business problem or an idea into the funnel. + sec: 923 + time: '15:23' + who: Ioannis +- line: As soon as we do this, we have a meeting where all the relevant stakeholders + come together and discuss the idea a little bit more. 
In attendance, you would + expect people such as the business analysts and the finance team to understand + the financial benefits that might be involved with the project, a lead data scientist, + data engineers – every single person that needs to be involved in that specific + project. As soon as we do that and we understand, “You know what? There's a real + possibility of something good in this project,” we can take this on. We prioritize + based on different ideas that have been submitted over time. And then we create + something like a priority, “You know what? This problem is the most crucial one, + so let's try to work on that first.” + sec: 923 + time: '15:23' + who: Ioannis +- line: As soon as we pick up a project, we will create the so-called “Definition + of ‘Done,’” which is at the business understanding phase, where we try to understand + a little bit more about the requirements that we need to pick to make this project + a success, which business KPIs we need to influence, improve, or increase or decrease, + and how we can measure the benefits. For the latter, it means, let's say, I give + you random numbers as an outcome, how do you know whether these random numbers + are good or not? So we make sure that we create a document (the Definition of + Done document) that highlights, “This is the data product. This is what production + looks like. These are the benefits that are going to come about based on this + calculation methodology.” + sec: 923 + time: '15:23' + who: Ioannis +- header: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' +- line: A large document? + sec: 1057 + time: '17:37' + who: Alexey +- line: Not that large. Usually it's a single document – we have a template. You can + think about two to three pages, tops. + sec: 1060 + time: '17:40' + who: Ioannis +- line: Two or three, okay. + sec: 1069 + time: '17:49' + who: Alexey +- line: Yeah. It's not that bad, I think.
It outlines on a high level what things + we need to make sure to deliver at the end of the day so that we don't have really + much of a moving target, if you will. + sec: 1071 + time: '17:51' + who: Ioannis +- line: I assume you have some sort of a template, right? A Google Document or maybe + a Confluence page, and then you just copy this page and fill in all the things. + sec: 1085 + time: '18:05' + who: Alexey +- line: Fill in the information. Absolutely. + sec: 1096 + time: '18:16' + who: Ioannis +- line: And you do this? + sec: 1098 + time: '18:18' + who: Alexey +- line: Not me, at this stage. I oversee the entire procedure, but usually, we would + have a business analyst having workshops with the business stakeholders who are + going to be the business accountable for the project. We try to capture every + single requirement in this Definition of Done document. + sec: 1100 + time: '18:20' + who: Ioannis +- line: Here, you don't talk about machine learning yet? It’s more about, “Okay, this + is the project and this is the impact that we expect this project to achieve. + This is how we measure this impact.” Things like that, right? You don't talk about + machine learning at all at this stage. Right? + sec: 1119 + time: '18:39' + who: Alexey +- line: Nothing at all. It just captures the definition of “done”. It captures just + the “what” of the product, not the “how”. + sec: 1142 + time: '19:02' + who: Ioannis +- line: There’s no discussion of the solution at all, right? + sec: 1151 + time: '19:11' + who: Alexey +- line: Nothing whatsoever. + sec: 1159 + time: '19:19' + who: Ioannis +- line: Okay. + sec: 1163 + time: '19:23' + who: Alexey +- line: Because at the end of the day, we may have a document and we may realize down + the line that it's not something feasible. 
We may know what we need to do, but + after we have established all the requirements, we may realize, “You know what, + the data is not actually there, which means that this is a no-go.” When that happens, + although it doesn't happen frequently, this is a “fail fast” scenario. Then we + say, “You know what, we cannot proceed with that. Let's take the second in line.” + sec: 1162 + time: '19:22' + who: Ioannis +- line: But this happens later, right? [Ioannis agrees] At the business understanding + step you come up with this Definition of Done document for a project, which is + like two or three pages long, and then I guess you proceed to the next step, which + is, as you mentioned, checking data and things like that. + sec: 1188 + time: '19:48' + who: Alexey +- line: Exactly. As soon as everybody has signed off on this document – the business + stakeholders, data scientist (which is me, in this case), the data engineer, + and every single person involved – then we proceed to the next phase. This is + where the data science-y involvement starts to kick in – inception. You can think + of it as the EDA (exploratory data analysis) where we try to ensure that we have + everything that we need. That includes access to the data, if the data is already + present, any GDPR concerns that we might encounter, exploring the data sources + as in different distributions and these kinds of constraints that we might have. + Yeah, that's pretty much it. + sec: 1203 + time: '20:03' + who: Ioannis +- header: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' +- line: At which stage do you actually…? You said that this is when data science kicks + in. Is this the stage when you think, “Do I even need machine learning here or + is it more like an analytical project?” + sec: 1254 + time: '20:54' + who: Alexey +- line: Absolutely. + sec: 1269 + time: '21:09' + who: Ioannis +- line: Okay.
+ sec: 1271 + time: '21:11' + who: Alexey +- header: 'Data science vs analytics: choosing technical approach and leads' +- line: As soon as we kick off the inception phase, this is where the data scientists + and analysts come together, and we brainstorm about the solution – we discuss + the “how”. At this point, we understand whether this is a data science project + that would involve machine learning or data analytics, or whether it's a hybrid + between the two different sub-teams (data science and analytics). + sec: 1272 + time: '21:12' + who: Ioannis +- line: To be honest, we do have some idea, when the business stakeholders discuss + the problem, and we may have already decided at this point that this is a data + science project or a data analytics one. But at the inception phase, we’re absolutely + certain that, “You know what? This is 100% a data science project,” for instance. + It’s just the confirmation that we have of when we started. + sec: 1272 + time: '21:12' + who: Ioannis +- line: And depending on whether it is a data science project or not, I guess the + next step would be different, right? + sec: 1329 + time: '22:09' + who: Alexey +- line: Absolutely, yeah. + sec: 1335 + time: '22:15' + who: Ioannis +- line: Then if it’s not a data science project, you say, “Okay, I'm a data scientist, + I cannot help you,” and then somebody else takes this over, right? + sec: 1338 + time: '22:18' + who: Alexey +- line: Not really. I’m accountable for both the data science and analytics projects. + The only difference is that if it's an analytics project, the technical lead who + will work on the project is going to be a data analyst instead of a data scientist. + I still hold the accountability for making sure that the product is delivered + end-to-end. + sec: 1347 + time: '22:27' + who: Ioannis +- header: 'Research & development: modeling work, sprint planning and Kanban usage' +- line: So what's the next step? Or is it different for different projects? 
+ sec: 1368 + time: '22:48' + who: Alexey +- line: Not really. As soon as you have an idea and you have defined the “how” of + solving the problem statement, this is where we move into the research and development + phase. These are the hardcore modeling steps in data science, where we follow + all the different design methodologies – sprint planning, stand-ups, retrospective + – all the usual suspects are usually there, where we discuss all the different + stories that we have defined in a Kanban board, for instance. We define sprints, + “This is the goal for sprint one, sprint two.” This is where we start building + whatever that solution might look like. We also make sure that the stakeholders + are closely working with us because you have to make sure that… It's a common + problem that we're trying to tackle so you want to make sure that the business + stakeholders are part of the team and they're not just sitting around waiting + for a delivery in three to six months’ time, depending on the complexity. So we + make sure that we tackle that as a single team. + sec: 1375 + time: '22:55' + who: Ioannis +- line: So that's why you have regular (at least weekly) meetings with them, right? + You want to keep them updated on, “What is the progress? What is being solved + right now? What stage are each of the projects?” Things like that? + sec: 1448 + time: '24:08' + who: Alexey +- line: Absolutely. Also, at the end of every sprint, which is usually bi-weekly, + we have a demo where we show, “These are the things that we have delivered.” And, + if possible, we have an actual demo where they can get a sense of what we're building + and influence some of the steps that we might take on the future sprint. They + oversee the project from the beginning all the way to the end so they make sure + that what gets delivered at the end of the day is something that they will end + up using. 
+ sec: 1462 + time: '24:22' + who: Ioannis +- line: So I guess you also give them some sort of demo – a Streamlit App or something + like this – that they can play around with so they see, “Okay, this is not what + I meant.” Or “Yeah, this is what I need.” + sec: 1499 + time: '24:59' + who: Alexey +- line: Absolutely, yeah. + sec: 1513 + time: '25:13' + who: Ioannis +- header: 'Pilot & A/B testing: validating models against baseline KPIs and feedback + loops' +- line: After the R&D phase, is there anything else? + sec: 1517 + time: '25:17' + who: Alexey +- line: Yes. Then we have the pilot phase. In the Definition of Done, we have already + defined the KPIs and the baseline that we're trying to beat. Usually, there's + an existing “as-is” process that we're trying to beat with a new solution. Then + we move into the pilot phase, which usually looks like A/B testing, where we test + the “as-is” process compared to the “to be” process and ensure that the product + that we have built improves the KPI of interest. + sec: 1522 + time: '25:22' + who: Ioannis +- line: During that time, we also collect feedback from the business stakeholders + because that can influence a second iteration of the product if needed. After + the creation of the model, usually, it's the pilot phase, to ensure that we get + the benefits that we were expecting. If that succeeds, then, I guess, it's deployment. + sec: 1522 + time: '25:22' + who: Ioannis +- line: I’m just trying to come up with a joke about the “pilot phase”. [Ioannis and + Alexey laugh] I’m not creative enough. [chuckles] + sec: 1575 + time: '26:15' + who: Alexey +- line: '[laughs] I know what you mean.' + sec: 1584 + time: '26:24' + who: Ioannis +- line: 'So okay – the steps are (the phases are): first, it''s the business understanding + phase, when we come up with this Definition of Done for a project.
Then it’s the + inception phase, where people actually… In the first step, you talk about the + “what” and not the “how” but in the second step, you discuss their actual solution + and you also decide if it''s a data science project or more like an analytical + project. Then, during the R&D phase, you work on the development – the research + and development of the project. Then you also talked about how exactly you do + this – all these agile techniques. At the end, there is the pilot phase, where + you take what you developed and you see if the KPIs you defined in the Definition + of Done are actually met. Right?' + sec: 1586 + time: '26:26' + who: Alexey +- line: Absolutely. Yeah, that's correct. + sec: 1642 + time: '27:22' + who: Ioannis +- header: 'Production rollout: spectrum of production and evolving MLOps capabilities' +- line: So those are the four steps that you mentioned. Is there a fifth one after + the pilot? Like, the production part? + sec: 1645 + time: '27:25' + who: Alexey +- line: It's usually the production. As you probably already know, “production” is + a spectrum. Production might mean surfacing some insights into a Tableau dashboard, + for instance. It can be some predictions being surfaced into an external tool. + That can be all sorts of different things. Depending on what this means, we have + the appropriate, let's say, production framework, which is still being developed + at the moment. Of course, MLOps is certainly still at the beginning. But yeah, + after we see that the benefits are already there and we beat the baseline, we + roll this out to the entire market, depending on the project, of course. + sec: 1652 + time: '27:32' + who: Ioannis +- header: 'Organizational structure: domain-focused lead data scientists (scheduling, + ops, pricing)' +- line: The use cases you deal with are mostly related to marketing and similar cases + – all these campaigns. + sec: 1698 + time: '28:18' + who: Alexey +- line: Yes.
Mostly Digital and Marketing. + sec: 1707 + time: '28:27' + who: Ioannis +- line: So you don't try to work with the actual planes and the schedules? + sec: 1710 + time: '28:30' + who: Alexey +- line: Not myself. But that's an excellent question because, as a data scientist, + I look after Digital Customer and Marketing, but actually we have two or three + more lead data scientists, where every single one looks after a different division + of the business. So we have a lead data scientist who looks after Scheduling and + Network, and another lead data scientist who looks after the Ops when needed, + and, of course, Pricing and Revenue. + sec: 1719 + time: '28:39' + who: Ioannis +- line: I noticed that tickets became more expensive after COVID. [Ioannis laughs] + sec: 1752 + time: '29:12' + who: Alexey +- line: I have no idea about this. [laughs] No comments. + sec: 1757 + time: '29:17' + who: Ioannis +- line: Well, you have a discount, right? [chuckles] + sec: 1761 + time: '29:21' + who: Alexey +- line: Yeah. [chuckles] + sec: 1764 + time: '29:24' + who: Ioannis +- line: I remember that a trip to Italy, before COVID, cost… Sometimes it was actually + more expensive to get the bus that goes from the airport to the city than the + actual ticket. These days are gone. Now it's more expensive to travel. + sec: 1765 + time: '29:25' + who: Alexey +- line: Yeah, I guess inflation as well. Yep. + sec: 1786 + time: '29:46' + who: Ioannis +- line: I was always wondering how companies like RyanAir can keep their costs that + low – when it's like 10 euros for a ticket. But they probably cannot anymore because + now it's different. + sec: 1791 + time: '29:51' + who: Alexey +- line: Exactly. I think it's because of the different business models that different + airlines operate under. There's a specific mindset that allows, let's say, RyanAir + to operate with tickets that have an X price compared to EasyJet or Wizz Air – + different competitors, of course. 
+ sec: 1801 + time: '30:01' + who: Ioannis +- header: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' +- line: You already talked a little bit about Agile methodologies that you use during + the R&D phase and I was wondering if maybe you can talk more about this? How do + you structure your day-to-day work when it comes to working on data science projects? + In my experience, I remember… It was some time ago, and we tried Scrum. Maybe + I'll take a step back. My background was originally a Java developer, and Scrum + works well for well-defined developed software engineering projects. + sec: 1821 + time: '30:21' + who: Alexey +- line: But when it comes to data science, it's a little bit more ambiguous, because + you don't know whether what you will have at the end (the thing you build) will + work or not. In software engineering, it's usually less nondeterministic, let's + say. Usually, you know that you will eventually build the thing that solves the + problem, you just don't always know how long it will take. + sec: 1821 + time: '30:21' + who: Alexey +- line: When it comes to data science, you not only don't know how long it will take, + but you also don't know whether it will actually work in the end. [Ioannis agrees] + How do you structure your processes around this problem? You mentioned agile sprint + planning and Kanban – so I'm curious to know in more detail how exactly you structure + the work. + sec: 1821 + time: '30:21' + who: Alexey +- line: Yes, absolutely. Of course, I was working as a technical lead (as a senior + data scientist) which means that, now, as a lead data scientist, I don't schedule + all the agile ceremonies. But as a technical lead, when I was a senior, I did + have that experience. What I was following was all the different agile methodologies + that have been introduced – I was making sure to stick with them. What you said + about being ambiguous is actually true. 
Because in data science, you don't really + know what you're building until you go and actually build it. This is when you + realize whether it works or not. + sec: 1911 + time: '31:51' + who: Ioannis +- line: So what we try to do to make the process a little bit simpler – to ensure + that it's working – is we have the notion of MVPs (minimum viable products) which + means that, in the Definition of Done document, we have the list of all the requirements + that we know we have to build, which means that we kind of already have a sense + of what we're building and which direction that we'll be taking. And because we + know what we're building, it's a bit easier to estimate the time that it might + take for us to deliver a single requirement or a single feature. That doesn't + mean that we're always following Scrum – personally, I'm an advocate of Kanban, + because of the complexities that have to do with data science and machine learning. + But usually, we’re pretty good at estimating whether a specific feature is going + to take, let's say, a week and a half. Even though we may not strictly follow + the Scrum methodology, we actually have a Kanban board, and we try to put some + timelines into our schedule to ensure that, “You know what? We'll have something + built by the end of this two-week sprint.” + sec: 1911 + time: '31:51' + who: Ioannis +- line: Of course, we do this with all the different agile ceremonies that we mentioned + – we have sprint planning, which ensures that we have the different complexities + allocated to the different stories. Of course, there are many ways to do that. + At the end of the day, we do have some sense of how long something is going to + take because of the notion of MVP, and we try to stick to these two-week sprints. 
+ sec: 1911 + time: '31:51' + who: Ioannis +- line: So you group all your work into these two-week sprints and at the beginning + of each sprint, you do some sort of planning where you decide, “Okay, for these + two weeks (for this sprint) we take this, this, and this. It will take probably + the entire two weeks to do.” Right? And then during the week… + sec: 2064 + time: '34:24' + who: Alexey +- line: Exactly, depending on the resources. + sec: 2091 + time: '34:51' + who: Ioannis +- line: The resources are the people who work on this, right? + sec: 2094 + time: '34:54' + who: Alexey +- line: Yeah. Something to add here, which also helps us estimate the different stories + and how much they're going to take, also comes at the inception phase. At the + inception phase, we dive into the data and try to understand a little bit about + the quality of the data, how much preprocessing we might have to do, or how much + time a specific implementation might take depending on the complexity of the project. + The inception phase also gives us an understanding of how much time this specific + implementation is going to take. That helps us estimate the timing a bit. + sec: 2100 + time: '35:00' + who: Ioannis +- header: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' +- line: Can you maybe walk us through the entire sprint? So, the sprint starts with + planning and I think it ends with a demo – what happens in between? + sec: 2138 + time: '35:38' + who: Alexey +- line: Yes. In between, we have daily stand-ups. Of course, it can be a written stand-up, + or an actual 15-minute stand-up, usually in the morning, where the entire team + comes together and we say, “I've been working on this story. This is the progress + I’ve made so far. This is the plan that I'm going to work on today (or for the + next couple of days).
These are the blockers (if any) that I'm encountering at + the moment.” Usually, when this happens, you have a senior member jump in to support + – we make sure that all the blockers are removed so we can deliver the project + or the feature on time. + sec: 2147 + time: '35:47' + who: Ioannis +- line: Of course, depending on the complexity of the project, that can be an everyday + stand-up or every other day – it really depends. But I think what works the best, + according to my experience, is having two stand-ups per week so that it gives + time for the people to work on the different stories. And, of course, if something + goes wrong, you can always reach out to a teammate to ask for support. That's + pretty much it in terms of stand-up. And of course… [cross-talk] + sec: 2147 + time: '35:47' + who: Ioannis +- line: It’s not a very heavy process, right? What I understood is that you have this + estimate – the start of the sprint where you estimate. Then you have some stand-up + meetings during the week. Then, at the end, you have the demo. Right? That's basically + the process. So it's not very heavy. [Ioannis agrees] Because I know in Scrum, + there are all sorts of other things like grooming. I don't even remember what + else, but I remember that the backlog grooming can get quite heavy if you follow + the book and try to implement everything. + sec: 2222 + time: '37:02' + who: Alexey +- line: That's true. But I think the notion of Agile is actually being agile and seeing + what works for your team and what doesn't. We have tried with different meetings, + according to what has been proposed over time. But we have identified that this + framework that we have works great for our team and we follow this specific framework. + One of the things that Ben Diaz, who is the Director of the Data Science and Analytics + team, says is, “We have to be agile at being agile.” I think that summarizes everything. 
+ [chuckles] + sec: 2262 + time: '37:42' + who: Ioannis +- header: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' +- line: What does estimating look like for you? Do you use something like Planning Poker + or things like that? + sec: 2297 + time: '38:17' + who: Alexey +- line: It depends. Different teams use different techniques. We have T-shirt sizing, + sometimes we follow the Fibonacci sequence to allocate points. We also have Scrum + masters who support us in that way. We make sure that we don't use days as a way + of estimation. So, whatever has worked for the different team members over time, + it's usually the technical leader of the project who decides which method they + want to use. + sec: 2306 + time: '38:26' + who: Ioannis +- line: Yeah, interesting. So you do some sort of planning poker, right? Or? + sec: 2337 + time: '38:57' + who: Alexey +- line: Yeah, yeah. + sec: 2344 + time: '39:04' + who: Ioannis +- line: And what does it look like? I imagine that there's a meeting, and in this + meeting, you have different people – you, a scrum master, project lead, data scientists + who can implement this, and then somebody (for example, you, as the project lead) + says, “Now, let's talk about this task (this story) that we are going to take + in this sprint, which is about changing the color or changing the chart on this + dashboard (or whatever).” Right? + sec: 2346 + time: '39:06' + who: Alexey +- line: Yeah, whatever that may be. + sec: 2377 + time: '39:37' + who: Ioannis +- line: Everyone says, “Okay, I think this is a very easy task.” Right? + sec: 2379 + time: '39:39' + who: Alexey +- line: Exactly, that you put that number on top. Depending on which one you think + is the most complex, you put the corresponding numbers. Yeah, this is pretty much + it.
Every single team member… Of course, there are always outliers, but usually, + you have all the different stories and you say, “Okay, which one do we think is + the most complex one?” This gets allocated with that specific number, and then + we increase the complexity depending on the methodology that we use. + sec: 2384 + time: '39:44' + who: Ioannis +- line: Yeah, interesting. In your experience, does it work well? + sec: 2410 + time: '40:10' + who: Alexey +- line: I think so. There have been examples where it has worked out perfectly and, + of course, there are always [chuckles] the bad examples where you can see that + you're quite tough when it comes to timelines. But I think the bottom line is + that you have to adjust and be mindful of the fact that not everything is expected + to go well on every single project. As soon as you manage your expectations, I + think you're good. + sec: 2415 + time: '40:15' + who: Ioannis +- header: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' +- line: When it comes to business stakeholders, I assume you don't invite them to + your stand-ups, but you probably invite them to demos, right? + sec: 2449 + time: '40:49' + who: Alexey +- line: Yes, that's correct. I think that's a great way for the business stakeholders + to get a sense of what we're building because they can get an early interaction + with the tool and the direction that we're taking. They also feel like a part + of the team and that makes them more engaged in what we're building and quickly + sense that we're a team and we're trying to tackle this problem together instead + of us acting like consultants, “This is what we're building for you. 
Just use + it.” + sec: 2461 + time: '41:01' + who: Ioannis +- header: 'Communicating technical results: simplifying concepts for non‑technical + audiences' +- line: I also imagine that the business stakeholders – it could be the Head of Marketing + or Head of Digital, or some other Head – don't necessarily know what a ROC curve + means or precision-recall and things like that. [Ioannis agrees] When it comes + to demos that are maybe a little bit more technical, they sit there and are just + like, “Okay, I don't understand this, but I trust that you’re doing your work.” + How do you deal with this – when stakeholders do not necessarily understand what + the team is talking about? Or do you maybe educate the stakeholders, educate the + team, or both? What helps? + sec: 2493 + time: '41:33' + who: Alexey +- line: I think, in cases like that, you really have to be a chameleon and this is + where soft skills come into place. When we have a demo session at the end of every + sprint, we have to make sure that we never use technical language with them, because + you have to adjust your context for a non-technical audience. I don't think there's + been a single project where we have thrown some technical jargon, if you will, + at all. + sec: 2535 + time: '42:15' + who: Ioannis +- line: You educate the team members. You can say, “Look, if you say ‘ROC curve,’ + they will be like, ‘Okay, what is that?’” So you teach them how they can present + findings, the projects, and the demos, in a way that stakeholders will understand. + sec: 2572 + time: '42:52' + who: Alexey +- line: Exactly. We never use any technical language with them. And if there's something + that you need to explain that might require some technical knowledge, we always + make sure that we use examples that can be easily interpretable compared to a + technical implementation that you have seen.
For instance, if you think about + recommender systems and you want to understand how a specific person is closely + related to another, you wouldn’t say, “As a measure of understanding how close + two individuals are, we use the Euclidean distance.” + sec: 2594 + time: '43:14' + who: Ioannis +- line: Instead, you put forward two examples where you say, “You see that these two + people look similar?” And you don't really need to define similar in this context, + because they can see that all the different roles, for instance, look the same, + compared to another individual that is completely on a different cluster. So when + you want to explain these kinds of technical details, you can always use an example + that would make sense for a non-technical audience. + sec: 2594 + time: '43:14' + who: Ioannis +- line: Well, I assume that this is also a skill – presenting your findings in a way + that non-technical people can understand. [Ioannis agrees] It can be even more + difficult to learn this skill, to master this skill – let's say, even more difficult + than learning machine learning, at least for technical people. + sec: 2659 + time: '44:19' + who: Alexey +- line: Potentially, yes. [laughs] + sec: 2679 + time: '44:39' + who: Ioannis +- line: People who are used to terminals and notebooks and all this stuff – going + in and presenting something to business stakeholders might not be something that + they're used to doing. So how do you educate people? How do you help them learn + this skill or master this skill? + sec: 2680 + time: '44:40' + who: Alexey +- header: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a + helper' +- line: I don't think there's an easy way. I think this comes with experience and + just making sure that you always enhance your soft skills.
One of the things that + usually helps is thinking about all the different inner sentences that people + usually say, “Pitch it to me like I'm a five-year-old.” Or I think Einstein had + said, “If you can’t explain something in simple terms, you don't know it that + well.” So, I guess it's just a matter of reminding people that the people that + we have on the other side of the call don't have the technical experience that + you have, so try to speak their language and explain what you're doing like you're + speaking to a five-year-old. I guess there's no easy way to do this, it just comes + with experience and constant feedback, of course. + sec: 2710 + time: '45:10' + who: Ioannis +- line: And I guess having a five-year-old helps. [chuckles] + sec: 2764 + time: '46:04' + who: Alexey +- line: Yeah. [laughs] I can only imagine. + sec: 2775 + time: '46:15' + who: Ioannis +- line: Maybe if you don't have a kid who's five years old, you have no idea how much + knowledge they actually have. [Ioannis agrees, chuckles] I have a son. He's seven + years old. He sometimes asks me things like how GPS works. And I have no idea. + Let's say if I go on the internet and type, “How does GPS work?” then the explanation + would be super technical. Then I think, “Okay, how do you explain this to my son?” + So it's a skill. Well, one hack I found quite useful is just asking ChatGPT. I + guess everyone uses this now. + sec: 2777 + time: '46:17' + who: Alexey +- line: Oh, yeah, of course. Absolutely. I still remember the days when ChatGPT wasn't + out – I remember, I was a graduate data scientist at the time. I got the opportunity + to present something to business stakeholders. I think this is when I found out, + not in a nice way, that my ways of presenting and soft skills are not as good + [as I thought].
I remember there was a really cringe moment where I was trying + to explain why having 99% accuracy as a wider term doesn't mean anything unless + you know about the balance with the labels. Yeah, I think it didn't go well. I + think this pushed me a little bit to try to understand how I can present to someone + who doesn't have technical expertise. I think it comes with experience at the + end of the day. + sec: 2818 + time: '46:58' + who: Ioannis +- line: Actually, we can think of ourselves as five-year-old kids too, when it comes + to learning new things. For example, when I read this article about how GPS works, + I'm clueless. Okay, there are a bunch of us that try to explain it, but I don't + really understand what's happening there. So the explanation that ChatGPT gave + to my son was actually helpful for me to also understand that. I don't know if + I should say that, but maybe we can think of stakeholders as kids. [chuckles] + sec: 2865 + time: '47:45' + who: Alexey +- line: '[laughs] Yeah, I think I know what you mean. I''m really happy that all the + stakeholders that we have at EasyJet are really literate in terms of data science + and mathematics. That makes our work really, really easy. So I''m so thankful + for that.' + sec: 2903 + time: '48:23' + who: Ioannis +- header: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' +- line: Yeah. Great. Also, I actually wanted to spend a bit of time talking about + the MLOps Zoomcamp course, because I was… + sec: 2918 + time: '48:38' + who: Alexey +- line: Yeah, of course! + sec: 2927 + time: '48:47' + who: Ioannis +- line: I was really surprised when I looked at your background – I thought, “Why + would Ioannis even consider it?” Because with your experience – you're already + doing all the things you talked about right now – I'm wondering, what inspired + you to take our course? Why did you decide to take it? 
+ sec: 2929 + time: '48:49' + who: Alexey +- header: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' +- line: Yeah, absolutely. The thing is, as a lead data scientist, my role has become + a little bit more managerial compared to the amount of time that I have to spend + doing technical stuff. And if you ask me, having a bachelor of mathematics, I'm + a geek at heart, which means that every opportunity I get to get my hands dirty + with some data and build something myself – I always take it. MLOps specifically + is, from my experience – I'm usually involved in, let's say, building the models + and I didn't get much exposure to the productionization side of things. I was + just intrigued by the course and the content. Of course, I was using MLflow, but + then we had Prefect – the data engineering team – and we have been using airflow. + And I'm like, “Let me get into that engineering side of things a little bit more + and also get the opportunity to get my hands dirty.” I think this is what clicked + for me. And I'm like, “Yeah, let me go for it.” + sec: 2950 + time: '49:10' + who: Ioannis +- line: Well, as somebody who was a lead data scientist in the past, one problem for + me was always time. [Ioannis chuckles] With all this stakeholder management, how + do I actually find time to still be hands-on and experiment with things? [Ioannis + agrees] And then sometimes, I wanted to take a course, but then I didn't have + time, because there’s only 40 hours that you spend at work. How did you solve + this problem? + sec: 3022 + time: '50:22' + who: Alexey +- line: Yeah, that's a great question. I think one of the good things about my decision + to become a data scientist is that I genuinely love the profession. I would be + a data scientist as a hobby if my day job was something different. This means + that even when I finish my work, I don't feel drained from all the information + that I had to go through throughout the day. 
+ sec: 3053 + time: '50:53' + who: Ioannis +- line: I genuinely enjoy working as a data scientist, which means that I consider + that as an activity rather than, let's say, something that will consume my time. + So yeah, it was just great. I had my morning cup of coffee, and during the weekends, + I took my laptop, went to a nice coffee place and just watched your courses and + tried to do the assignments. It's been fun. And I got a little experience out + of it, to be honest. So yeah, it was just great. + sec: 3053 + time: '50:53' + who: Ioannis +- line: So instead of watching Netflix, you watched the courses. + sec: 3112 + time: '51:52' + who: Alexey +- line: What was that? + sec: 3117 + time: '51:57' + who: Ioannis +- line: Instead of watching Netflix, you watched the courses. Or… Maybe in addition + to. + sec: 3118 + time: '51:58' + who: Alexey +- line: Yes! [laughs] Absolutely. + sec: 3121 + time: '52:01' + who: Ioannis +- line: Okay. Well, it sounded like the course was useful for you, right? Was it mostly + like… I don't know if I should call it that – entertainment? Or more like self-educating? + Or did you also get something out of this course and apply it at work? + sec: 3124 + time: '52:04' + who: Alexey +- line: It was a little bit of both. It was entertainment in the sense that I got + confirmation that what I'm doing is correct. But also, I got the opportunity to + play with technologies that I otherwise wouldn't have time to. One of the examples + is Prefect, for instance. Because as a lead data scientist, I’m not that involved + in the engineering side of things, so I wouldn’t get the opportunity to play with + Airflow or Prefect. So I think it had a good balance of both – getting the confirmation + that what I'm doing is correct, but also learning something new. This is really + important because as you mentioned in the beginning, I'm leading the MLOps team + within EasyJet. 
Even though I give the guidance and have an influence on where + we're going as a data science and analytics team with our MLOps journey, it was + great for me to understand a little bit about the technical landscape. I feel + that that's the best way to influence a specific direction. So that really worked + well. + sec: 3144 + time: '52:24' + who: Ioannis +- header: 'Model monitoring with Evidently: drift detection and integration plans' +- line: Actually, before our conversation (before our interview) I had a chat with + Elena from Evidently and she said, “Oh, Ioannis is coming to your podcast? Make + sure to ask about Evidently!” [chuckles] + sec: 3213 + time: '53:33' + who: Alexey +- line: Absolutely. Evidently, I think – and I'm not afraid to say this, but I think + Evidently is the best Python library out there for model monitoring. This is something… + the final assignment that I did for the MLOps Zoomcamp also gave me the opportunity + to play with the Evidently library a little bit more. I had the time to play with + Evidently, I think, two years ago, when it was still, in a way, the dev version. + I remember the first time that I reached out to them, because I said, “You know + what? I have implemented that and it doesn't look correct.” There was actually + a bug and this is how the networking kicked in. But yeah, Evidently – absolutely + the best Python library for model monitoring. + sec: 3228 + time: '53:48' + who: Ioannis +- line: Do you use it at EasyJet as well? + sec: 3280 + time: '54:40' + who: Alexey +- line: Absolutely. We will use it to their sense of embedding that within our MLOps + framework. It's still a work in progress but we have made tremendous progress + throughout all these years. I think, especially now that we're trying to define + our MLOps capabilities, Evidently is the best thing that could have happened to + me and to EasyJet to that extent. 
+ sec: 3283 + time: '54:43' + who: Ioannis +- header: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' +- line: Just curious – I know Evidently, right now, has its own dashboard, but what + you do is probably based on some sort of other monitoring framework, like Grafana + or something like that, right? + sec: 3311 + time: '55:11' + who: Alexey +- line: Yeah, I mean, right now we're thinking about using the Tableau dashboard and + I have a proof of concept that I'm about to present to the EasyJet MLOps team. + But before that, because I had already implemented a proof of concept, we weren't + using Grafana – we didn't have the UI. To be honest, I had implemented a custom + function that would trigger an email alert to the technical lead of the project + in case there was data drift or model drift detected. It was, I think, two to + three years ago. + sec: 3325 + time: '55:25' + who: Ioannis +- line: You mentioned Tableau, and it's interesting how versatile this tool is. [Ioannis + chuckles and agrees] It's not just a dashboard, you can even build simple, rudimentary + monitoring in Tableau. I remember we had problems with data quality and then our + analyst quickly came up with a dashboard that shows how many records there are + each day in the important tables. Then, what he did next was configure Tableau + to send an alert if the number for one of the days was less than expected. He + did that in like 30 minutes or something. That was amazing. + sec: 3361 + time: '56:01' + who: Alexey +- line: Okay. That's great. It indeed sounds amazing. Goodness. + sec: 3404 + time: '56:44' + who: Ioannis +- line: I mean, at the end, it's just a bunch of SQL queries and then knowing where + to put these queries and which button to click to create an alert, he knew how + to do this. Not everyone knows that. But it was a quick and dirty solution that + worked pretty well. It's amazing. 
+ sec: 3407 + time: '56:47' + who: Alexey +- line: Yeah, that's good. It's always exciting when someone delivers something that + fancy in such a short period of time. + sec: 3423 + time: '57:03' + who: Ioannis +- header: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' +- line: Yeah, I think we should be finishing soon. So maybe I'll ask you one thing. + We talked a lot about communicating with business stakeholders, we also talked + about Agile processes. We talked a little bit about MLOps. Are there any good + resources that you can recommend to our listeners who want to learn more about + these topics? + sec: 3429 + time: '57:09' + who: Alexey +- line: About which topic specifically? + sec: 3456 + time: '57:36' + who: Ioannis +- line: Well, about any of those that we discussed – let's say, about processes, about + communicating with business stakeholders? When you were learning how to do your + job well, maybe you came across some books or courses that helped you. + sec: 3459 + time: '57:39' + who: Alexey +- line: There is a single resource that I would recommend to every single aspiring + data scientist/data analyst to watch out for. I'm not sure if you know Cassie + Kozyrkov – she’s the Decision Intelligence Advocate for Google, at least she used + to be – she resigned. But Cassie Kozyrkov and her course on YouTube, Making Friends + with Machine Learning, I think, is the best resource out there, in order to understand + how you can communicate technical details to a non-technical audience. I think + the way she speaks and expresses these kinds of technical details in such a nice + and direct way, is one of the best skills that someone can get. 
And I think, watching + her YouTube videos helped me to really understand “What would be the best way + to explain a technical term to someone that is not familiar with my world and + data science in general?” + sec: 3478 + time: '57:58' + who: Ioannis +- line: I spent, I think, countless hours watching her videos, trying to analyze the + way that she approaches things, terms, or explains how linear regression works. + So if you want, Cassie Kozyrkov from Decision Intelligence from Google – her YouTube + videos, Making Friends with Machine Learning. At least this is how to communicate + to a non-technical audience. When it comes to technical details, I think different + books like Pattern Recognition from Bishop is one of the best books that you + can go with. It's really heavy, so you have to make sure that you're comfortable + with mathematics. + sec: 3478 + time: '57:58' + who: Ioannis +- line: In many senses – because I remember we used this book for my machine learning + classes and it was heavy for the class too. [chuckles] + sec: 3584 + time: '59:44' + who: Alexey +- line: It was heavy, indeed. But I'm telling you, if you spend time and you actually + focus – let's say you have a two-hour block of time and you go through that, it's + one of the best things that you read to understand the mathematics behind machine + learning and how it really works. Of course, LinkedIn helps a lot with different + posts and resources that are being recommended. I think on a day-to-day basis, + LinkedIn is my go-to resource website. + sec: 3594 + time: '59:54' + who: Ioannis +- line: Cassie… I think this is how I know her – from LinkedIn. I don't know if she's + active anymore, but she used to be quite active on LinkedIn and this is where + I went to see her content. + sec: 3622 + time: '1:00:22' + who: Alexey +- line: She is amazing, yeah – podcast, YouTube, LinkedIn, of course. I think she + was all over the place. I think now she's building something on her own.
This + is why she left Google. And I'm really interested to see what this is going to + be. I know this is about decision-making and decision intelligence, which is something + she has established on her own. So yeah, I'm really looking forward to seeing + her content. + sec: 3636 + time: '1:00:36' + who: Ioannis +- header: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' +- line: Yeah. Thanks, Ioannis, for joining us today, and for sharing all that you + shared with us today. Yeah, it was amazing. Thanks for finding time. And thanks, + everyone else, too, for joining us and being active here. I think… I actually + forgot – we had only one question that I accidentally forgot to mention. Is it + okay, Ioannis, if Dave reaches out to you on LinkedIn and asks this question? + sec: 3660 + time: '1:01:00' + who: Alexey +- line: Yeah, absolutely. I'm always open. I'm super active on LinkedIn. Any question, + whatever that may be – feel free to reach out on LinkedIn and I’ll make sure to + get back to you. + sec: 3694 + time: '1:01:34' + who: Ioannis +- line: Okay, thanks. And with that, I guess we’re finished. + sec: 3707 + time: '1:01:47' + who: Alexey +- line: Amazing. Thanks for having me! + sec: 3711 + time: '1:01:51' + who: Ioannis +- line: Yeah. Thanks. Bye, everyone. + sec: 3714 + time: '1:01:54' + who: Alexey --- Links: diff --git a/_podcast/from-marketing-to-product-owner-in-search.md b/_podcast/from-marketing-to-product-owner-in-search.md index 90c6d7ef..fbf2e5ef 100644 --- a/_podcast/from-marketing-to-product-owner-in-search.md +++ b/_podcast/from-marketing-to-product-owner-in-search.md @@ -1,7 +1,1096 @@ --- -description: Learn actionable growth strategies, marketing tips, and productivity - hacks to scale faster—get frameworks, examples, and clear next steps today. 
+title: 'Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook' +short: From Digital Marketing to Analytics Engineering +season: 11 +episode: 7 +guests: +- nikolamaksimovic +image: images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg +ids: + anchor: From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s + youtube: GawJ7mG5ElQ +links: + anchor: https://anchor.fm/datatalksclub/episodes/From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s + apple: https://podcasts.apple.com/us/podcast/from-digital-marketing-to-analytics-engineering-nikola/id1541710331?i=1000586740912 + spotify: https://open.spotify.com/episode/5VwS6ijaToirTzR7Xd5Phw?si=OsOVLOzBSt2sIgvbRS3krg + youtube: https://www.youtube.com/watch?v=GawJ7mG5ElQ + +description: "Discover DBT, SQL & data modeling tactics for pivoting into analytics engineering: learn migration, tooling, A/B testing, and a career playbook to get hired." +intro: "How do you transition from digital marketing into analytics engineering—and master DBT, SQL, and data modeling in the process? In this episode, Nikola Maksimovic shares her complete career transformation journey, from startup marketing roles in London and Berlin to growth marketing at Ecosia, and ultimately her pandemic-driven pivot into BI and analytics engineering. Nikola reveals the step-by-step learning path that worked for her—SQL fundamentals, hands-on BI projects, strategic conversations with internal data teams—plus the essential technical skills that got her hired: advanced SQL, data pipeline understanding, and Python foundations.

You'll get an inside look at real analytics engineering work: spearheading a company-wide DBT migration, navigating data modeling decisions (wide vs narrow tables, incremental strategies), and working with modern data stacks including Snowplow, DBT, Looker/LookML, Redshift, Airflow, Airbyte, and Redash. We also explore A/B testing frameworks, product analytics implementation, and the nuanced differences between analytics engineer and data analyst roles. Nikola shares her proven transition playbook (Excel → SQL → dashboards → meaningful projects), networking tactics that opened doors, mentorship approaches, and the communities and resources that accelerated her learning.

Whether you're in marketing, operations, or any non-technical role considering a move into data, this episode provides a concrete roadmap with actionable steps, realistic timelines, and insider insights to help you successfully pivot into analytics engineering." +topics: +- data science +- analytics engineering +- career transition +- tools +dateadded: 2022-11-19 + +duration: PT00H54M34S + +quotableClips: +- name: 'Episode Overview: Switching from Marketing to Analytics Engineering' + startOffset: 0 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=0 + endOffset: 32 +- name: 'Early Career & Startup Experience: London, Berlin, Movinga' + startOffset: 32 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=32 + endOffset: 64 +- name: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' + startOffset: 64 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=64 + endOffset: 173 +- name: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' + startOffset: 173 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=173 + endOffset: 438 +- name: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' + startOffset: 438 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=438 + endOffset: 525 +- name: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' + startOffset: 525 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=525 + endOffset: 593 +- name: 'Internal Pathway: Conversations with BI Team and Required Skills' + startOffset: 593 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=593 + endOffset: 662 +- name: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' + startOffset: 662 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=662 + endOffset: 770 +- name: 'Transition Phase: Balancing Marketing Work and BI Projects' + startOffset: 770 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=770 + endOffset: 854 +- name: 'Current Responsibilities: Analytics Engineering, Product Support & A/B 
Testing' + startOffset: 854 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=854 + endOffset: 1114 +- name: 'Data Modeling in Practice: DBT Migration and Transformation Layers' + startOffset: 1114 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1114 + endOffset: 1234 +- name: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, + Redash' + startOffset: 1234 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1234 + endOffset: 1328 +- name: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' + startOffset: 1328 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1328 + endOffset: 1392 +- name: 'Looker & LookML Experience: Reporting and Dashboard Building' + startOffset: 1392 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1392 + endOffset: 1491 +- name: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' + startOffset: 1491 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1491 + endOffset: 1506 +- name: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational + Fit' + startOffset: 1506 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1506 + endOffset: 1720 +- name: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' + startOffset: 1720 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1720 + endOffset: 1828 +- name: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' + startOffset: 1828 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1828 + endOffset: 2026 +- name: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' + startOffset: 2026 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2026 + endOffset: 2130 +- name: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and Udemy + SQL' + startOffset: 2130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2130 + endOffset: 2307 +- name: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' + 
startOffset: 2307 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2307 + endOffset: 2376 +- name: 'Domain Knowledge Advantage: Marketing Funnel, User Journey & Empathy' + startOffset: 2376 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2376 + endOffset: 2510 +- name: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' + startOffset: 2510 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2510 + endOffset: 2709 +- name: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' + startOffset: 2709 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2709 + endOffset: 3023 +- name: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' + startOffset: 3023 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3023 + endOffset: 3130 +- name: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally + Optimistic)' + startOffset: 3130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3130 + endOffset: 3226 +- name: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' + startOffset: 3226 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3226 + endOffset: 3274 + +transcript: +- header: 'Episode Overview: Switching from Marketing to Analytics Engineering' +- line: This week, we'll talk about switching careers from marketing to analytics + engineering. We have a special guest today, Nikola. Nikki started her career as + a performance marketing specialist and quickly realized that she needs to rely + on data to make good decisions. That's how her data journey started and she eventually + became an analytics engineer. In this interview, we will find out how that happened. + Welcome to our event. + sec: 0 + time: 0:00 + who: Alexey +- line: Thank you very much for having me. 
+ sec: 30 + time: 0:30 + who: Nikola +- header: 'Early Career & Startup Experience: London, Berlin, Movinga' +- line: I want to mention – this is something new – questions for this interview were + prepared by Leat Shemesh, and Victoria Perez Mola, so thanks a lot for your help + in preparing the questions. If anyone here who is listening and wants to help + us prepare for more interviews in the future, please reach out to me. Okay, let's + start. Before we go into our main topic of switching to analytics engineering, + let's start with your background. Can you tell us about your career journey so + far? + sec: 32 + time: 0:32 + who: Alexey +- header: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' +- line: Yeah, of course. I actually studied in the UK, in London, and I moved over + to Berlin soon after graduating from my Bachelor's quite spontaneously. I found + myself just in the data startup scene, like many English-speaking people do, [chuckles] + because it was pretty much the only available route. So I started out working + for Movinga, which was a big removals startup, backed by Rocket Internet. I was + working in the operations team there. I kind of had my first taste of working + at a startup there. + sec: 64 + time: '1:04' + who: Nikola +- line: Of course, as you can imagine, that was very intense – fast growth, lots of + change. It was kind of a baptism of fire for six months. After that, I found a + job at Ecosia. I was really following Ecosia really closely because I really was + inspired by the business model and the mission. For those who don't know, Ecosia + is the search engine that uses its profits to plant trees. It's essentially a + purpose company, which means that profits are basically entirely used towards + financing the tree planting project. Then I suddenly saw a job for a generalist + marketing role, which I applied for. My first role at Ecosia was actually sort + of more generic marketing. 
+ sec: 64 + time: '1:04' + who: Nikola +- line: Was it something that you also did at Movinga? Was it something different? + sec: 166 + time: '2:46' + who: Alexey +- header: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' +- line: It was completely different. But it was a generalist kind of junior role, + where you're helping write press releases, think up campaigns, reach out to potential + partners – this kind of work. I've done quite a lot of that through university + when I've been volunteering for an organization that helps students get into volunteering. + Through that kind of work, and through more not professional work, but rather + just more organizing, political work, event planning and stuff I've done at university, + that's kind of where I had built up those organizational and marketing skills + from. That's essentially what I ended up really speaking about largely at the + interview. + sec: 173 + time: '2:53' + who: Nikola +- line: We were a really small company with 15 people when I joined. So it was really + one of those early-stage startups. We were doing whatever job needed to be done. + Sometimes it was replying to user feedback, other times it was helping test a + new app design – all sorts of things. At some point, I felt like I really wanted + to go deeper into an area and I basically started running the paid campaigns that + we started doing after I joined the company. First it was on Facebook, but later + on we expanded onto YouTube and Instagram. And I really enjoyed that. I found + it very helpful to really focus in on a specific area. + sec: 173 + time: '2:53' + who: Nikola +- line: Something I found very gratifying about performance marketing was that you + get results very quickly, so you can kind of really see what's working and what's + not working. 
As opposed to other areas of marketing where something like a press + campaign or brand activation, where it's not necessarily clear what impact that + might have had right away. Sometimes it's really difficult or almost impossible + to measure, which I found very frustrating. [chuckles] But with performance marketing, + you're given the data immediately and you can analyze that and make a decision + in minutes on how to move forward. I got really, really into that. + sec: 173 + time: '2:53' + who: Nikola +- line: Of course, there are so many online resources for performance marketing, and + in general. It's a relatively new discipline as well, in the grand scheme of the + history of marketing. So I was really able to dive into that by myself, largely. + I was given a lot of responsibility at the company as well, so I learned that + way. I did that for two years. At some point, I also started to kind of think, + “Okay. Well, I feel like I've kind of understood this. It maybe has its limits + in terms of what's interesting or not.” The part that I really enjoyed was looking + at the data coming in, analyzing what the click-through rates are saying, what + the conversion rates are and what that means. “How can we optimize this campaign + based on the data that we're getting? How does it compare with historical trends?” + All of this sort of work I really enjoyed. + sec: 173 + time: '2:53' + who: Nikola +- line: At the same time around this time, the company had switched to Looker from + Tableau. At the time, we only had one data person at Ecosia. I helped her with + the migration to Looker just as a side project. Since I was kind of the person + who was most comfortable with data and reporting and numbers and measuring KPIs + and whatnot in the marketing team, I took on building out the marketing team reporting. + And I really enjoyed that. + sec: 173 + time: '2:53' + who: Nikola +- line: It was your initiative, right? 
Nobody told you, “Hey, you should do this.” + You were just like “Okay, this sounds interesting. I really like this topic. And + I kind of learned everything that was there about performance marketing, so let + me try to also run this new tool.” + sec: 424 + time: '7:04' + who: Alexey +- header: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' +- line: Yeah, exactly. I think at this point, it wasn't really clear to me that I + wanted to necessarily move into the data team. I just wanted to maybe have more + focus on numbers and data in general, but probably still within the marketing + team. Eventually, I think the big shift that happened was – the pandemic hit. + Like a lot of people, it just forced me to consider what I was doing and whether + I was happy in my role, and I found that I really wasn't. At this point, I decided + to yet make the shift into the BI team. I think, at this point, I had already + done a SQL course some months before with a view to going down the marketing/analytics + route. But with the pandemic, I really realized that I wanted to move away from + the purely marketing focus and go towards BI. + sec: 438 + time: '7:18' + who: Nikola +- line: These SQL courses – did you have a plan that you wanted to work in the BI + team eventually? Or it was like, “Okay, let me see what I should do in order to + do my job better.”? + sec: 509 + time: '8:29' + who: Alexey +- header: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' +- line: I think I remember speaking to my colleague in the BI team, who was in the + context of being a marketing analyst person. Initially, the idea was kind of that + I'd sit between marketing and BI. But I think it's because I really didn't think + it was possible for me to move departments. That hadn't really happened in the + company before. There wasn't really an example of that to me. So I think I was + rather thinking, “Well, what's possible? 
What could I do?” And it was this marketing + analyst role. But yeah, I definitely took the SQL course in order to move closer + towards the data side. + sec: 525 + time: '8:45' + who: Nikola +- line: So then you realized, “Okay, maybe I'm not really super happy with the job + I'm doing in the marketing department and there is this BI team.” So did you just + approach them and ask, “Can I just join you and start working with you?” Or how + did this happen? + sec: 576 + time: '9:36' + who: Alexey +- header: 'Internal Pathway: Conversations with BI Team and Required Skills' +- line: Eventually – yes. I think eventually the conversation was already there, as + I mentioned before, around how to become more into this marketing analyst role. + Already, my colleague was giving me lots of advice. It was probably through that + process and those conversations that the possibility of me moving into the BI + team came up, to be honest. I don't remember exactly who brought it up. But what + I remember is my colleague in BI saying, “Well, these are the things that we really + need you to have. Once you have those things, there's no reason why you shouldn't + be able to join the team as a junior analyst.” + sec: 593 + time: '9:53' + who: Nikola +- line: Do you remember what these things were? SQL, I suppose, is one. + sec: 642 + time: '10:42' + who: Alexey +- line: Yes, SQL was the main thing. Then learning and understanding the data pipeline + that we had was another. + sec: 645 + time: '10:45' + who: Nikola +- line: So it wasn't a list of courses that you have to take, but rather, “Okay, these + are the things we’re working on. Try to figure out what's happening there.” + sec: 655 + time: '10:55' + who: Alexey +- header: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' +- line: Yeah, exactly. One of the things was like, “Python would be great.” I ended + up doing a Python course, but barely actually using it. It's been useful to have. 
+ Of all the things, the most useful practically was jumping into… once you know + SQL and you can write and read SQL, you'll still need to get good at reading and + writing SQL. You start coming across much more complicated SQL queries and you're + like, “[expletive], there's like a nested loop here. Where is this coming from?” + sec: 662 + time: '11:02' + who: Nikola +- line: Then improving SQL to be able to read and understand much more complex data + models – that was a big part of the journey. It was really about understanding + what our models were, how everything fit together in the wider scheme of the pipeline, + and how it came to be. Because I had no idea even how a tracker really worked + – I just sort of knew that there was this thing called the Snowplow Tracker that + collected the data. But it was all sorts of not very detailed knowledge. So really + going in and understanding how things really work to get the data from one point + to the other and transform it. + sec: 662 + time: '11:02' + who: Nikola +- line: Did you need to keep doing your old job of marketing specialist, or could + you completely just immerse yourself in BI? Or was there some in-between period + where you had to do both? + sec: 749 + time: '12:29' + who: Alexey +- header: 'Transition Phase: Balancing Marketing Work and BI Projects' +- line: To be honest, as part of the performance marketing role, I was really acting + like a kind of marketing analyst, in a way – building the reporting for the teams + and for the people who are doing other jobs, I was helping them build reports + and managing that. So I was already kind of doing a lot of that kind of work. + There was a transition period where the first projects that I worked on were more + marketing-focused. I think one of the main projects was helping establish how + to measure brand campaigns, looking into that and building dashboards based on + that, and a wider topic around that. 
+ sec: 770 + time: '12:50' + who: Nikola +- line: I think it was kind of a transition period, but at some point, I just handed + over the main performance marketing tasks, which are managing the campaigns. It + was quite a good moment because the pandemic meant that we were already hitting + a slight stagnation point with some of our campaigns. Then the pandemic hit and + it was really difficult to record new ads, as well, in quarantine. There was a + kind of natural slowing down of that side of the work anyway, so it was a good + moment to pivot. + sec: 770 + time: '12:50' + who: Nikola +- header: 'Current Responsibilities: Analytics Engineering, Product Support & A/B + Testing' +- line: And what do you do now? What do your responsibilities include? + sec: 854 + time: '14:14' + who: Alexey +- line: As I've mentioned, I'm working as an analytics engineer, but also as a data + analyst. We are still a relatively small team. We are four people in total. For + reference, the company size is just over 100. None of us have a particularly specialized + role. We kind of do a little bit of everything at the moment. Our team lead is + on extended leave, so I'm acting as interim team lead. A lot of work is really + working with the new CPO who's just come in, reassessing the KPI that we have + at the company and how we measure them. Of course, I think it’s quite common when + a new C-level comes in to rehash the dashboards and rework the core reporting + to suit the new requirements, so a lot of work has been recently done on that. + sec: 860 + time: '14:20' + who: Nikola +- line: There are two of us that are in these analyst roles and we work very closely + with product managers. We're focusing very closely on supporting the various product + teams with experimentation, building out new features, A/B testing, evaluating + those, and when necessary, building out our data model to reflect those new changes. 
+ I think the day to day is really a mixture of supporting the teams – sometimes + ad hoc analysis is needed. For example, there is a new feature being developed + and there's some hypothesis around the kinds of users they want to reach and how + big those cohorts might be, jumping into the data and taking a look at that. Other + work is maybe more on an initiative of our own. For example, recently, we ran + a big RFM analysis (recency, frequency, monetary) user behavior analysis, which + was a bigger project. There's many ways to do it and we took some time to experiment + with different options. That's been a larger project over some months with several + presentations of insights. + sec: 860 + time: '14:20' + who: Nikola +- line: There are those bits of work where we're not necessarily working directly + for an individual product manager, but working on wider pieces of analysis and + insight that's beneficial for the company as a whole. I just wanted to add that + we've also recently started doing a few small data science projects in the team, + just on the side, which I myself am not directly involved with. But one of my + colleagues is. We're trying to basically run some NLP models on trying to improve + how we understand queries that our users make, and try and essentially build better + query categorization so we can ultimately serve better results. It's been really + nice that we've been able to pick up some more data-sciencey topics in the team + and not work exclusively on reporting and internal. + sec: 860 + time: '14:20' + who: Nikola +- line: 'This query understanding – it''s about understanding intent, right? Why a + user is searching for some information: Do they want to come in and navigate to + a certain website? Do they want to get some information? Do they want to buy something?' + sec: 1079 + time: '17:59' + who: Alexey +- line: Yeah, exactly. Specifically, it's around being able to segment various queries + into the correct categories. 
So “does this query or query group fall into the + category of ‘travel’ or ‘shopping’ or ‘transport’ or etc.?” + sec: 1094 + time: '18:14' + who: Nikola +- header: 'Data Modeling in Practice: DBT Migration and Transformation Layers' +- line: So a different kind of characterization. When you were describing what kind + of duties you have and what kind of things you work on, you mentioned that you're + working on KPIs, dashboards, supporting product teams with experiments, ad hoc + analytics. + sec: 1114 + time: '18:34' + who: Alexey +- line: You also mentioned a data model. Up to the data model, I think I understood, + more or less, what you are doing. But what is a “data model”? Why do you need + to build a data model? Why do you need to update it? + sec: 1114 + time: '18:34' + who: Alexey +- line: We built a data model in DBT based on something called the domain model. Basically, + we began two or so years ago, maybe even longer now. We migrated to DBT. In that + moment, we basically rewrote all our queries basically to build all our tables + – the whole database was rebuilt from scratch. It had evolved over time. We have + something like six installed tables or something ridiculous. + sec: 1147 + time: '19:07' + who: Nikola +- line: Six what tables? + sec: 1190 + time: '19:50' + who: Alexey +- line: Install. + sec: 1191 + time: '19:51' + who: Nikola +- line: The data model is about describing what kind of data you have – all this schema + and definitions, right? + sec: 1195 + time: '19:55' + who: Alexey +- line: Yeah, sorry. For the data model, what I mean is – what we have in DBT, essentially, + is all about different transformation logic for the entire business, from the + most basic staging layer down through to the presentation tables that we then + use for analysis. 
+ sec: 1200 + time: '20:00' + who: Nikola +- header: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, + Redash' +- line: I’m just trying to understand what kind of tools you use. You mentioned three + tools already. You mentioned Snowplow, which is a tool for tracking – to understand + what kind of actions users perform and save intersections. Then you also mentioned + DBT, which is a tool for transformation. You have some data sitting somewhere + and you need to change it slightly, rework, aggregate it, and then put it in such + a form that you can use it for reports. You also have Looker, which is a tool + for dashboards. What else do you use? You probably use some sort of database (a + data warehouse) right? Maybe some other tools too? + sec: 1234 + time: '20:34' + who: Alexey +- line: Yeah, exactly. We use AWS services, so we use S3 and Redshift, and also Spectrum + as well to query Athena. We play around a lot with so-called “hot and cold storage” + so keeping data in Redshift versus keeping it in S3 in parquet files. That's due + to cost optimization. That's what we use for our lake (warehouse). And then we + use Airflow as well, as our orchestration tool and for our extracting and loading + operations. + sec: 1278 + time: '21:18' + who: Nikola +- line: Was it a part of your job to set up all these tools? + sec: 1323 + time: '22:03' + who: Alexey +- header: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' +- line: It was part of my job to set up DBT. That was one of the first big projects. + I'd been in the team for maybe six months or so and then we began the migration + to DBT. We actually worked with a data consultancy, a small one, that helped us + because we were essentially three people. I led that project – it was one of my + first big projects, which was great. It was a really big learning curve. 
+ sec: 1328 + time: '22:08' + who: Nikola +- line: I got to learn not only about DBT (the tool itself) but also data modeling + theory and practices and different ways of doing things – what makes sense depending + on the size of your data and your goals and needs. That was really great. So DBT + is the main one. Looker as well, as I mentioned, I helped to migrate to and implement + in the company. + sec: 1328 + time: '22:08' + who: Nikola +- line: This was before you actually joined the BI team, right? So you started this + in marketing looking at this tool. + sec: 1386 + time: '23:06' + who: Alexey +- header: 'Looker & LookML Experience: Reporting and Dashboard Building' +- line: Actually I strangely learned LookML before I learned SQL, which is a slightly + strange, I think, way of doing it. [chuckles] But there we go, that's how it happened. + And Airflow was set up by my colleague who has more of a data engineering role + within the team. That was also set up relatively recently – in the last two, three + years or so. Those are the main tools. We recently started using Airbyte. Some + people might be familiar with that. It was basically to be able to extract from + some kind of common API data sources. We haven't used it extensively. + sec: 1392 + time: '23:12' + who: Nikola +- line: So far, we often find that we've got a lot of options, but specifically what + we need often doesn't necessarily have the connection yet. But I think it's a + nice tool – relatively easy to use. We've also recently started using Redash, + which is an open source visualization tool that we use for more ad hoc queries, + to be able to have the visualization attached to them as well. + sec: 1392 + time: '23:12' + who: Nikola +- line: It seems like most of the tools are open source, apart from AWS. Is Looker + open source? + sec: 1466 + time: '24:26' + who: Alexey +- line: No, I don't think so. + sec: 1472 + time: '24:32' + who: Nikola +- line: But the rest are, right? 
Snowplow is open source. DBT is open source. Airbyte + is open source. Redash – I don’t know. Is it? + sec: 1474 + time: '24:34' + who: Alexey +- line: Redash is open source as well. + sec: 1481 + time: '24:41' + who: Nikola +- line: So you like open source. Don’t you? + sec: 1484 + time: '24:44' + who: Alexey +- line: Yes. [laughs] Exactly. + sec: 1487 + time: '24:47' + who: Nikola +- header: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' +- line: Do you host all these things yourself? For example, when it comes to DBT, + do you use their cloud? + sec: 1491 + time: '24:51' + who: Alexey +- line: No, we host everything ourselves. That's just the general decision of the + engineering department. + sec: 1497 + time: '24:57' + who: Nikola +- header: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational + Fit' +- line: When you joined the BI team were you already called an analytics engineer, + or you just realized over time that, “Okay, this is what I should call myself.”? + sec: 1506 + time: '25:06' + who: Alexey +- line: My official role is Analytics Engineer and Data Analyst, because I really + do both. We’re not the size of a BI team that it's possible for someone to want + too much to do. But I think initially, it was… I don't know what the title was + initially, BI Analyst or something – Data Analyst. At that point, even the term + Analytics Engineer really wasn't common. I think I really only learned about that + in the process of implementing DBT, which was in 2020. + sec: 1517 + time: '25:17' + who: Nikola +- line: Really, some time has passed since DBT has obviously become huge in the data + community. I think this role of an engineer is also becoming much more common. + But I think at the time, when I joined the team, that wasn't even an option. I + don't think anyone even thought of that. 
I don't think the people in the BI team + were actually calling themselves that, even though that's essentially the job + they were doing. Over time, as we all became familiar with that new term and realized + that it basically described what we were doing – so that was taken on. + sec: 1517 + time: '25:17' + who: Nikola +- line: Do you think there's some hype in that role? I mean, there was no such thing + before and now, all of a sudden, everyone’s talking about analytics engineering. + sec: 1596 + time: '26:36' + who: Alexey +- line: Yeah. To be honest, if you have a small BI team of six or less people – I + guess it depends on your company, and your product and the business model – but + I think it's a little bit overhyped. Ultimately, I still think that you need quite + a large organization to be able to comfortably segment data analysts and analytics + engineers – they have so much crossover anyway. I can see that in larger organizations, + it's really helpful to have that separation. But I think in smaller ones, it's + not that helpful, at least in my experience, which is simply this is one company. + I can't speak for others, but I found that it's helpful in terms of your own personal + progress, because you can align yourself with this role and say, “Okay, yes. This + is what I do. This is somewhere where I could improve and an area that I could + spend more time on, but I'm not necessarily sure.” + sec: 1605 + time: '26:45' + who: Nikola +- line: I think for most small/medium-sized companies, I don't think it's necessary + to get really bogged down into the differences between the two. Ultimately, you’re + still going to need very overlapping skills. You need to be very analytical, very + comfortable with your KPIs, what the business model is, the domain model – all + of that work, which is not limited to an analytics engineer and a good data analyst + needs all of those things. I think there's maybe a little bit of hype. 
But again, + as I said, it depends on the organization size. If you have a huge company with + a data department of 20, 30, 40 people, then of course, it just makes structural + sense to split out and focus. + sec: 1605 + time: '26:45' + who: Nikola +- header: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' +- line: Do you think it's synonymous to using DBT? Like “You use DBT, therefore, you’re + an analytics engineer.” And “If you’re an analytics engineer, then you use DBT.”? + Are they the same thing? Or can you be an analytics engineer without using DBT? + sec: 1720 + time: '28:40' + who: Alexey +- line: It's a good question. I feel like DBT themselves have really promoted this + concept, right? + sec: 1740 + time: '29:00' + who: Nikola +- line: I think, yeah. It’s coming from them. + sec: 1747 + time: '29:07' + who: Alexey +- line: Exactly. [chuckles] In a way, yeah – it kind of is synonymous. I, at least, + haven't seen many job applications for an analytics engineer that haven't been + like “Your job is to work with DBT.” [chuckles] I'd be interested in how that + role could look with a different stack. I imagine there are people who are working + under the title of data engineer or data analyst who do the work of an analytics + engineer, but just don't call themselves that in other companies that maybe don’t + use DBT. + sec: 1749 + time: '29:09' + who: Nikola +- line: In the company where I work, we don't have DBT. We have a homegrown DBT kind + of replacement. But it was before DBT was popular. Like many other companies, we + kind of invented DBT, which is like an Airflow-based way to schedule SQL queries. + I don't think any of our analysts who use this call themselves analytics engineers. + I'm wondering, are there any tools that do the same thing as DBT apart from these + homegrown tools like we have? Is there any such thing on the market? 
+ sec: 1788 + time: '29:48' + who: Alexey +- header: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' +- line: I don't know, to be honest. [laughs] I haven't had the time to really look + into it. I think at the moment, DBT is on such a growth trajectory. I see so many + job ads that are looking for people to help them set up DBT. I think it's really + taking off, so I don't presently know. Like you said, we were previously using + SQL Runner, which is like Snowplow. It’s kind of similar. That’s exactly what + you described, basically. An orchestration tool for SQL queries, where you can + specify the order and whatnot. Incrementalization strategies were not invented + by DBT. There are many ways to set those up and there are other kinds of setups. + sec: 1828 + time: '30:28' + who: Nikola +- line: In terms of analytics engineering, I think for me the focus is on the wider + architecture of the data model, and with data analysts for example, perhaps there’s + not so much focus on that. For me, that's where the analytics engineering role + is really important. Once you start collecting from various different data sources + you have all of these issues around consistency and, of course, freshness. All + of these various concerns are where an analytics engineer really needs to shine + – to understand how everything fits together in this wider ecosystem. Perhaps + an analyst doesn't necessarily need to understand all the transformations and + how everything connects to each other, but an analytics engineer really does. + sec: 1828 + time: '30:28' + who: Nikola +- line: I think this focus on data modeling theory is much more important. In that + way, it's slightly more like a theoretical role in many ways, which I think is + often not really talked about. 
Often the focus is on the technical side, which + it is, but I think it's really important to understand, as an analytics engineer, + the different kinds of data modeling frameworks and what's possible. Whether having + a wider table or a narrower table – in which case should you go for one versus + the other? When should you choose a certain kind of incrementalization strategy + and when not? So I think that's part of the role that is very specific. I guess + it’s becoming more and more important, as there is so much more data that companies + in general are collecting. By virtue of more companies, smaller companies, different + kinds of companies, and the traditional big enterprises start using and collecting + data and building up data departments, then, of course, this becomes more of a + need. + sec: 1828 + time: '30:28' + who: Nikola +- header: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' +- line: About this data modeling theory that you mentioned, and selecting whether + it should be a wide table or a narrow table – if I wanted to learn more about + this, where would I go? What kind of resources do you have about this? + sec: 2026 + time: '33:46' + who: Alexey +- line: That is a good question. I really struggled a little bit with this, because + there's really a lot of quite… I wouldn't even call it “advanced” stuff. But the + textbooks that you can buy on data are very dry. [laughs] I'll just be honest. + sec: 2041 + time: '34:01' + who: Nikola +- line: Kimball and this kind of stuff, right? + sec: 2057 + time: '34:17' + who: Alexey +- line: Yeah, Kimball. There's loads of textbooks. + sec: 2060 + time: '34:20' + who: Nikola +- line: It’s something I studied at university but never actually saw this book outside + of university. + sec: 2061 + time: '34:21' + who: Alexey +- line: Exactly. To be honest, I've given them a good shot and I found that I just + learned by doing. 
I learned through talking to the people who were my mentors + or seniors – who are experts and I just asked as many questions as I could. I + was never afraid to just ask stupid questions (and repeat questions if I needed + to) until it made sense. + sec: 2067 + time: '34:27' + who: Nikola +- line: Sometimes if I had the basic knowledge and had something that I wanted to + understand, I would go and just research online. There are increasingly a lot + of really good blog posts and newsletters that are available. I think increasingly + there are more and more resources that are a lot more accessible to people who + haven't necessarily studied computer science or data science or statistics or + these sorts of subjects at university. + sec: 2067 + time: '34:27' + who: Nikola +- line: You didn't study that, right? Did you? + sec: 2127 + time: '35:27' + who: Alexey +- header: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and + Udemy SQL' +- line: No, I studied classics, which are Latin and ancient Greek. [laughs] + sec: 2130 + time: '35:30' + who: Nikola +- line: That was your education? + sec: 2136 + time: '35:36' + who: Alexey +- line: That was my Bachelor's, yeah. + sec: 2138 + time: '35:38' + who: Nikola +- line: Interesting. So you speak Ancient Greek and Latin? + sec: 2142 + time: '35:42' + who: Alexey +- line: No… I can read it. + sec: 2144 + time: '35:44' + who: Nikola +- line: Interesting. Okay. This just made our interview even more interesting. [both + laugh] How do you go from studying Ancient Greek and Latin to being an analytics + engineer? You learn basically everything you needed yourself, right? + sec: 2148 + time: '35:48' + who: Alexey +- line: Yeah, exactly. Um… + sec: 2166 + time: '36:06' + who: Nikola +- line: By “yourself” I mean not as a part of any official curriculum. + sec: 2169 + time: '36:09' + who: Alexey +- line: Yep. To be honest, I did this SQL course on Udemy that cost me 12 euros. And + it was great. 
It was really, really good. It was quite long. I can't remember + exactly, but I think it was just called The Complete Guide to SQL and it's run + by this American dude called Colt Steele. It's just a very strange name. He's + got loads of good Python courses as well that I did. I just did that in my spare + time. And to be honest, it was really great that it cost me all of 12 euros and + I haven't done a single other SQL course since. + sec: 2174 + time: '36:14' + who: Nikola +- line: Sometimes I do think, “Oh, should I go and pay for one of these fancy courses + in data science or something because it's nice to have structure and whatnot.” + But then I'm like, “Ah. If I just motivated myself, I could do it.” [laughs] There's + so much stuff online. But it's just a case of me being quite lucky to find a good + course right away. I think there are some not very good courses out there. It's + a little bit of hit and miss. One thing that's really great about software engineering + in general and computer science is that if you don't have a lot of resources, + you can really teach yourself. There are a lot of resources online. + sec: 2174 + time: '36:14' + who: Nikola +- line: At the same time, as I said, practicing is really the thing that makes the + difference and I was very lucky that I was already at a company where I knew the + domain very well, the business model very well, the KPIs. I kind of had all of + that already covered and could just focus on developing the SQL skills and data + modeling, etc. I can imagine that someone who is maybe approaching this as a career + change and maybe taking some time out to do it – it may be a little bit more difficult + because you don't have that context of a specific business or a specific problem + that you can hold in your mind as you think about these problems and have an example + that you can apply the theory to. 
+ sec: 2174 + time: '36:14' + who: Nikola +- header: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' +- line: Yeah, there is a thing called “just in time learning,” and I think you took + this to the extreme. So without any formal education in computer science or analytics, + you just focused on a specific problem, which in your case was marketing and then + you were like “Okay, how do I set up Looker to do this thing?” By the way, are + the tasks that you do now still more or less related to marketing? You mentioned + RFM analysis. I think it's still somewhat related, right? + sec: 2307 + time: '38:27' + who: Alexey +- line: Not really, to be honest. No. At the moment, I'm really working very closely + with the product team. We are focusing on growing, acquiring more users, retaining + more users – which are all of course interlinked goals of the marketing team. + It's not directly relevant, but my direct stakeholders are the product managers. + sec: 2338 + time: '38:58' + who: Nikola +- line: Okay. So I guess your background in marketing really helped you, right? + sec: 2370 + time: '39:30' + who: Alexey +- header: 'Domain Knowledge Advantage: Marketing Funnel, User Journey & Empathy' +- line: Yeah, it really did. I’ve noticed how just in everyday work, I definitely + see an edge that I have because I'm very comfortable with things like a marketing + funnel and a conversion funnel or web acquisition funnel. For example, a product + manager might be focusing specifically on a part of the funnel or a whole funnel + as part of the user journey and as a marketing person, you think about the user + journey all the time. What are the touch points of the user? How do they feel + at this moment? What are they thinking at this moment? What have they done? Where + have they come from? You have this quite close empathy with the user, and specifically + the journey. 
+ sec: 2376 + time: '39:36' + who: Nikola +- line: At the same time, your goals in marketing are to constantly optimize and grow + and get more users or higher retention or more signups or whatever it might be. + So you have this growth mindset that I think is very useful when you come to advising + people from a data point of view because you can ask the question, “Yes, you've + got some good feedback from the users on this feature. But, ultimately, the top + line hasn't moved at all. We did this because we wanted to grow (whatever this + KPI is).” It definitely does help, largely in the realm of understanding the user + journey. It means that you can really hold this user perspective in your head, + but also the data perspective together with it, and advise with those two things + in your head. + sec: 2376 + time: '39:36' + who: Nikola +- line: If somebody wants to follow your journey – so somebody who's working in marketing + (or not necessarily in marketing, but they really want to go into data and start + doing analytics engineering) and they are experts in their domain – what would + you suggest for them to do? + sec: 2491 + time: '41:31' + who: Alexey +- header: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' +- line: Firstly, I would say [chuckles] Excel is your best friend. Excel is great, + ultimately. [laughs] I know everyone hates it, but it really doesn't get the credit + it deserves. I still have people in the company who really should and don't know + how to make a pivot table. They are quite annoying to make in Excel. The most + difficult pivot table you will make will be in Excel. If you can do it there and + be comfortable (understand what's happening with columns and rows) that’s the + first place to go. So be really, really comfortable with Excel, play around with + functions, pivot tables, and just explore. 
Look at different ways of trying to + take a dataset that you feel comfortable with – it might just be something really + simple like daily signups by country – and just, in Excel, start playing around + with that and asking questions. + sec: 2510 + time: '41:50' + who: Nikola +- line: Then, of course, SQL is the most important thing. Learn SQL, try and find + some datasets online that you can play around with and practice SQL. That's really, + really useful. But ultimately, where I found a little bit of a gap in the self-learning + was between the online SQL resources and finding advanced SQL queries that made + sense – that weren't written by someone on the other side of the world about a + company that had no connection to, didn't learn from the business models and was + written in a way that, for example, wasn't the style that was going to be written + in my team. It ended up just being a little bit confusing and extra work to try + and understand. So if there's a way to access some of the SQL code that the BI + team are using – maybe you can ask them to share a couple of SQL queries they + use to make the main tables – that's definitely something to do. + sec: 2510 + time: '41:50' + who: Nikola +- line: If your company is using Looker, that's great. That's amazing – to get familiar + with that. Really, just start building, building, building dashboards. Explore + it. Become really comfortable with filtering, pivoting – those sorts of things. + There are a lot of resources from Looker online as well. I think from Tableau + as well, or whatever visualization tool you're using – it doesn't really matter. + Just become comfortable with the basic features of those. Those would be the main + things, I think. Then go from there. Find someone who can be your, if not mentor, + then your champion – an ally, I guess, in the data team. Ask them, “What do I + need to do? What skills are still missing? How do I do them? 
Do you think it's + possible?” Ask them what they would recommend if you're in an existing company + and you're looking to move to that role. I think that would be my suggestions. + sec: 2510 + time: '41:50' + who: Nikola +- header: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' +- line: How important do you think it is to have a mentor or champion in this journey? + For you, from what I understood, it was quite important. It was crucial. That + person was a marketing analyst, if I remember correctly, that actually helped + you. She told you what you should do, what kind of things you should focus on, + and then she also was helpful for you to actually transition to the team. Right? + sec: 2709 + time: '45:09' + who: Alexey +- line: Exactly. She was the BI analyst (the data analyst) – the only one that we + had at the time. Actually, sorry we already had two people in the data team and + she was one of them. For me, it was very useful and important. To be honest, though, + it depends on the company, your position in the company, how comfortable you feel, + what level of power (so to speak) you have in the company. + sec: 2734 + time: '45:34' + who: Nikola +- line: Also, for me, as a woman, I think transitioning from marketing into a more + technical role (I was going to move to the engineering department, there was a + meeting) I felt an element of imposter syndrome. I thought, “Oh, what am I doing? + Can I really do this?” I think it really helped me to have another female, basically, + mentor to champion me and encourage me and say, “Yeah, you can do this. Definitely, + you can do this. You just need to do this, this and that. You can definitely do + that. Once you've done that, we can find a way.” So it depends. I think if you + have a lot of motivation and you're very clear on what you want, and you're confident, + then I don't think it's necessarily needed. 
+ sec: 2734 + time: '45:34' + who: Nikola +- line: But particularly for minorities, there's a lot of support groups outside of + work, like PyLadies and lots of different various support groups for minorities + in tech, which are great to be inspired by. But I think having that one person + in your company who you can relate to can be really helpful just in terms of building + up your own confidence. It's definitely something that helped me also to not just + transition into the team but, once I was in the team, to accelerate quite quickly. + sec: 2734 + time: '45:34' + who: Nikola +- line: Yes, I was junior when I joined, but my career path up to being a mid-level + analyst and now interim team lead was a lot quicker because I had to fight and + be like, “Well, I have been doing analytics work for years before. I haven't actually + picked all of this up from scratch.” So having the confidence to make that clear + and argue it – it was really helpful having someone to champion me. I would recommend + finding one person in your company who can be that for you. + sec: 2734 + time: '45:34' + who: Nikola +- line: Did you take part in any of these support groups that you mentioned like PyLadies? + Or did you have mentors or people who you constantly talked to outside of the + company? Or was it mostly that person and the rest of the team that you talked + to in order to learn? + sec: 2904 + time: '48:24' + who: Alexey +- line: In my case, it was mainly my two teammates who were the BI team when I joined. + They were incredible. So supportive. They really encouraged me a lot and helped + me hugely. They were very excited for me to join the team and made me feel very + welcome like I deserved to be there. This was very useful because at times, I + was like, “Oh, what am I doing here? This is too hard.” But in terms of external + support, not really, to be honest.
I have two very close friends who worked in + data, and it was nice to talk to them and have their advice as well – to have + different perspectives from different companies. + sec: 2925 + time: '48:45' + who: Nikola +- line: Particularly as someone who's been at a company for a very long time, I definitely + feel the need to speak to people in different places and see like, “Oh, is it + also like this where you are? Is this a specific issue that only we're facing + or is this a general thing?” Having that perspective has also been really useful + in order to just benchmark certain issues that you come across. [chuckles] I think + having a few more external mentors or support would be great. In the coming year, + I'll probably look for a mentor just to help with kicking off the next phase of + development. + sec: 2925 + time: '48:45' + who: Nikola +- header: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' +- line: Do you have any ideas where you can look for these mentors? Would it be conferences, + meetup groups or someplace online? + sec: 3023 + time: '50:23' + who: Alexey +- line: Probably a combination of LinkedIn, asking the networks of people that I know + if they have anyone they recommend. Meetups as well. I think that's probably the + best way to go. + sec: 3032 + time: '50:32' + who: Nikola +- line: Is there an analytics engineering meetup in Berlin? + sec: 3050 + time: '50:50' + who: Alexey +- line: I'm not sure. There's definitely a Snowplow meetup that I think has just started + up again (or about to). In terms of the engineering, I'm not sure, to be honest. + I know that there are some data meetups. I'm not sure if that's specifically analytics + engineering. I have kept an eye open on the DBT Slack group, which is extensive + and actually great. They have some city-specific groups and Berlin has yet to + make its appearance. Perhaps in the future, there might be a DBT Berlin.
+ sec: 3056 + time: '50:56' + who: Nikola +- line: Yeah, I think there should be. One of the people who helped me with the questions + is Victoria. Victoria was a guest on this podcast over a year ago and now she + works at DBT. I think she is or will be organizing something soon. Maybe she will + tell us about that. I see that it's almost time to finish. I wanted to ask you + one last thing. + sec: 3102 + time: '51:42' + who: Alexey +- header: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally + Optimistic)' +- line: You mentioned that you are subscribed to some newsletters. There are good + blog posts, good newsletters, and these newsletters are quite useful for you. + What kind of newsletters are you subscribed to? If I want to keep an eye on what's + happening in this area, what kind of newsletters should I subscribe to? + sec: 3130 + time: '52:10' + who: Alexey +- line: That's a good question. There's one I'm subscribed to (an analytics engineering + one) that I think is called “The Roundup” or something. Analytics Engineering + Roundup. It might be the DBT newsletter, actually. There's another one that I + just subscribed to like a week or two ago. It’s called Lenny's Newsletter. + sec: 3152 + time: '52:32' + who: Nikola +- line: Lenny's Newsletter. Lenny's the name of the person. + sec: 3184 + time: '53:04' + who: Alexey +- line: I've only just subscribed to it recently. I think it was slightly more product + analytics focused. Then there is a blog that I'm sure most of your readers will + know about. I've just forgotten the name of it. It's called something like Profoundly + Optimistic or something… Locally Optimistic, yeah! Yeah that one. + sec: 3189 + time: '53:09' + who: Nikola +- line: Yeah. They have a guest coming in as well. + sec: 3218 + time: '53:38' + who: Alexey +- line: From time to time, I'll check that one. 
+ sec: 3221 + time: '53:41' + who: Nikola +- header: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' +- line: Profoundly Optimistic is also a good name. [both laugh] If somebody has questions + for you, how can they find you? Is it LinkedIn or are there some other ways to + contact you? + sec: 3226 + time: '53:46' + who: Alexey +- line: Yeah, LinkedIn would be best. They can just message me directly there. + sec: 3244 + time: '54:04' + who: Nikola +- line: Okay, Niki. Thank you very much. Thanks for joining us today. It's been a + while since we started this conversation. So finally, we had this interview. Thanks + a lot for joining us today, for telling us about your journey, for sharing all + the experience and expertise you have. And thanks, everyone, also for joining + us, for being active here. Have a great rest of the week. + sec: 3248 + time: '54:08' + who: Alexey +- line: Thank you for having me. + sec: 3274 + time: '54:34' + who: Nikola --- + Links: -* [Post](https://www.linkedin.com/posts/leracaiman_elasticsearch-ecommerce-activity-7106615081588674560-5WQO){:target="_blank"} \ No newline at end of file +* [Nikola's LinkedIn account](https://www.linkedin.com/in/nikola-maksimovic-40188183/){:target="_blank"} \ No newline at end of file diff --git a/podcast.md b/podcast.md index 9f7ed25b..06a71ca9 100644 --- a/podcast.md +++ b/podcast.md @@ -35,7 +35,7 @@ layout: page -{% assign seasons = site.podcast | reverse | group_by: 'season' %} +{% assign all_seasons = site.podcast | map: 'season' | uniq | sort | reverse %}
@@ -46,11 +46,12 @@ layout: page

All Podcast Episodes

- {% for season in seasons %} -
-

Season #{{ season.name }}

+ {% for season_num in all_seasons %} + {% assign season_episodes = site.podcast | where: 'season', season_num | sort: 'episode' | reverse %} +
+

Season #{{ season_num }}

    - {% for episode in season.items %} + {% for episode in season_episodes %}
  • {{ line.time }}){% endif %}

    +

    {{ line.who }}: {{ line.line }}{% if line.sec %} ({{ line.time }}){% endif %}

    {% endif %} {% endfor %} @@ -573,7 +573,7 @@ }); // Timestamp click functionality - const timestampLinks = document.querySelectorAll('.timestamp-link'); + const timestampLinks = document.querySelectorAll('.timestamp-link, .transcript-timestamp-link'); timestampLinks.forEach(link => { link.addEventListener('click', function(e) { e.preventDefault(); From ad25842d0d73dcfb9c5a7832a6ea37ea9c20cf86 Mon Sep 17 00:00:00 2001 From: kavaivaleri Date: Thu, 20 Nov 2025 15:42:23 +0100 Subject: [PATCH 9/9] All podcasts have topics defined; title, intro and description are now in one line bounded with "" --- _podcast/_s12e08.md | 4 +- .../ab-testing-and-product-experimentation.md | 8 +- ...r-ecology-biodiversity-and-conservation.md | 32 +- ...-in-healthcare-and-digital-therapeutics.md | 8 +- ...brid-cloud-on-prem-distributed-training.md | 31 +- ...i-ml-product-design-and-experimentation.md | 12 +- ...rading-with-python-and-machine-learning.md | 30 +- ...lgorithms-data-structures-for-engineers.md | 8 +- _podcast/analytics-engineer-skills-tools.md | 8 +- ...s-to-data-science-with-kaggle-portfolio.md | 8 +- ...-research-and-career-growth-in-practice.md | 34 +- .../bayesian-modeling-workflows-and-tools.md | 30 +- _podcast/becoming-data-freelancer.md | 32 +- ...big-data-analytics-and-postdoc-research.md | 8 +- .../big-data-engineer-vs-data-scientist.md | 8 +- ...ty-for-data-scientists-and-ml-engineers.md | 8 +- ...rmatics-worflows-tools-and-data-science.md | 33 +- ...ngineering-tooling-retrieval-evaluation.md | 31 +- .../building-ai-digital-health-startups.md | 32 +- ...and-scaling-ai-data-products-with-mlops.md | 13 +- ...engineering-systems-for-fraud-detection.md | 12 +- ...ta-science-practice-industrial-ai-mlops.md | 6 +- _podcast/building-and-scaling-data-team.md | 8 +- ...lding-data-products-lead-data-scientist.md | 12 +- ...oducts-product-owner-vs-product-manager.md | 8 +- ...emocratizing-high-performance-computing.md | 8 +- _podcast/building-data-team.md | 8 +- 
.../building-domestic-risk-assessment-tool.md | 21 +- ...xplainable-and-actionable-ai-ml-systems.md | 4 +- ...ing-healthcare-machine-learning-systems.md | 32 +- ...communities-diversity-and-career-growth.md | 8 +- _podcast/building-mlops-startup.md | 8 +- ...ce-data-product-for-identity-resolution.md | 10 +- _podcast/building-open-source-nlp-tool.md | 8 +- ...g-production-ml-platform-and-mlops-team.md | 8 +- .../building-production-search-systems.md | 33 +- ...e-and-reliable-machine-learning-systems.md | 8 +- .../causal-inference-for-machine-learning.md | 4 +- ...ta-officer-data-strategy-and-org-design.md | 8 +- _podcast/cloud-data-governance.md | 8 +- ...munity-building-and-teaching-in-ai-tech.md | 33 +- _podcast/crisp-dm.md | 6 +- .../{data-centric.md => data-centric-ai.md} | 14 +- ...business-pricing-and-client-acquisition.md | 8 +- ...data-engineering-career-path-and-skills.md | 8 +- ...ng-leadership-and-modern-data-platforms.md | 8 +- ...ata-engineering-tools-modern-data-stack.md | 8 +- ...gy-market-demand-and-client-acquisition.md | 29 +- .../data-governance-data-access-management.md | 8 +- ...iew-behavioral-and-portfolio-prep-guide.md | 12 +- ...alism-python-visualization-storytelling.md | 6 +- _podcast/data-leadership-coaching.md | 31 +- ...d-growth-event-tracking-and-reverse-etl.md | 8 +- ...rchitecture-decentralized-data-products.md | 8 +- ...ivacy-engineering-gdpr-machine-learning.md | 8 +- ...a-professionals-business-skills-in-saas.md | 14 +- ...ity-data-observability-data-reliability.md | 8 +- ...-analytics-for-nonprofits-tech-for-good.md | 8 +- _podcast/data-science-career-abc-framework.md | 8 +- ...data-science-failures-and-mlops-lessons.md | 8 +- ...-public-policy-ethical-ai-social-impact.md | 8 +- .../data-science-interview-and-cv-guide.md | 8 +- ...ence-job-red-flags-and-mismatched-roles.md | 4 +- .../data-science-leadership-hiring-mlops.md | 8 +- ...e-management-and-agile-machine-learning.md | 8 +- ...-science-manager-vs-expert-hiring-guide.md | 
8 +- ...a-science-team-structure-and-org-design.md | 8 +- ...ndie-hacker-bootstrapping-side-projects.md | 8 +- ...egy-and-dataops-for-ai-powered-products.md | 8 +- _podcast/data-team-roles.md | 4 +- .../data-translator-role-and-data-strategy.md | 12 +- ...nd-gitops-best-practices-for-data-teams.md | 8 +- ...-automation-and-reliable-data-pipelines.md | 8 +- _podcast/dataops-for-data-engineering.md | 31 +- ...-principles-and-scalable-data-platforms.md | 8 +- ...lksclub-building-scaling-data-community.md | 8 +- ...able-data-community-3-years-anniversary.md | 24 +- .../datatalksclub-scaling-and-free-courses.md | 34 +- ...n-fine-tuning-retrieval-open-source-api.md | 14 +- ...eveloper-personal-brand-learn-in-public.md | 8 +- .../devrel-data-science-open-source-tools.md | 8 +- .../devrel-open-source-machine-learning.md | 12 +- _podcast/fairness-in-ai-ml-engineering.md | 33 +- ...ng-model-monitoring-and-data-governance.md | 8 +- _podcast/finops-for-data-engineers.md | 29 +- ...ce-data-engineering-pricing-and-clients.md | 8 +- _podcast/freelancing-in-machine-learning.md | 8 +- ...i-engineer-interviews-and-career-growth.md | 8 +- ...esearch-to-data-engineering-freelancing.md | 33 +- ...pen-source-computer-vision-transformers.md | 31 +- ...ision-research-to-autonomous-driving-ai.md | 34 +- ...elancer-to-startup-open-source-products.md | 23 +- ...ing-automation-open-source-volunteering.md | 35 +- _podcast/from-game-ai-to-modern-ai-agents.md | 31 +- ...a-engineering-to-leading-data-architect.md | 4 +- ...a-science-research-software-engineering.md | 32 +- ...ytics-engineering-sql-dbt-career-switch.md | 4 +- ...om-marketing-to-product-owner-in-search.md | 4 +- .../from-math-graduate-to-data-analytics.md | 8 +- ...cs-to-computer-vision-career-transition.md | 8 +- ...o-machine-learning-and-data-engineering.md | 32 +- ...ductor-data-to-applied-machine-learning.md | 32 +- ...m-software-engineer-to-machine-learning.md | 14 +- ...-science-to-data-engineering-leadership.md | 8 +- 
...gineering-to-leading-data-science-teams.md | 12 +- ...-machine-learning-applied-ml-leadership.md | 30 +- ...p-engineering-to-freelance-data-science.md | 8 +- ...tive-ai-chatbots-in-production-security.md | 36 +- ...data-analytics-and-data-engineering-job.md | 8 +- ...data-engineering-job-prep-and-interview.md | 8 +- _podcast/get-data-scientist-job.md | 8 +- ...junior-data-job-and-transferable-skills.md | 8 +- ...managing-data-science-teams-in-b2b-saas.md | 4 +- .../hiring-data-scientists-and-analysts.md | 14 +- ...ing-for-data-engineering-jobs-in-europe.md | 8 +- ...ence-jobs-interview-questions-skills.md.md | 8 +- _podcast/how-to-break-into-data-science.md | 12 +- .../how-to-grow-your-ml-engineering-career.md | 8 +- _podcast/how-to-stand-out-in-data-science.md | 8 +- ...to-switch-to-ml-tech-without-experience.md | 8 +- ...on-into-ml-and-data-engineering-from-qa.md | 8 +- ...ng-face-contributions-and-nlp-portfolio.md | 8 +- ...entered-ai-automatic-speech-recognition.md | 31 +- ...man-centered-mlops-and-model-monitoring.md | 8 +- ...-small-data-production-machine-learning.md | 8 +- _podcast/interpretable-machine-learning.md | 30 +- ...nvesting-in-open-source-developer-tools.md | 4 +- ...y-in-tech-projects-skills-cv-networking.md | 31 +- ...ndmaster-to-production-ml-and-education.md | 33 +- ...edge-graphs-and-llms-for-automotive-rnd.md | 32 +- ...data-product-adoption-modern-data-stack.md | 8 +- _podcast/launch-and-build-retail-startup.md | 8 +- _podcast/lean-mlops-for-startups.md | 31 +- ...ine-learning-self-taught-bioinformatics.md | 8 +- ...ne-learning-data-science-interview-prep.md | 6 +- .../machine-learning-decision-optimization.md | 8 +- ...g-engineering-production-best-practices.md | 8 +- ...for-asteroid-mining-and-water-detection.md | 8 +- ...ting-attribution-marketing-mix-modeling.md | 8 +- ...achine-learning-system-design-interview.md | 8 +- ...oney-with-machine-learning-roles-skills.md | 8 +- ...tech-how-to-find-and-become-a-mentor.md.md | 16 +- 
...ndful-data-strategy-for-business-impact.md | 30 +- ...l-engineering-kpis-and-metrics-strategy.md | 8 +- ...uct-manager-and-mlops-platform-strategy.md | 8 +- _podcast/ml-system-design.md | 8 +- .../mlops-and-ml-engineering-in-finance.md | 31 +- ...mlops-at-scale-reproducibility-adoption.md | 30 +- .../mlops-community-building-and-meetups.md | 8 +- ...ture-stores-feature-stores-feast-tecton.md | 8 +- _podcast/mlops-kubeflow-model-monitoring.md | 8 +- ...ops-model-monitoring-data-observability.md | 12 +- ...elines-orchestration-ingestion-modeling.md | 8 +- ...ector-databases-llms-semantic-retrieval.md | 32 +- ...set-creation-annotation-tools-workflows.md | 8 +- .../nlp-team-hiring-and-production-mlops.md | 8 +- ...earning-freelancing-and-public-learning.md | 29 +- ...teering-in-ai-for-data-ml-career-growth.md | 33 +- _podcast/open-source-ml-contributions.md | 8 +- ...e-ml-tools-strategy-and-business-models.md | 31 +- ...turned-into-career-and-startup-creation.md | 4 +- .../personal-brand-for-data-professionals.md | 8 +- ...-to-data-science-lead-career-transition.md | 8 +- ...rel-demofirst-education-and-open-source.md | 34 +- ...-ai-consulting-from-expertise-to-impact.md | 29 +- _podcast/practical-llm-engineering-and-rag.md | 30 +- ...ical-llm-use-cases-and-product-patterns.md | 4 +- _podcast/pragmatic-and-standardized-mlops.md | 8 +- ...roduct-designer-to-data-product-manager.md | 8 +- ...duction-ml-mlops-and-data-team-building.md | 12 +- ...duction-ml-pipelines-with-aws-and-kafka.md | 8 +- ...-vector-search-embeddings-hybrid search.md | 31 +- _podcast/production-ready-ai-engineering.md | 32 +- _podcast/project-manager-to-data-scientist.md | 8 +- .../public-speaking-for-data-scientists.md | 8 +- ...neering-work-and-building-iot-platforms.md | 6 +- ...search-to-production-ml-systems-roadmap.md | 12 +- ...sponsible-explainable-ai-bias-detection.md | 8 +- ...ngineering-teams-self-service-platforms.md | 14 +- ...enterprise-ai-mlops-data-first-strategy.md | 18 +- 
...ftware-engineering-for-machine-learning.md | 8 +- _podcast/solopreneur-data-scientist.md | 8 +- ...preneur-developer-and-data-professional.md | 8 +- ...aching-mentoring-data-analytics-fintech.md | 8 +- ...n-science-coding-practices-for-academia.md | 8 +- .../technical-writing-for-data-scientists.md | 14 +- ...ng-to-tesla-full-stack-data-engineering.md | 31 +- _podcast/trends-in-modern-data-engineering.md | 27 +- _podcast/urban-data-science.md | 29 +- ...machine-learning-concepts-to-explain-ml.md | 8 +- scripts/generate_topics_podcasts.py | 579 ++++++++++++++++++ scripts/topic_list.txt | 52 ++ 192 files changed, 1799 insertions(+), 1600 deletions(-) rename _podcast/{data-centric.md => data-centric-ai.md} (97%) create mode 100755 scripts/generate_topics_podcasts.py create mode 100644 scripts/topic_list.txt diff --git a/_podcast/_s12e08.md b/_podcast/_s12e08.md index 713ef42e..1f9683b1 100644 --- a/_podcast/_s12e08.md +++ b/_podcast/_s12e08.md @@ -12,8 +12,8 @@ links: spotify: https://open.spotify.com/episode/5fB185hGlGYQmdk0kbIsPv?si=YtnsaYNzTc-fl7emZ2IjEA youtube: https://www.youtube.com/watch?v=FRi0SUtxdMw season: 12 -short: 'The Journey of a Data Generalist: From Bioinformatics to Freelancing' -title: 'The Journey of a Data Generalist: From Bioinformatics to Freelancing' +short: "The Journey of a Data Generalist: From Bioinformatics to Freelancing" +title: "The Journey of a Data Generalist: From Bioinformatics to Freelancing" transcript: - line: This week we'll talk about being a data generalist. We'll discuss going from bioinformatics to freelancing. We have a special guest today, Katya. 
As a freelancer diff --git a/_podcast/ab-testing-and-product-experimentation.md b/_podcast/ab-testing-and-product-experimentation.md index 5c19830b..2c6b3d97 100644 --- a/_podcast/ab-testing-and-product-experimentation.md +++ b/_podcast/ab-testing-and-product-experimentation.md @@ -1,6 +1,6 @@ --- -title: 'Product Analytics & A/B Testing: Causality, Metrics, Power Analysis, A/A Tests' -short: A/B Testing +title: "Product Analytics & A/B Testing: Causality, Metrics, Power Analysis, A/A Tests" +short: "A/B Testing" season: 7 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3LhBOO1UANCGbOwkntZt4j youtube: https://www.youtube.com/watch?v=0Gqx1LtqRZU -description: 'Master product analytics, A/B testing & power analysis: design stable metrics, validate randomization with A/A tests, plan sample size to de-risk features.' -intro: How do you design product experiments that truly establish causality and avoid costly false conclusions? In this episode, Jakob Graff — Director of Data Science and Data Analytics at diconium, with prior analytics leadership at Inkitt, Babbel, King and a background in econometrics — walks through practical product analytics and A/B testing strategies focused on causality and reliable metrics.

    We cover why randomized experiments mirror clinical trials, how experimentation de-risks features and builds organizational learning, and a concrete case study on subscription vs. points revenue metric design. Jakob explains experimentation platform trade-offs (third-party vs. in-house), traffic splitters, assignment tracking, and why A/A tests validate system trust. You’ll hear best practices for first tests (two-group simplicity), metric selection considering noise and seasonality, and how to plan duration with power analysis and sample-size calculations. The discussion also compares z/t/nonparametric tests, p-value intuition from A/A comparisons, frequentist vs Bayesian perspectives, and multi-armed test considerations.

    Listen to learn practical steps for designing randomized experiments, selecting stable metrics, planning sample sizes, and interpreting results so your product analytics and A/B testing produce actionable, causal insights +description: "Master product analytics, A/B testing & power analysis: design stable metrics, validate randomization with A/A tests, plan sample size to de-risk features." +intro: "How do you design product experiments that truly establish causality and avoid costly false conclusions? In this episode, Jakob Graff — Director of Data Science and Data Analytics at diconium, with prior analytics leadership at Inkitt, Babbel, King and a background in econometrics — walks through practical product analytics and A/B testing strategies focused on causality and reliable metrics.

    We cover why randomized experiments mirror clinical trials, how experimentation de-risks features and builds organizational learning, and a concrete case study on subscription vs. points revenue metric design. Jakob explains experimentation platform trade-offs (third-party vs. in-house), traffic splitters, assignment tracking, and why A/A tests validate system trust. You’ll hear best practices for first tests (two-group simplicity), metric selection considering noise and seasonality, and how to plan duration with power analysis and sample-size calculations. The discussion also compares z/t/nonparametric tests, p-value intuition from A/A comparisons, frequentist vs Bayesian perspectives, and multi-armed test considerations.

    Listen to learn practical steps for designing randomized experiments, selecting stable metrics, planning sample sizes, and interpreting results so your product analytics and A/B testing produce actionable, causal insights" topics: - data science - practices diff --git a/_podcast/ai-for-ecology-biodiversity-and-conservation.md b/_podcast/ai-for-ecology-biodiversity-and-conservation.md index 6d4cff5f..8dd1b481 100644 --- a/_podcast/ai-for-ecology-biodiversity-and-conservation.md +++ b/_podcast/ai-for-ecology-biodiversity-and-conservation.md @@ -1,7 +1,6 @@ --- -title: 'AI for Ecology, Biodiversity, and Conservation: Computer Vision, Remote Sensing - and Citizen Science' -short: AI for Ecology, Biodiversity, and Conservation +title: "AI for Ecology, Biodiversity, and Conservation: Computer Vision, Remote Sensing and Citizen Science" +short: "AI for Ecology, Biodiversity, and Conservation" season: 18 episode: 3 guests: @@ -15,25 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/ai-for-ecology-biodiversity-and-conservation-tanya/id1541710331?i=1000653709956 spotify: https://open.spotify.com/episode/3Hhz5N8ZDvsOPlPP3wxQxq?si=Oz7y_pBrTfeypfYZXubu-g youtube: https://www.youtube.com/watch?v=30tTrozbAkg -description: Discover AI-driven computer vision and remote sensing strategies to scale - biodiversity monitoring, improve species ID, and inform conservation policy. -intro: How can AI help close critical data gaps in biodiversity monitoring and turn - images and sensor data into actionable conservation decisions? In this episode Tanya - Berger-Wolf, a computational ecologist, director of TDAI@OSU, and co-founder of - the Wildbook project (Wild Me), walks through practical applications of AI for ecology, - biodiversity monitoring, and conservation.

    We cover core techniques—computer - vision, machine learning, and remote sensing—and their use in image-based monitoring - with camera traps, drones, and species identification. Tanya explains individual - identification and longitudinal tracking, habitat mapping and change detection, - and the data challenges of labeling, class imbalance, and sparse observations. The - conversation addresses integration of heterogeneous datasets, model robustness (domain - shift and transfer learning), and ethical considerations including Indigenous knowledge - and equity. You’ll also hear about scalable platforms like Wildbook, citizen science - workflows for crowdsourcing and quality control, policy relevance, open data and - FAIR principles, edge deployment in the field, and building sustainable monitoring - programs.

    Listen to gain concrete insights on tools, pitfalls, and next - steps for applying AI to conservation—what works now, what remains hard, and resources - to explore further. +description: "Discover AI-driven computer vision and remote sensing strategies to scale biodiversity monitoring, improve species ID, and inform conservation policy." +intro: "How can AI help close critical data gaps in biodiversity monitoring and turn images and sensor data into actionable conservation decisions? In this episode Tanya Berger-Wolf, a computational ecologist, director of TDAI@OSU, and co-founder of the Wildbook project (Wild Me), walks through practical applications of AI for ecology, biodiversity monitoring, and conservation.

    We cover core techniques—computer vision, machine learning, and remote sensing—and their use in image-based monitoring with camera traps, drones, and species identification. Tanya explains individual identification and longitudinal tracking, habitat mapping and change detection, and the data challenges of labeling, class imbalance, and sparse observations. The conversation addresses integration of heterogeneous datasets, model robustness (domain shift and transfer learning), and ethical considerations including Indigenous knowledge and equity. You’ll also hear about scalable platforms like Wildbook, citizen science workflows for crowdsourcing and quality control, policy relevance, open data and FAIR principles, edge deployment in the field, and building sustainable monitoring programs.

    Listen to gain concrete insights on tools, pitfalls, and next steps for applying AI to conservation—what works now, what remains hard, and resources to explore further." +topics: +- AI +- computer vision +- remote sensing +- MLOps +- data engineering dateadded: 2024-04-28 quotableClips: - name: Podcast Introduction diff --git a/_podcast/ai-in-healthcare-and-digital-therapeutics.md b/_podcast/ai-in-healthcare-and-digital-therapeutics.md index 5d23b7f3..20129c6a 100644 --- a/_podcast/ai-in-healthcare-and-digital-therapeutics.md +++ b/_podcast/ai-in-healthcare-and-digital-therapeutics.md @@ -1,6 +1,6 @@ --- -title: 'AI in Healthcare & Digital Therapeutics: Building Data Teams, Personalization, A/B Testing & Ethics' -short: Machine Learning and Personalization in Healthcare +title: "AI in Healthcare & Digital Therapeutics: Building Data Teams, Personalization, A/B Testing & Ethics" +short: "Machine Learning and Personalization in Healthcare" season: 8 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3s78PtlbUmecuMOXwO8aD5?si=991e1811a5204305 youtube: https://www.youtube.com/watch?v=IDzhmmKeNG4 -description: 'Learn to build data teams and ethical AI in healthcare: actionable personalization, A/B testing for digital therapeutics, GDPR-safe experiments.' -intro: How can AI power effective digital therapeutics while balancing personalization, rapid experimentation, and patient safety? In this episode, Stefan Gudmundsson — Director of Data, Analytics, and AI with a track record building ML and data teams at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches for AI in healthcare and digital therapeutics.

    We cover how machine learning is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s gamified digital therapeutics and quality-of-life goals; behavioral design that minimizes in-app time; and engagement strategies like charity incentives versus leaderboards. Stefan explains building the analytics foundation—data pipelines, dashboards, and experimentation capabilities—and why A/B testing and agenda-driven recommender systems are core to personalization. He also tackles data privacy and ethics (GDPR/HIPAA, de-identification), remote monitoring with wearables, clinical trials versus app experiments, managing medical risk, and hiring and scaling data, ML, and engineering teams.

    Listen to get concrete frameworks for building data teams, running safe, measurable experiments, designing personalized interventions, and embedding ethical safeguards into AI-driven digital therapeutics +description: "Learn to build data teams and ethical AI in healthcare: actionable personalization, A/B testing for digital therapeutics, GDPR-safe experiments." +intro: "How can AI power effective digital therapeutics while balancing personalization, rapid experimentation, and patient safety? In this episode, Stefan Gudmundsson — Director of Data, Analytics, and AI with a track record building ML and data teams at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches for AI in healthcare and digital therapeutics.

    We cover how machine learning is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s gamified digital therapeutics and quality-of-life goals; behavioral design that minimizes in-app time; and engagement strategies like charity incentives versus leaderboards. Stefan explains building the analytics foundation—data pipelines, dashboards, and experimentation capabilities—and why A/B testing and agenda-driven recommender systems are core to personalization. He also tackles data privacy and ethics (GDPR/HIPAA, de-identification), remote monitoring with wearables, clinical trials versus app experiments, managing medical risk, and hiring and scaling data, ML, and engineering teams.

    Listen to get concrete frameworks for building data teams, running safe, measurable experiments, designing personalized interventions, and embedding ethical safeguards into AI-driven digital therapeutics" topics: - machine learning - healthcare diff --git a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md index 06e76c14..aac0b8e4 100644 --- a/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md +++ b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md @@ -1,7 +1,6 @@ --- -title: 'Post-ChatGPT AI Infrastructure: Open Source Orchestration, On-Prem Economics - & Distributed Training at Scale' -short: Trends in AI Infrastructure +title: "Post-ChatGPT AI Infrastructure: Open Source Orchestration, On-Prem Economics & Distributed Training at Scale" +short: "Trends in AI Infrastructure" season: 20 episode: 1 guests: @@ -15,24 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/redefining-ai-infrastructure-open-source-chips-and/id1541710331?i=1000687565459 spotify: https://open.spotify.com/episode/5MIc1pAXPxVYSr0E4pndU4 youtube: https://www.youtube.com/watch?v=1aMuynlLM3o -description: 'Discover AI infrastructure strategies: open source orchestration, on-prem - economics and distributed training at scale to cut costs, boost performance and - control.' -intro: How has the rise of ChatGPT reshaped the infrastructure needed to build and - run large language models, and when does open source orchestration make sense compared - to cloud or proprietary systems? In this episode we speak with Andrey Cheptsov, - founder and CEO of dstack — an open-source alternative to Kubernetes and Slurm designed - to simplify AI infrastructure orchestration. 
Drawing on his decade-plus at JetBrains - building developer tools, Andrey frames practical trade-offs between on-prem economics - and cloud spend, the maturity of open source orchestration tools, and patterns for - distributed training at scale. We cover core topics including open source orchestration - for AI workloads, cost and operational considerations for on-prem deployments, and - strategies to scale distributed training efficiently and reliably. Listen to understand - when an open source approach like dstack is appropriate, what to evaluate in orchestration - tools, and how to balance performance, cost, and control as you scale AI projects - post-ChatGPT. This episode is for engineering leaders and ML infrastructure teams - seeking actionable insights on AI infrastructure, orchestration tools, on-prem economics, - and distributed training best practices. +description: "Discover AI infrastructure strategies: open source orchestration, on-prem economics and distributed training at scale to cut costs, boost performance and control." +topics: +- AI infrastructure +- MLOps +- LLMs +- open-source +- tools +intro: "How has the rise of ChatGPT reshaped the infrastructure needed to build and run large language models, and when does open source orchestration make sense compared to cloud or proprietary systems? In this episode we speak with Andrey Cheptsov, founder and CEO of dstack — an open-source alternative to Kubernetes and Slurm designed to simplify AI infrastructure orchestration. Drawing on his decade-plus at JetBrains building developer tools, Andrey frames practical trade-offs between on-prem economics and cloud spend, the maturity of open source orchestration tools, and patterns for distributed training at scale. We cover core topics including open source orchestration for AI workloads, cost and operational considerations for on-prem deployments, and strategies to scale distributed training efficiently and reliably. 
Listen to understand when an open source approach like dstack is appropriate, what to evaluate in orchestration tools, and how to balance performance, cost, and control as you scale AI projects post-ChatGPT. This episode is for engineering leaders and ML infrastructure teams seeking actionable insights on AI infrastructure, orchestration tools, on-prem economics, and distributed training best practices." dateadded: 2025-02-26 duration: PT01H06M04S quotableClips: diff --git a/_podcast/ai-ml-product-design-and-experimentation.md b/_podcast/ai-ml-product-design-and-experimentation.md index 494b6ec4..1d6ccf8a 100644 --- a/_podcast/ai-ml-product-design-and-experimentation.md +++ b/_podcast/ai-ml-product-design-and-experimentation.md @@ -1,6 +1,6 @@ --- -title: 'AI Product Design: Algorithm-Ready UX, Rapid Experiments & Data-Driven Roadmaps' -short: Innovation and Design for Machine Learning +title: "AI Product Design: Algorithm-Ready UX, Rapid Experiments & Data-Driven Roadmaps" +short: "Innovation and Design for Machine Learning" season: 8 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4vhTQJ6Aj9z5VHm9UsHspv youtube: https://www.youtube.com/watch?v=tcqBfZw41FM -description: 'Master AI product design: build algorithm-ready UX, run rapid experiments and craft data-driven roadmaps to prioritize innovation and ship measurable results.' -intro: How do you design products that are “algorithm-ready” while running rapid experiments and building data-driven roadmaps? In this episode, Liesbeth Dingemans—strategy and AI leader, founder of Dingemans Consulting, former VP of Revenue at Source.ag and Head of AI Strategy at Prosus—walks through pragmatic approaches to AI product design that bridge vision and execution.

    We cover algorithm-friendly UX and signal collection, a concrete interaction-design case study comparing TikTok and Instagram signals, and the Double Diamond framework for moving from problem framing to solution exploration. Liesbeth explains scoping and prioritization, parallel experiments and proofs of concept, one-week design sprints, appropriate timeframes for research-to-scale, and the role of designers, data scientists, engineers and product managers in shaping AI roadmaps.

    Listeners will learn how to avoid rework by involving data science early, use scoping documents to challenge assumptions, create measurable experiments (the Task Force/“Jet Ski” model), and build data-driven pitches for long-term bets versus quarterly OKRs. Tune in for concrete frameworks and practices to make AI product design, rapid experiments, and data-driven roadmaps work in your organization +description: "Master AI product design: build algorithm-ready UX, run rapid experiments and craft data-driven roadmaps to prioritize innovation and ship measurable results." +intro: "How do you design products that are “algorithm-ready” while running rapid experiments and building data-driven roadmaps? In this episode, Liesbeth Dingemans—strategy and AI leader, founder of Dingemans Consulting, former VP of Revenue at Source.ag and Head of AI Strategy at Prosus—walks through pragmatic approaches to AI product design that bridge vision and execution.

    We cover algorithm-friendly UX and signal collection, a concrete interaction-design case study comparing TikTok and Instagram signals, and the Double Diamond framework for moving from problem framing to solution exploration. Liesbeth explains scoping and prioritization, parallel experiments and proofs of concept, one-week design sprints, appropriate timeframes for research-to-scale, and the role of designers, data scientists, engineers and product managers in shaping AI roadmaps.

    Listeners will learn how to avoid rework by involving data science early, use scoping documents to challenge assumptions, create measurable experiments (the Task Force/“Jet Ski” model), and build data-driven pitches for long-term bets versus quarterly OKRs. Tune in for concrete frameworks and practices to make AI product design, rapid experiments, and data-driven roadmaps work in your organization" topics: - machine learning - design thinking @@ -88,7 +88,7 @@ quotableClips: startOffset: 1698 url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1698 endOffset: 1864 -- name: 'Scoping Documents: Challenging Assumptions with "Why"' +- name: 'Scoping Documents: Challenging Assumptions with "Why"' startOffset: 1864 url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1864 endOffset: 2005 @@ -688,7 +688,7 @@ transcript: sec: 1817 time: '30:17' who: Liesbeth -- header: 'Scoping Documents: Challenging Assumptions with "Why"' +- header: 'Scoping Documents: Challenging Assumptions with "Why"' - line: 'Let''s imagine we have this situation: a manager comes to me, or to the team, or to the product manager and says, “Hey, this is the problem we think we have. Let''s solve it with a neural network.” So how do we challenge that person? 
How diff --git a/_podcast/algorithmic-trading-with-python-and-machine-learning.md b/_podcast/algorithmic-trading-with-python-and-machine-learning.md index 86432a7c..c999c45a 100644 --- a/_podcast/algorithmic-trading-with-python-and-machine-learning.md +++ b/_podcast/algorithmic-trading-with-python-and-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'Algorithmic Trading with Python: Backtesting, Risk Management and Deployment' -short: Stock Market Analysis with Python and Machine Learning +title: "Algorithmic Trading with Python: Backtesting, Risk Management and Deployment" +short: "Stock Market Analysis with Python and Machine Learning" season: 17 episode: 3 guests: @@ -14,24 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/stock-market-analysis-with-python-and-machine/id1541710331?i=1000641465239 spotify: https://open.spotify.com/episode/1ZXAeGr4Kx7F6oLQUip8Cc?si=KJwpYL-3SvuX8nPdc2cyOg youtube: https://www.youtube.com/watch?v=NThHAEIazFk -description: 'Master algorithmic trading: backtesting and risk management—learn practical - data sources, features, models & execution to build robust strategies.' -intro: How do you turn a trading idea into a robust, risk-managed algorithm in Python? - In this episode Ivan Brigida — analytics lead behind PythonInvest with 10+ years - in statistical modeling, forecasting, econometrics and finance — walks through practical - steps for algorithmic trading with Python, from data sourcing to deployment (and - a clear reminder this is educational, not investment advice).

    We cover - where retail traders get market data (Yahoo, Quandl, Polygon), OHLCV and adjusted-close - nuances, and a concrete mean-reversion example. Ivan explains backtesting methodology, - common pitfalls like time-series data leakage, and walk-forward simulation for realistic - validation. He breaks down risk management (stop-loss thresholds, position sizing), - execution and trading fees, plus evaluation metrics (ROI, precision) and defining - prediction targets (binary growth thresholds such as 5%).

    On the modeling - side you’ll hear practical feature engineering (time-window stats, handcrafted indicators), - model choices (logistic regression, XGBoost, neural nets), explainability via feature - importance, and deployment options (cron, Airflow, APIs, partial automation). Listen - to gain actionable guidance for building, validating, and deploying algorithmic - trading systems in Python. +description: "Master algorithmic trading: backtesting and risk management—learn practical data sources, features, models & execution to build robust strategies." +topics: +- machine learning +- data science +- MLOps +- algorithmic trading +- tools +intro: "How do you turn a trading idea into a robust, risk-managed algorithm in Python? In this episode Ivan Brigida — analytics lead behind PythonInvest with 10+ years in statistical modeling, forecasting, econometrics and finance — walks through practical steps for algorithmic trading with Python, from data sourcing to deployment (and a clear reminder this is educational, not investment advice).

    We cover where retail traders get market data (Yahoo, Quandl, Polygon), OHLCV and adjusted-close nuances, and a concrete mean-reversion example. Ivan explains backtesting methodology, common pitfalls like time-series data leakage, and walk-forward simulation for realistic validation. He breaks down risk management (stop-loss thresholds, position sizing), execution and trading fees, plus evaluation metrics (ROI, precision) and defining prediction targets (binary growth thresholds such as 5%).

    On the modeling side you’ll hear practical feature engineering (time-window stats, handcrafted indicators), model choices (logistic regression, XGBoost, neural nets), explainability via feature importance, and deployment options (cron, Airflow, APIs, partial automation). Listen to gain actionable guidance for building, validating, and deploying algorithmic trading systems in Python." dateadded: 2024-01-24 duration: PT01H40S quotableClips: diff --git a/_podcast/algorithms-data-structures-for-engineers.md b/_podcast/algorithms-data-structures-for-engineers.md index cdf3ab4b..205bbcf5 100644 --- a/_podcast/algorithms-data-structures-for-engineers.md +++ b/_podcast/algorithms-data-structures-for-engineers.md @@ -1,6 +1,6 @@ --- -title: 'Practical Algorithms for Engineers: Bloom Filters, Approximate Nearest-Neighbor & Performance' -short: Mastering Algorithms and Data Structures +title: "Practical Algorithms for Engineers: Bloom Filters, Approximate Nearest-Neighbor & Performance" +short: "Mastering Algorithms and Data Structures" season: 5 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5IM2Des1sjVIwrvB3dGoJN apple: https://podcasts.apple.com/us/podcast/mastering-algorithms-and-data-structures-marcello-la/id1541710331?i=1000534241523 -description: Learn Bloom filters, approximate nearest-neighbor and performance tuning to gain memory-efficient containment, fast vector search and practical profiling tips -intro: How do engineers choose and implement the right algorithm for memory, latency, and scale? In this episode, Marcello La Rocca — senior software engineer at Tundra.com and author of Algorithms and Data Structures in Action, with experience at Twitter, Microsoft and Apple — walks through practical algorithmic solutions engineers can actually use in production. 
We focus on Bloom filters for memory-efficient containment checks (and real-world uses like crawlers, routing tables, and adtech device-ID targeting), and on approximate nearest-neighbour (ANN) strategies when KD-trees break down for high-dimensional data — covering R-trees, SS-trees, vector similarity, embeddings and Faiss. Along the way Marcello discusses core data structures, profiling and performance pitfalls, abstraction vs implementation trade-offs, cross-language serialization, and language performance choices (Python vs C++ and Cython). If you want actionable guidance — including when to trust libraries versus inspect internals, practical code in Java/JavaScript/Python, and study resources to get hands-on — this episode gives concrete patterns, trade-offs, and examples you can apply to improve search, recommendation, and large-scale systems performance +description: "Learn Bloom filters, approximate nearest-neighbor and performance tuning to gain memory-efficient containment, fast vector search and practical profiling tips" +intro: "How do engineers choose and implement the right algorithm for memory, latency, and scale? In this episode, Marcello La Rocca — senior software engineer at Tundra.com and author of Algorithms and Data Structures in Action, with experience at Twitter, Microsoft and Apple — walks through practical algorithmic solutions engineers can actually use in production. We focus on Bloom filters for memory-efficient containment checks (and real-world uses like crawlers, routing tables, and adtech device-ID targeting), and on approximate nearest-neighbour (ANN) strategies when KD-trees break down for high-dimensional data — covering R-trees, SS-trees, vector similarity, embeddings and Faiss. Along the way Marcello discusses core data structures, profiling and performance pitfalls, abstraction vs implementation trade-offs, cross-language serialization, and language performance choices (Python vs C++ and Cython). 
If you want actionable guidance — including when to trust libraries versus inspect internals, practical code in Java/JavaScript/Python, and study resources to get hands-on — this episode gives concrete patterns, trade-offs, and examples you can apply to improve search, recommendation, and large-scale systems performance" topics: - algorithms - data structures diff --git a/_podcast/analytics-engineer-skills-tools.md b/_podcast/analytics-engineer-skills-tools.md index 0d124ef2..8eb99030 100644 --- a/_podcast/analytics-engineer-skills-tools.md +++ b/_podcast/analytics-engineer-skills-tools.md @@ -1,6 +1,6 @@ --- -title: 'Master Analytics Engineering: Skills, Toolstack, Career Roadmap' -short: 'Analytics Engineer: New Role in a Data Team' +title: "Master Analytics Engineering: Skills, Toolstack, Career Roadmap" +short: "Analytics Engineer: New Role in a Data Team" season: 3 episode: 11 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4rLQ5ulsYR9LqXxbFe2MlN apple: https://podcasts.apple.com/us/podcast/analytics-engineer-new-role-in-data-team-victoria-perez/id1541710331?i=1000526036141 -description: 'Master analytics engineering with dbt and data modeling: learn pipelines, testing, Snowflake basics and a clear career roadmap to advance your data career.' -intro: How do you become an effective analytics engineer and what skills, tools, and career steps matter most? In this episode, Victoria Perez Mola—born in Argentina, trained as a Systems Engineer and now an Analytics Engineer at Tier in Berlin—walks us through her move from ERP and finance reporting into analytics engineering. We cover daily responsibilities like data modeling, pipelines, data quality and Looker; the DBT workflow (SQL transformations, version control, tests, DAG); and a practical analytics toolstack including DBT, Snowflake, Adlib ETL and Looker. 
Victoria contrasts analytics engineer, data analyst and data engineer roles, explains role origins, and outlines typical job expectations such as pipeline ownership, auditing and dashboarding. She digs into core skills—SQL, dimensional modeling, Snowflake—strategies for handling bad data and schema changes with DBT macros and tests, and team structures from platform teams to embedded roles. Listen for a clear career roadmap, concrete learning resources (DBT tutorials and an 'Analytics readings' Notion list), and indicators of role fit if you enjoy modeling, data quality and engineering best practices +description: "Master analytics engineering with dbt and data modeling: learn pipelines, testing, Snowflake basics and a clear career roadmap to advance your data career." +intro: "How do you become an effective analytics engineer and what skills, tools, and career steps matter most? In this episode, Victoria Perez Mola—born in Argentina, trained as a Systems Engineer and now an Analytics Engineer at Tier in Berlin—walks us through her move from ERP and finance reporting into analytics engineering. We cover daily responsibilities like data modeling, pipelines, data quality and Looker; the DBT workflow (SQL transformations, version control, tests, DAG); and a practical analytics toolstack including DBT, Snowflake, Adlib ETL and Looker. Victoria contrasts analytics engineer, data analyst and data engineer roles, explains role origins, and outlines typical job expectations such as pipeline ownership, auditing and dashboarding. She digs into core skills—SQL, dimensional modeling, Snowflake—strategies for handling bad data and schema changes with DBT macros and tests, and team structures from platform teams to embedded roles. 
Listen for a clear career roadmap, concrete learning resources (DBT tutorials and an 'Analytics readings' Notion list), and indicators of role fit if you enjoy modeling, data quality and engineering best practices" topics: - analytics engineering dateadded: 2021-06-19 diff --git a/_podcast/analytics-to-data-science-with-kaggle-portfolio.md b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md index 13ec1681..918bbcdd 100644 --- a/_podcast/analytics-to-data-science-with-kaggle-portfolio.md +++ b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md @@ -1,6 +1,6 @@ --- -title: 'Career Transition from Analytics to Data Science: Build a Kaggle Notebook Portfolio, Learn Python & Get Hired' -short: Shifting Career from Analytics to Data Science +title: "Career Transition from Analytics to Data Science: Build a Kaggle Notebook Portfolio, Learn Python & Get Hired" +short: "Shifting Career from Analytics to Data Science" season: 3 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1GVuHJzqbcf2BvaLBTgsAL apple: https://podcasts.apple.com/us/podcast/shifting-career-from-analytics-to-data-science-andrada/id1541710331?i=1000517426368 -description: Build a Kaggle portfolio, learn Python to pivot from analytics to data science—hands-on notebooks, interview prep and hiring strategies to get hired -intro: 'How do you move from analytics into a hireable data science role by building a Kaggle notebook portfolio and learning Python fast? In this episode, Andrada Olteanu — Data Scientist at Endava, Kaggle Notebooks Master, and Z by HP & NVIDIA Data Science Ambassador — walks through her path from a statistics degree and data analyst role at Avon to a master’s in DS and a practical, project-driven transition.

    We cover concrete steps: recommended courses like Jose Portilla’s “Python for Data Science & Machine Learning,” using Kaggle as your primary practice environment, and specific notebook work such as the Iowa House Prices project with hyperparameter tuning. Andrada explains how to translate academic dissertations into public notebooks, decompose and reimplement kernels to grow coding skills, and leverage mentorship (including connecting with Gabi Preda on Kaggle) during the job search. Listeners will also learn how to present work on Kaggle and GitHub, navigate interview expectations (algorithmic coding tests vs practical ML), and use LinkedIn/Twitter for networking.

    If you’re building a Kaggle notebook portfolio, learning Python, and aiming for data science roles, this episode gives a practical, step-by-step roadmap.' +description: "Build a Kaggle portfolio, learn Python to pivot from analytics to data science—hands-on notebooks, interview prep and hiring strategies to get hired" +intro: "How do you move from analytics into a hireable data science role by building a Kaggle notebook portfolio and learning Python fast? In this episode, Andrada Olteanu — Data Scientist at Endava, Kaggle Notebooks Master, and Z by HP & NVIDIA Data Science Ambassador — walks through her path from a statistics degree and data analyst role at Avon to a master’s in DS and a practical, project-driven transition.

    We cover concrete steps: recommended courses like Jose Portilla’s “Python for Data Science & Machine Learning,” using Kaggle as your primary practice environment, and specific notebook work such as the Iowa House Prices project with hyperparameter tuning. Andrada explains how to translate academic dissertations into public notebooks, decompose and reimplement kernels to grow coding skills, and leverage mentorship (including connecting with Gabi Preda on Kaggle) during the job search. Listeners will also learn how to present work on Kaggle and GitHub, navigate interview expectations (algorithmic coding tests vs practical ML), and use LinkedIn/Twitter for networking.

    If you’re building a Kaggle notebook portfolio, learning Python, and aiming for data science roles, this episode gives a practical, step-by-step roadmap." topics: - career transition - analytics diff --git a/_podcast/applied-llm-research-and-career-growth-in-practice.md b/_podcast/applied-llm-research-and-career-growth-in-practice.md index a6aa69ae..f7da283c 100644 --- a/_podcast/applied-llm-research-and-career-growth-in-practice.md +++ b/_podcast/applied-llm-research-and-career-growth-in-practice.md @@ -1,7 +1,6 @@ --- -title: 'Applied LLM Research & Career Growth: Long-Context Evaluation, Prototyping - & Industry Publishing' -short: Build a Strong Career in Data +title: "Applied LLM Research & Career Growth: Long-Context Evaluation, Prototyping & Industry Publishing" +short: "Build a Strong Career in Data" season: 20 episode: 7 guests: @@ -15,23 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/build-a-strong-career-in-data-lavanya-gupta/id1541710331?i=1000706988972 spotify: https://open.spotify.com/episode/2mJXd0lSZFPKJA0ZrG9iS2 youtube: https://www.youtube.com/watch?v=ekG5zJioyFs -description: Learn LLM research tactics, long-context evaluation approaches and prototyping - tips to boost your career, publish industry work, and ship impactful models. -intro: How do you evaluate and prototype long-context LLMs in a real-world setting - while advancing a career as an applied researcher? In this episode Lavanya Gupta - — a Carnegie Mellon Language Technologies Institute alum and Sr. AI/ML Applied Scientist - at JPMorgan Chase’s Machine Learning Center of Excellence — walks through practical - strategies for applied LLM research and career growth. With 5+ years of industrial - research experience, public talks at WiDS, PyData, TensorFlow User Group and reviewer - roles for NeurIPS 2024, ICLR 2025 and NAACL 2025, Lavanya connects technical practice - with professional development.

    We cover core topics including long-context - evaluation methodologies for transformer models, rapid prototyping workflows for - LLM systems, and best practices for industry publishing and technical communication. - Listeners will get actionable guidance on setting up reproducible experiments, balancing - research rigor with product timelines, and positioning industry work for peer-reviewed - venues. This episode is for machine learning engineers, NLP researchers, and applied - scientists seeking concrete tactics for prototyping LLMs, conducting robust long-context - evaluations, and growing a research-oriented career in industry. +description: "Learn LLM research tactics, long-context evaluation approaches and prototyping tips to boost your career, publish industry work, and ship impactful models." +topics: +- LLMs +- NLP +- MLOps +- applied research +- career growth +intro: "How do you evaluate and prototype long-context LLMs in a real-world setting while advancing a career as an applied researcher? In this episode Lavanya Gupta — a Carnegie Mellon Language Technologies Institute alum and Sr. AI/ML Applied Scientist at JPMorgan Chase’s Machine Learning Center of Excellence — walks through practical strategies for applied LLM research and career growth. With 5+ years of industrial research experience, public talks at WiDS, PyData, TensorFlow User Group and reviewer roles for NeurIPS 2024, ICLR 2025 and NAACL 2025, Lavanya connects technical practice with professional development.

    We cover core topics including long-context evaluation methodologies for transformer models, rapid prototyping workflows for LLM systems, and best practices for industry publishing and technical communication. Listeners will get actionable guidance on setting up reproducible experiments, balancing research rigor with product timelines, and positioning industry work for peer-reviewed venues. This episode is for machine learning engineers, NLP researchers, and applied scientists seeking concrete tactics for prototyping LLMs, conducting robust long-context evaluations, and growing a research-oriented career in industry." dateadded: 2025-05-12 duration: PT00H58M10S quotableClips: @@ -99,7 +89,7 @@ quotableClips: startOffset: 2252 url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2252 endOffset: 2473 -- name: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' +- name: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' startOffset: 2473 url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2473 endOffset: 2724 @@ -878,7 +868,7 @@ transcript: sec: 2403 time: '40:03' who: Alexey -- header: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' +- header: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' - line: Yeah, definitely. I mentioned luck because, as you said, at that time-during COVID-people were really active on Kaggle. Maybe the timing was luck, but it wasn't a random decision to just get up one day and scrape Google Play Store. 
diff --git a/_podcast/bayesian-modeling-workflows-and-tools.md b/_podcast/bayesian-modeling-workflows-and-tools.md index f4dc49f5..27a5d661 100644 --- a/_podcast/bayesian-modeling-workflows-and-tools.md +++ b/_podcast/bayesian-modeling-workflows-and-tools.md @@ -1,6 +1,6 @@ --- -title: 'Bayesian Modeling: PyMC, Stan and Probabilistic Programming Workflows' -short: Bayesian Modeling and Probabilistic Programming +title: "Bayesian Modeling: PyMC, Stan and Probabilistic Programming Workflows" +short: "Bayesian Modeling and Probabilistic Programming" season: 17 episode: 4 guests: @@ -14,25 +14,13 @@ links: apple: https://podcasts.apple.com/us/podcast/bayesian-modeling-and-probabilistic-programming-rob/id1541710331?i=1000642253191 spotify: https://open.spotify.com/episode/5WUKDcTYv8ZvnqeHSQT7FF?si=K10siPBHQwmegCCXJ1VpIA youtube: https://www.youtube.com/watch?v=kcKvUSInm-M -description: 'Discover Bayesian modeling with PyMC and Stan: learn priors, MCMC/HMC - sampling, probabilistic programming workflows to build, debug and refine robust - models.' -intro: How do you move from point estimates to full uncertainty-aware models and choose - the right tools and workflows for Bayesian modeling? In this episode Rob Zinkov, - a machine learning engineer and former Indiana University research scientist who - led development of the Hakaru probabilistic programming language, walks through - practical Bayesian workflows and tool choices. We cover the core challenge of encoding - priors, likelihoods, and posteriors; why integrals become intractable and how numerical - integration and sampling (MCMC, Hamiltonian Monte Carlo, NUTS) approximate expectations; - and the trade-offs between probabilistic languages and libraries. Rob explains career - lessons on moving from software engineering to ML research, the essential math (calculus, - linear algebra, optimization), and self-study strategies for statistics. 
Concrete - topics include PyMC examples (a rainfall model and computational graph), Stan’s - advances in efficient sampling, composing hierarchical and spatial models, diagnosing - multimodality and uncertainty, and automating model tasks with probabilistic programming - (Hakaru). Listen to gain a clearer, practical understanding of Bayesian modeling, - when to use PyMC vs Stan, how samplers work, and recommended resources to build - your workflow. +description: "Discover Bayesian modeling with PyMC and Stan: learn priors, MCMC/HMC sampling, probabilistic programming workflows to build, debug and refine robust models." +topics: +- probabilistic programming +- bayesian statistics +- machine learning +- tools +intro: "How do you move from point estimates to full uncertainty-aware models and choose the right tools and workflows for Bayesian modeling? In this episode Rob Zinkov, a machine learning engineer and former Indiana University research scientist who led development of the Hakaru probabilistic programming language, walks through practical Bayesian workflows and tool choices. We cover the core challenge of encoding priors, likelihoods, and posteriors; why integrals become intractable and how numerical integration and sampling (MCMC, Hamiltonian Monte Carlo, NUTS) approximate expectations; and the trade-offs between probabilistic languages and libraries. Rob explains career lessons on moving from software engineering to ML research, the essential math (calculus, linear algebra, optimization), and self-study strategies for statistics. Concrete topics include PyMC examples (a rainfall model and computational graph), Stan’s advances in efficient sampling, composing hierarchical and spatial models, diagnosing multimodality and uncertainty, and automating model tasks with probabilistic programming (Hakaru). 
Listen to gain a clearer, practical understanding of Bayesian modeling, when to use PyMC vs Stan, how samplers work, and recommended resources to build your workflow." dateadded: '2024-01-22' duration: PT01H05M05S quotableClips: diff --git a/_podcast/becoming-data-freelancer.md b/_podcast/becoming-data-freelancer.md index 1a18ff95..c929bddf 100644 --- a/_podcast/becoming-data-freelancer.md +++ b/_podcast/becoming-data-freelancer.md @@ -1,6 +1,6 @@ --- -title: 'Becoming a Data Freelancer: Pricing, Client Acquisition and Contract Strategy' -short: Become a Data Freelancer +title: "Becoming a Data Freelancer: Pricing, Client Acquisition and Contract Strategy" +short: "Become a Data Freelancer" season: 16 episode: 9 guests: @@ -14,24 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/become-a-data-freelancer-dimitri-visnadi/id1541710331?i=1000637962993 spotify: https://open.spotify.com/episode/5OJfRiQ64JtLUmIkvadohg?si=uUEdvZwARN2hVGEfz73URg youtube: https://www.youtube.com/watch?v=R_EnSa9aZtE -description: Master data freelancer pricing, client acquisition and contract strategy—learn - rate benchmarking, outreach tactics, client vetting and runway planning. -intro: 'How do you move from corporate analytics to independent data consulting while - pricing services fairly, finding steady clients, and avoiding contract pitfalls? - In this episode Dimitri Visnadi — an independent data consultant who has advised - brands like Unilever, Ferrero, Heineken and Red Bull and who trained at UCL and - HP — walks through the practical realities of becoming a data freelancer.

    - We cover the full transition: career pivoting from marketing to data, early startup - and corporate analytics experience, and the decision to resign and pursue freelancing. - Dimitri breaks down client acquisition tactics (cold outreach, recruiter channels, - proactive self-marketing), pricing strategy (platforms vs direct contracting, rate - benchmarking, project pricing), and contract strategy (dependent contractor risk, - platform terms vs direct agreements, subcontracting). He also addresses vetting - clients, payment delays, financial runway recommendations, common pitfalls like - mispositioning and mispricing, and learning resources including the Data Freelancer - newsletter.

    Listen to get actionable guidance on pricing, client acquisition, - contract formats, and the business practices that help sustain a freelance data - consultancy.' +description: "Master data freelancer pricing, client acquisition and contract strategy—learn rate benchmarking, outreach tactics, client vetting and runway planning." +topics: +- freelance +- consulting +- data analytics +- career transition +- career growth +- data science + +intro: "How do you move from corporate analytics to independent data consulting while pricing services fairly, finding steady clients, and avoiding contract pitfalls? In this episode Dimitri Visnadi — an independent data consultant who has advised brands like Unilever, Ferrero, Heineken and Red Bull and who trained at UCL and HP — walks through the practical realities of becoming a data freelancer.

    We cover the full transition: career pivoting from marketing to data, early startup and corporate analytics experience, and the decision to resign and pursue freelancing. Dimitri breaks down client acquisition tactics (cold outreach, recruiter channels, proactive self-marketing), pricing strategy (platforms vs direct contracting, rate benchmarking, project pricing), and contract strategy (dependent contractor risk, platform terms vs direct agreements, subcontracting). He also addresses vetting clients, payment delays, financial runway recommendations, common pitfalls like mispositioning and mispricing, and learning resources including the Data Freelancer newsletter.

    Listen to get actionable guidance on pricing, client acquisition, contract formats, and the business practices that help sustain a freelance data consultancy." dateadded: 2023-12-09 date: 2025-11-07 duration: PT00H59M49S diff --git a/_podcast/big-data-analytics-and-postdoc-research.md b/_podcast/big-data-analytics-and-postdoc-research.md index 04da239a..f45fa345 100644 --- a/_podcast/big-data-analytics-and-postdoc-research.md +++ b/_podcast/big-data-analytics-and-postdoc-research.md @@ -1,6 +1,6 @@ --- -title: 'Master Spatial Big Data Analytics: Nebula Stream Systems, Postdoc Mentoring & PhD Tips' -short: 'Advancing Big Data Analytics: Post-Doctoral Research' +title: "Master Spatial Big Data Analytics: Nebula Stream Systems, Postdoc Mentoring & PhD Tips" +short: "Advancing Big Data Analytics: Post-Doctoral Research" season: 6 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6rgBSTPRvgNcJ7ouFyZmbH apple: https://podcasts.apple.com/us/podcast/advancing-big-data-analytics-post-doctoral-research/id1541710331?i=1000543884294 -description: 'Discover Spatial Big Data, Nebula Stream & postdoc mentoring: PhD tips, publishing, time-management and stream-processing tactics to boost your research.' -intro: How do you master spatial big data analytics while navigating the demands of postdoc research, systems building, and preparing for a PhD? In this episode, Eleni Tzirita-Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with a PhD from EPFL and award-winning work in data management — breaks down practical approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) and robust stream processing for IoT. We cover systems-driven research like the Nebula Stream and Agora infrastructure, spotting research trends via conferences and reviewing, and aligning academic work with industry needs. 
Eleni also outlines the postdoc role (mentoring, teaching, reviewing, dissemination), time management strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD or master’s thesis. Listeners will gain concrete guidance on research priorities beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, data cleaning evaluation challenges, and steps to increase diversity in CS. Tune in for actionable postdoc mentoring and PhD tips grounded in spatial big data and stream processing research +description: "Discover Spatial Big Data, Nebula Stream & postdoc mentoring: PhD tips, publishing, time-management and stream-processing tactics to boost your research." +intro: "How do you master spatial big data analytics while navigating the demands of postdoc research, systems building, and preparing for a PhD? In this episode, Eleni Tzirita-Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with a PhD from EPFL and award-winning work in data management — breaks down practical approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) and robust stream processing for IoT. We cover systems-driven research like the Nebula Stream and Agora infrastructure, spotting research trends via conferences and reviewing, and aligning academic work with industry needs. Eleni also outlines the postdoc role (mentoring, teaching, reviewing, dissemination), time management strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD or master’s thesis. Listeners will gain concrete guidance on research priorities beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, data cleaning evaluation challenges, and steps to increase diversity in CS. 
Tune in for actionable postdoc mentoring and PhD tips grounded in spatial big data and stream processing research" topics: - academia - big data analytics diff --git a/_podcast/big-data-engineer-vs-data-scientist.md b/_podcast/big-data-engineer-vs-data-scientist.md index da33386f..5dccb708 100644 --- a/_podcast/big-data-engineer-vs-data-scientist.md +++ b/_podcast/big-data-engineer-vs-data-scientist.md @@ -1,6 +1,6 @@ --- -title: 'Big Data Engineer vs Data Scientist: Skills, Tools, and Career Paths' -short: Big Data Engineer vs Data Scientist +title: "Big Data Engineer vs Data Scientist: Skills, Tools, and Career Paths" +short: "Big Data Engineer vs Data Scientist" season: 4 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/08Mb5JOOo6sWOFgsXILVsj apple: https://podcasts.apple.com/us/podcast/big-data-engineer-vs-data-scientist-roksolana-diachuk/id1541710331?i=1000528386609 -description: Discover how Big Data Engineer vs Data Scientist roles differ — skills, performance optimization, ETL pipelines and ML deployment tips to advance your career -intro: 'How do the day-to-day responsibilities and skill sets really differ between a Big Data Engineer and a Data Scientist—and what should you learn to move between those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her career transition from backend Java into big data engineering and R&D.

    We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala and Parquet formats—plus performance tuning: Spark job optimization, cluster resource planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries (data cleaning and feature engineering for data scientists vs pipeline design and formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning with Delta Lake, observability, documentation, starter projects and learning resources.

    Listen to learn which skills, tools and projects will help you choose or transition between careers, and what to prioritize when building scalable data pipelines, deploying models, and ensuring data quality.' +description: "Discover how Big Data Engineer vs Data Scientist roles differ — skills, performance optimization, ETL pipelines and ML deployment tips to advance your career" +intro: "How do the day-to-day responsibilities and skill sets really differ between a Big Data Engineer and a Data Scientist—and what should you learn to move between those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her career transition from backend Java into big data engineering and R&D.

    We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala and Parquet formats—plus performance tuning: Spark job optimization, cluster resource planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries (data cleaning and feature engineering for data scientists vs pipeline design and formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning with Delta Lake, observability, documentation, starter projects and learning resources.

    Listen to learn which skills, tools and projects will help you choose or transition between careers, and what to prioritize when building scalable data pipelines, deploying models, and ensuring data quality." topics: - career transition - software engineering diff --git a/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md index 570cf309..588e027a 100644 --- a/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md +++ b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md @@ -1,6 +1,6 @@ --- -title: 'Actionable Biohacks to Boost Productivity: Sleep, Circadian Light, Dopamine & Habits' -short: Biohacking for Data Scientists and ML Engineers +title: "Actionable Biohacks to Boost Productivity: Sleep, Circadian Light, Dopamine & Habits" +short: "Biohacking for Data Scientists and ML Engineers" season: 13 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6IuHKMK4CJdcVJNq9uQ9lm?si=PgXZHBCNSu21Nma1ToxGyQ youtube: https://www.youtube.com/watch?v=uyxUBADZYpU -description: Discover actionable biohacks for sleep and dopamine to boost productivity with 90-min cycles, morning light, habit tracking and energy-focused routines -intro: How do small, science-aligned biohacks actually move the needle on focus and productivity? In this episode, Ruslan Shchuchkin, a Berlin-based data scientist who transitioned from business/marketing into data science after experimenting with many techniques to stay focused, walks through practical, evidence-minded strategies for improving performance. We cover the root causes of procrastination and perfectionism, behavioral biohacking versus chemical interventions, and how dopamine-driven problem-solving fuels habits. 
Ruslan explains meditation and NSDR for prefrontal focus, morning sun and circadian light exposure to regulate cortisol and melatonin, and daylight lamps and wake lighting for low-daylight homes. He shares sleep planning tips based on 90-minute cycles, protein-forward nutrition for sustained focus, and caffeine timing trade-offs. You’ll also hear about habit tracking (logs and Notion dashboards), voluntary discomfort as a dopamine reset, failed experiments worth avoiding, safety considerations, and a simple prioritization framework. If you want actionable biohacks—sleep, circadian light, dopamine management, habit tracking, and meditation—to boost sustainable productivity, this episode offers concrete, practical steps grounded in real-world experience +description: "Discover actionable biohacks for sleep and dopamine to boost productivity with 90-min cycles, morning light, habit tracking and energy-focused routines" +intro: "How do small, science-aligned biohacks actually move the needle on focus and productivity? In this episode, Ruslan Shchuchkin, a Berlin-based data scientist who transitioned from business/marketing into data science after experimenting with many techniques to stay focused, walks through practical, evidence-minded strategies for improving performance. We cover the root causes of procrastination and perfectionism, behavioral biohacking versus chemical interventions, and how dopamine-driven problem-solving fuels habits. Ruslan explains meditation and NSDR for prefrontal focus, morning sun and circadian light exposure to regulate cortisol and melatonin, and daylight lamps and wake lighting for low-daylight homes. He shares sleep planning tips based on 90-minute cycles, protein-forward nutrition for sustained focus, and caffeine timing trade-offs. 
You’ll also hear about habit tracking (logs and Notion dashboards), voluntary discomfort as a dopamine reset, failed experiments worth avoiding, safety considerations, and a simple prioritization framework. If you want actionable biohacks—sleep, circadian light, dopamine management, habit tracking, and meditation—to boost sustainable productivity, this episode offers concrete, practical steps grounded in real-world experience" topics: - biohacking - productivity diff --git a/_podcast/bioinformatics-worflows-tools-and-data-science.md b/_podcast/bioinformatics-worflows-tools-and-data-science.md index da9c1268..62777571 100644 --- a/_podcast/bioinformatics-worflows-tools-and-data-science.md +++ b/_podcast/bioinformatics-worflows-tools-and-data-science.md @@ -1,8 +1,6 @@ --- -title: 'Bioinformatics Workflows in Practice: Sequencing, Metagenomics, and Open-Source - Tools' -short: Applying Data Science Concepts, Tools, and Workflows to Accelerate Biological - Research +title: "Bioinformatics Workflows in Practice: Sequencing, Metagenomics, and Open-Source Tools" +short: "Applying Data Science Concepts, Tools, and Workflows to Accelerate Biological Research" season: 22 episode: 3 guests: @@ -16,25 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/from-biotechnology-to-bioinformatics-software-sebastian/id1541710331?i=1000733347636 spotify: https://open.spotify.com/episode/3CohNIXZdooLYoIyIbr6EF youtube: https://www.youtube.com/watch?v=ZFrcrTtnB1Q -description: Master bioinformatics workflows for sequencing & metagenomics with open-source - tools, streamline pipelines, boost reproducibility, and speed analyses. -intro: How do you build reproducible, scalable bioinformatics workflows for sequencing - and metagenomics using open-source tools? In this episode we explore practical answers - with Sebastian Ayala Ruano, a bioinformatics software developer and Master's student - in Systems Biology at Maastricht University. 
Sebastian has contributed to open-source - projects such as MicW2Graph, VueGen, and VueCore to simplify multi-omics data analysis - and has a background in cheminformatics, peptide discovery, and network-based analysis. -

    We discuss real-world sequencing and metagenomics workflows, trade-offs - in pipeline design, and how open-source tools and educational software can accelerate - reproducible research. Sebastian also outlines how machine learning and network - science concepts inform analysis strategies for complex biological data. Key topics - include sequencing data processing, metagenomic analysis approaches, workflow automation, - and practical considerations for integrating multi-omics datasets.

    Listeners - will gain concrete guidance for designing bioinformatics pipelines, selecting open-source - tools, and applying network- and ML-driven methods to improve interpretation. This - episode is useful for researchers and developers wanting actionable perspectives - on sequencing, metagenomics, and building reliable workflows backed by community - tools and resources. +description: "Master bioinformatics workflows for sequencing & metagenomics with open-source tools, streamline pipelines, boost reproducibility, and speed analyses." +topics: +- bioinformatics +- LLMs +- MLOps +- open-source +- tools +intro: "How do you build reproducible, scalable bioinformatics workflows for sequencing and metagenomics using open-source tools? In this episode we explore practical answers with Sebastian Ayala Ruano, a bioinformatics software developer and Master's student in Systems Biology at Maastricht University. Sebastian has contributed to open-source projects such as MicW2Graph, VueGen, and VueCore to simplify multi-omics data analysis and has a background in cheminformatics, peptide discovery, and network-based analysis.

    We discuss real-world sequencing and metagenomics workflows, trade-offs in pipeline design, and how open-source tools and educational software can accelerate reproducible research. Sebastian also outlines how machine learning and network science concepts inform analysis strategies for complex biological data. Key topics include sequencing data processing, metagenomic analysis approaches, workflow automation, and practical considerations for integrating multi-omics datasets.

    Listeners will gain concrete guidance for designing bioinformatics pipelines, selecting open-source tools, and applying network- and ML-driven methods to improve interpretation. This episode is useful for researchers and developers wanting actionable perspectives on sequencing, metagenomics, and building reliable workflows backed by community tools and resources." dateadded: 2025-10-27 duration: PT00H55M13S quotableClips: diff --git a/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md index e1bd1c28..a0844aac 100644 --- a/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md +++ b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md @@ -1,7 +1,6 @@ --- -title: 'Building Agentic AI Systems: Pragmatic Agent Engineering, Tooling, Retrieval - & Evaluation' -short: Building reliable AI products in the era of Gen AI and Agents +title: "Building Agentic AI Systems: Pragmatic Agent Engineering, Tooling, Retrieval & Evaluation" +short: "Building reliable AI products in the era of Gen AI and Agents" season: 22 episode: 1 guests: @@ -15,23 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/building-reliable-ai-products-in-the-era-of-gen/id1541710331?i=1000731199709 spotify: https://open.spotify.com/episode/7c22vqYNuNLKKYEfYGOos8?si=NBFT2e80S6WErW_tDDrijA youtube: https://www.youtube.com/watch?v=x2AAjqz2XmM -description: 'Discover agentic AI tactics: practical agent engineering and retrieval - strategies to build robust autonomous systems, boost performance and ensure reliability.' -intro: 'How do you build reliable, agentic AI systems that balance practical engineering, - tooling, retrieval, and robust evaluation? 
In this episode Ranjitha Kulkarni, Staff - Machine Learning Engineer at NeuBird.ai and former engineer on LLM- and agent-powered - product features at Dropbox Dash and Microsoft, explores pragmatic approaches to - agent design. Drawing on her work in speech recognition, language modeling, assistant - evaluation, and publications on voice query reformulation and automatic online evaluation, - Ranjitha discusses key elements of agent engineering: selecting and integrating - tools, designing effective retrieval pipelines, and establishing meaningful evaluation - metrics for intelligent assistants.

    Listeners will get a grounded look - at the trade-offs of agentic AI in real products, how retrieval strategies impact - reasoning and performance, and practical evaluation frameworks to measure assistant - behavior. If you’re building LLM-powered agents, improving tool use, or defining - evaluation for agentic systems, this episode offers actionable perspectives rooted - in production experience and research. Keywords: agentic AI, agent engineering, - agent tooling, retrieval, agent evaluation, LLM-powered products.' +description: "Discover agentic AI tactics: practical agent engineering and retrieval strategies to build robust autonomous systems, boost performance and ensure reliability." +topics: +- LLMs +- AI +- agent engineering +- retrieval-augmented generation +- MLOps +- tools +intro: "How do you build reliable, agentic AI systems that balance practical engineering, tooling, retrieval, and robust evaluation? In this episode Ranjitha Kulkarni, Staff Machine Learning Engineer at NeuBird.ai and former engineer on LLM- and agent-powered product features at Dropbox Dash and Microsoft, explores pragmatic approaches to agent design. Drawing on her work in speech recognition, language modeling, assistant evaluation, and publications on voice query reformulation and automatic online evaluation, Ranjitha discusses key elements of agent engineering: selecting and integrating tools, designing effective retrieval pipelines, and establishing meaningful evaluation metrics for intelligent assistants.

    Listeners will get a grounded look at the trade-offs of agentic AI in real products, how retrieval strategies impact reasoning and performance, and practical evaluation frameworks to measure assistant behavior. If you’re building LLM-powered agents, improving tool use, or defining evaluation for agentic systems, this episode offers actionable perspectives rooted in production experience and research. Keywords: agentic AI, agent engineering, agent tooling, retrieval, agent evaluation, LLM-powered products." dateadded: 2025-10-21 duration: PT00H59M23S quotableClips: diff --git a/_podcast/building-ai-digital-health-startups.md b/_podcast/building-ai-digital-health-startups.md index 4dc4670b..73f3ea3e 100644 --- a/_podcast/building-ai-digital-health-startups.md +++ b/_podcast/building-ai-digital-health-startups.md @@ -1,6 +1,6 @@ --- -title: 'Building Digital Health Startups: MVP Strategy, AI Diagnosis and Telemedicine' -short: AI for Digital Health +title: "Building Digital Health Startups: MVP Strategy, AI Diagnosis and Telemedicine" +short: "AI for Digital Health" season: 16 episode: 8 guests: @@ -14,25 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/ai-for-digital-health-maria-bruckert/id1541710331?i=1000637212773 spotify: https://open.spotify.com/episode/2NE0vbiYwXxOuqychHIqBR?si=QdRyuJvSRE2V3bLwHaEv-Q youtube: https://www.youtube.com/watch?v=whpkDmVVGUE -description: Discover actionable digital health MVP strategy, telemedicine tactics - to build, validate and monetize a scalable healthcare startup with faster remote - care. -intro: How do you build a digital health startup that ships a focused MVP, uses AI - for diagnosis, and delivers care via telemedicine while overcoming data gaps and - legacy workflows? 
In this episode Maria-Liisa Bruckert, Co-Founder and Co-CEO of - SQIN and recipient of the Google Play Best of 2020 award and Google Female Founder - Immersion 2020, walks through her transition from electrical engineering to health - tech and the practical playbook she uses to de-risk product development.

    - We cover MVP strategy and market research tactics—cold outreach, accelerators, clinical - meetings—and unconventional experiments like an AR “lipstick try-on” to collect - engagement data. Maria explains how SQIN aligns AI diagnosis with concrete business - cases, builds a digital clinic flow from diagnosis to prescription, and uses telemedicine - for remote follow-up and prescriptions. You’ll also hear about data strategy and - community bootstrapping, ethics and UX for sensitive AI messaging, go-to-market - choices for regional rollout, and monetization through SaaS integrations and partnerships. -

    Listen for actionable insights on product-market fit, hiring priorities - for AI and full-stack roles, and practical steps to launch a digital health startup - that balances technical credibility with patient access. +description: "Discover actionable digital health MVP strategy, telemedicine tactics to build, validate and monetize a scalable healthcare startup with faster remote care." +topics: +- AI +- computer vision +- data strategy +- product management +- startups +- healthcare +intro: "How do you build a digital health startup that ships a focused MVP, uses AI for diagnosis, and delivers care via telemedicine while overcoming data gaps and legacy workflows? In this episode Maria-Liisa Bruckert, Co-Founder and Co-CEO of SQIN and recipient of the Google Play Best of 2020 award and Google Female Founder Immersion 2020, walks through her transition from electrical engineering to health tech and the practical playbook she uses to de-risk product development.

    We cover MVP strategy and market research tactics—cold outreach, accelerators, clinical meetings—and unconventional experiments like an AR “lipstick try-on” to collect engagement data. Maria explains how SQIN aligns AI diagnosis with concrete business cases, builds a digital clinic flow from diagnosis to prescription, and uses telemedicine for remote follow-up and prescriptions. You’ll also hear about data strategy and community bootstrapping, ethics and UX for sensitive AI messaging, go-to-market choices for regional rollout, and monetization through SaaS integrations and partnerships.

    Listen for actionable insights on product-market fit, hiring priorities for AI and full-stack roles, and practical steps to launch a digital health startup that balances technical credibility with patient access." dateadded: 2023-12-03 duration: PT00H52M27S quotableClips: diff --git a/_podcast/building-and-scaling-ai-data-products-with-mlops.md b/_podcast/building-and-scaling-ai-data-products-with-mlops.md index 81937fc0..cbf4d012 100644 --- a/_podcast/building-and-scaling-ai-data-products-with-mlops.md +++ b/_podcast/building-and-scaling-ai-data-products-with-mlops.md @@ -1,6 +1,6 @@ --- -title: 'Build & Scale Data Products for AI: Roadmaps, MLOps, Customer Research & Metrics' -short: Product Management Essentials for Data Professionals +title: "Build & Scale Data Products for AI: Roadmaps, MLOps, Customer Research & Metrics" +short: "Product Management Essentials for Data Professionals" season: 7 episode: 3 guests: @@ -15,8 +15,13 @@ links: spotify: https://open.spotify.com/episode/1Oh6ewUJ2c1jiVcKxWIwDU apple: https://podcasts.apple.com/us/podcast/product-management-essentials-for-data-professionals/id1541710331?i=1000550093434 -description: Build scalable data products with MLOps roadmaps, customer research and metric-driven templates - prioritize impact, reduce failures, and measure success -intro: How do you move from proofs-of-concept to scalable AI data products that deliver measurable business value? In this episode, Greg Coquillo, a Technology Manager at Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, walks through practical approaches for building and scaling data products, MLOps, customer research, and metrics.

    We cover Greg’s transition into AI product work and the role of data product managers (internal vs. external), then dive into customer journey mapping, domain knowledge, and structured customer research—interview techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how to work backwards from business problems, contribute technical input to roadmaps with T-shirt sizing, and prioritize MLOps by spotting unscalable manual processes. Greg outlines three-year roadmap thinking (impact, effort, cost), a pragmatic Excel template (problems → solutions → metrics), and SMART and operational metrics like pipeline failures, SLAs, and data quality. He also addresses operating without a PM, aligning team mental models, and on-the-job product skill development.

    Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer research, and defining success metrics for AI-driven data products +description: "Build scalable data products with MLOps roadmaps, customer research and metric-driven templates - prioritize impact, reduce failures, and measure success" +topics: +- product management +- MLOps +- data engineering +- data science +intro: "How do you move from proofs-of-concept to scalable AI data products that deliver measurable business value? In this episode, Greg Coquillo, a Technology Manager at Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, walks through practical approaches for building and scaling data products, MLOps, customer research, and metrics.

    We cover Greg’s transition into AI product work and the role of data product managers (internal vs. external), then dive into customer journey mapping, domain knowledge, and structured customer research—interview techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how to work backwards from business problems, contribute technical input to roadmaps with T-shirt sizing, and prioritize MLOps by spotting unscalable manual processes. Greg outlines three-year roadmap thinking (impact, effort, cost), a pragmatic Excel template (problems → solutions → metrics), and SMART and operational metrics like pipeline failures, SLAs, and data quality. He also addresses operating without a PM, aligning team mental models, and on-the-job product skill development.

    Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer research, and defining success metrics for AI-driven data products" dateadded: 2022-02-06 duration: PT00H59M41S diff --git a/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md index 10ab967b..f645c285 100644 --- a/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md +++ b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md @@ -1,6 +1,6 @@ --- -title: 'Build and Scale Data Engineering Systems for Fraud Detection: Feature Pipelines, Real-Time Inference, Graph Databases & Production Debugging' -short: Data Engineering for Fraud Prevention +title: "Build and Scale Data Engineering Systems for Fraud Detection: Feature Pipelines, Real-Time Inference, Graph Databases & Production Debugging" +short: "Data Engineering for Fraud Prevention" season: 15 episode: 9 guests: @@ -16,7 +16,13 @@ links: youtube: https://www.youtube.com/watch?v=ZXNKjrrKU_I description: "Learn retail fraud detection with real-time scoring and MLOps: build data pipelines, graph investigations, and instant cashier decisions to cut losses." -intro: How do you build data infrastructure that stops stolen-card transactions and return abuse in real time? In this episode, Angela Ramirez, a Sam’s Club data engineer who moved from Sephora and specializes in machine learning for fraud prevention, walks through the engineering behind retail fraud detection. Drawing on her background in NLP and four years as a data engineer, Angela explains pipelines, feature engineering workflows that combine daily batches with real-time scoring, and the MLOps responsibilities for model metrics, deployment, and monitoring.

    We cover system design best practices—stakeholder alignment, timing, documentation—and data modeling tradeoffs across relational, document (Elasticsearch), and graph databases (SPARQL, Neo4j) to support network features connecting members, transactions, and products. Angela also discusses hybrid architectures for instant inference, tooling like PySpark, Pandas/PyArrow, Cassandra, GCP/Dataproc, and data quality practices (Great Expectations), plus operational debugging and scaling patterns. Listen to learn practical approaches to real-time scoring, graph-powered investigations, and the engineering decisions that make retail fraud detection reliable and actionable +topics: +- data engineering +- MLOps +- fraud detection +- graph databases +- software engineering +intro: "How do you build data infrastructure that stops stolen-card transactions and return abuse in real time? In this episode, Angela Ramirez, a Sam’s Club data engineer who moved from Sephora and specializes in machine learning for fraud prevention, walks through the engineering behind retail fraud detection. Drawing on her background in NLP and four years as a data engineer, Angela explains pipelines, feature engineering workflows that combine daily batches with real-time scoring, and the MLOps responsibilities for model metrics, deployment, and monitoring.

    We cover system design best practices—stakeholder alignment, timing, documentation—and data modeling tradeoffs across relational, document (Elasticsearch), and graph databases (SPARQL, Neo4j) to support network features connecting members, transactions, and products. Angela also discusses hybrid architectures for instant inference, tooling like PySpark, Pandas/PyArrow, Cassandra, GCP/Dataproc, and data quality practices (Great Expectations), plus operational debugging and scaling patterns. Listen to learn practical approaches to real-time scoring, graph-powered investigations, and the engineering decisions that make retail fraud detection reliable and actionable" dateadded: 2023-10-07 duration: PT00H59M19S diff --git a/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md index 044ed50e..2041abfd 100644 --- a/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md +++ b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md @@ -1,6 +1,6 @@ --- -title: 'Building and Scaling Data Science Practice in Industrial Enterprises: AI Adoption, MLOps Maturity & Career Growth' -short: Building and Scaling Data Science Practice in Industrial Enterprises +title: "Building and Scaling Data Science Practice in Industrial Enterprises: AI Adoption, MLOps Maturity & Career Growth" +short: "Building and Scaling Data Science Practice in Industrial Enterprises" season: 11 episode: 5 guests: @@ -15,7 +15,7 @@ links: spotify: https://open.spotify.com/episode/0M7Y77MFToxtKuyfdF5W22?si=jgWR6EchQnWe6nYWW44ZxQ youtube: https://www.youtube.com/watch?v=XbDQv8FTA4U -description: 'Discover Industrial AI tactics, MLOps & sensorization to scale projects: hub-and-spoke data teams, proving value, tooling and career steps to productionize ML.' 
+description: "Discover Industrial AI tactics, MLOps & sensorization to scale projects: hub-and-spoke data teams, proving value, tooling and career steps to productionize ML." intro: "How do industrial enterprises move from pilots to production-ready AI—and what team structures, MLOps practices, and career moves make that possible? In this episode Andrey Shtylenko, Director of Engineering at Honeywell and leader of its Advanced Technology Group and AI practice, walks through practical approaches for building and scaling data science teams in industrial enterprises. Drawing on Honeywell use cases—smart sensors, computer vision, and robotics—Andrey explains the data and machine learning practices that enable AI adoption, the role of sensorization and cloud processing, and the common challenges traditional industrial companies face.

    You’ll hear a concrete data practice maturity model (crawl → walk → run), POC strategy recommendations for proving value with end-to-end projects, and trade-offs between centralized, embedded, and hybrid hub-and-spoke team models. We cover MLOps standardization, shared services (experiment tracking, annotation, procurement), reporting-line impacts (CTO vs CIO vs CEO), and career guidance for engineers pivoting into data science or production ML roles. Listen to gain frameworks and actionable insights to structure teams, mature MLOps, and grow careers within industrial AI initiatives." topics: - data science diff --git a/_podcast/building-and-scaling-data-team.md b/_podcast/building-and-scaling-data-team.md index 36c3fe13..09f2158e 100644 --- a/_podcast/building-and-scaling-data-team.md +++ b/_podcast/building-and-scaling-data-team.md @@ -1,6 +1,6 @@ --- -title: 'How to Build & Scale a Data Team: Hiring, Production ML, Forecasting & Driving Adoption' -short: Building and Leading Data Teams +title: "How to Build & Scale a Data Team: Hiring, Production ML, Forecasting & Driving Adoption" +short: "Building and Leading Data Teams" season: 5 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3hlzKwORlOsCPKrawuW4YQ apple: https://podcasts.apple.com/us/podcast/building-and-leading-data-teams-tammy-liang/id1541710331?i=1000537994433 -description: 'Learn to build a scalable data team: hiring, production ML delivery, demand forecasting and driving adoption—practical staffing, stack, and governance tips.' -intro: How do you build and scale a data team that moves beyond dashboards to production ML, reliable forecasting, and real adoption across the business? In this episode Tammy Liang, Chief of Data at Platanomelón and co-host of Data for Future, walks through her journey building data capabilities for marketing, e-commerce, and operations at a mission-driven consumer brand.

    Tammy breaks down practical hiring decisions—why she hired an analyst first, then a data engineer, and why early senior hires matter—plus the tradeoffs between analyst, engineer, and business-facing roles. She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, Notion) to enable forecasting and production ML, and describes common model delivery challenges moving work out of notebooks. The conversation also covers demand forecasting, time-series and basic machine learning skills, data accuracy and governance, dbt tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building trust.

    Listen to learn concrete steps for hiring a data team, setting up a data warehouse for forecasting, delivering models to production, and creating data products that stakeholders actually use +description: "Learn to build a scalable data team: hiring, production ML delivery, demand forecasting and driving adoption—practical staffing, stack, and governance tips." +intro: "How do you build and scale a data team that moves beyond dashboards to production ML, reliable forecasting, and real adoption across the business? In this episode Tammy Liang, Chief of Data at Platanomelón and co-host of Data for Future, walks through her journey building data capabilities for marketing, e-commerce, and operations at a mission-driven consumer brand.

    Tammy breaks down practical hiring decisions—why she hired an analyst first, then a data engineer, and why early senior hires matter—plus the tradeoffs between analyst, engineer, and business-facing roles. She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, Notion) to enable forecasting and production ML, and describes common model delivery challenges moving work out of notebooks. The conversation also covers demand forecasting, time-series and basic machine learning skills, data accuracy and governance, dbt tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building trust.

    Listen to learn concrete steps for hiring a data team, setting up a data warehouse for forecasting, delivering models to production, and creating data products that stakeholders actually use" topics: - team building - data teams diff --git a/_podcast/building-data-products-lead-data-scientist.md b/_podcast/building-data-products-lead-data-scientist.md index bf9e2e88..962088a0 100644 --- a/_podcast/building-data-products-lead-data-scientist.md +++ b/_podcast/building-data-products-lead-data-scientist.md @@ -1,6 +1,6 @@ --- title: "Building Data Products at Scale: Intake, A/B Testing, and MLOps in a Marketing Organization" -short: Collaborative Data Science in Business +short: "Collaborative Data Science in Business" season: 16 episode: 3 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/46DN6rAlufvvXaqdOomoTe?si=OMPDN8m5QZWsc5kJY8IcAA youtube: https://www.youtube.com/watch?v=1pExOVuCF8Q -description: Discover MLOps tactics to prioritize data products, run A/B testing and enable model monitoring for faster validation, reliable rollouts and stakeholder buy-in -intro: How do you prioritize data product work, validate models in production, and keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a practical data product operating model for tackling those challenges.

    Drawing on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains a four‑phase funnel with a "single front door" intake, a Definition of Done template with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR feasibility. He breaks down when to treat work as analytics vs. research, how R&D sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies for production rollout as MLOps capabilities evolve. Technical tooling and monitoring get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection — plus pragmatic dashboarding and alerting patterns. Listeners will come away with actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder engagement, and the estimation and cadence practices that make ML teams productive +description: "Discover MLOps tactics to prioritize data products, run A/B testing and enable model monitoring for faster validation, reliable rollouts and stakeholder buy-in" +topics: +- MLOps +- machine learning +- data science +- tools +- product management +intro: "How do you prioritize data product work, validate models in production, and keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a practical data product operating model for tackling those challenges.

    Drawing on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains a four‑phase funnel with a \"single front door\" intake, a Definition of Done template with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR feasibility. He breaks down when to treat work as analytics vs. research, how R&D sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies for production rollout as MLOps capabilities evolve. Technical tooling and monitoring get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection — plus pragmatic dashboarding and alerting patterns. Listeners will come away with actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder engagement, and the estimation and cadence practices that make ML teams productive" dateadded: 2023-10-29 duration: PT01H14S diff --git a/_podcast/building-data-products-product-owner-vs-product-manager.md b/_podcast/building-data-products-product-owner-vs-product-manager.md index 3d96ac82..4cc4b170 100644 --- a/_podcast/building-data-products-product-owner-vs-product-manager.md +++ b/_podcast/building-data-products-product-owner-vs-product-manager.md @@ -1,6 +1,6 @@ --- -title: 'Building Data Products at Scale: Recommenders, Domain Ownership, and Hiring for Production ML' -short: Product Owners in Data Science +title: "Building Data Products at Scale: Recommenders, Domain Ownership, and Hiring for Production ML" +short: "Product Owners in Data Science" season: 11 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5deNrH5E6802ClwVt2Re4A?si=Xdg7qlT1TPCrH318MvS2RA youtube: https://www.youtube.com/watch?v=rTRTjB6cGng -description: Discover scaling recommender systems, production ML hiring strategies and price markdown modeling to cut waste, optimize discounts, and lead data product teams -intro: 'How do you scale recommender systems, hire for production ML, and model 
price markdowns to reduce waste—and who should own those decisions? In this episode, Anna Hannemann, Domain Owner for Data Science at Metro.digital, walks through practical answers informed by her PhD in Data Science and prior leadership of recommender and robotics/smart logistics teams.

    We cover customer data completeness, API-first recommender design, and algorithm choices like collaborative filtering and Word2Vec variants, plus the trade-offs product owners must manage. Anna contrasts product owner and product manager responsibilities, describes the domain owner role for aligning data scientists across teams, and lays out hiring strategies for production ML—data scientists, ML engineers, and MLOps. You’ll also hear how to source problems from operations, evaluate new data domains with MVPs and manual fixes, and take a portfolio approach to staging data product investments.

    If you work in data product leadership, product management, or machine learning operations, this episode delivers actionable frameworks for scaling recommenders, building production ML capabilities, and applying price markdown modeling to optimize discounting and reduce waste. Recommended reading: Data Science for Business.' +description: "Discover scaling recommender systems, production ML hiring strategies and price markdown modeling to cut waste, optimize discounts, and lead data product teams" +intro: "How do you scale recommender systems, hire for production ML, and model price markdowns to reduce waste—and who should own those decisions? In this episode, Anna Hannemann, Domain Owner for Data Science at Metro.digital, walks through practical answers informed by her PhD in Data Science and prior leadership of recommender and robotics/smart logistics teams.

    We cover customer data completeness, API-first recommender design, and algorithm choices like collaborative filtering and Word2Vec variants, plus the trade-offs product owners must manage. Anna contrasts product owner and product manager responsibilities, describes the domain owner role for aligning data scientists across teams, and lays out hiring strategies for production ML—data scientists, ML engineers, and MLOps. You’ll also hear how to source problems from operations, evaluate new data domains with MVPs and manual fixes, and take a portfolio approach to staging data product investments.

    If you work in data product leadership, product management, or machine learning operations, this episode delivers actionable frameworks for scaling recommenders, building production ML capabilities, and applying price markdown modeling to optimize discounting and reduce waste. Recommended reading: Data Science for Business." topics: - data products - product owners diff --git a/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md index 2900165f..f459c18b 100644 --- a/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md +++ b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md @@ -1,6 +1,6 @@ --- -title: Build Data Science Programs, Democratize HPC & Scale Graph Analytics with Arkouda -short: Leading Data Research +title: "Build Data Science Programs, Democratize HPC & Scale Graph Analytics with Arkouda" +short: "Leading Data Research" season: 10 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7DmFWFHUwxx4Wf0X6GbKBf?si=2DW0G2EMQ7ebB9K60LfJyQ youtube: https://www.youtube.com/watch?v=vZLlpsUlchQ -description: Learn to build data science programs, democratize HPC and scale graph analytics with Arkouda - practical curriculum, performance tips and recruitment tips -intro: How do you build effective data science programs, democratize high-performance computing, and scale graph analytics so researchers and practitioners can solve real-world problems? In this episode, David Bader — Director of the Institute for Data Science at NJIT, founder of NJIT’s Department of Data Science, and a distinguished professor with deep expertise in HPC, big data, and analytics — walks through his career, leadership in launching academic units, and practical lessons for curriculum design and regional workforce alignment.

    We explore Arkouda and ARACHNE — interactive, massive-scale Python analytics and graph tools — and the Chapel-backed supercomputing techniques that aim to democratize HPC for broader use. David discusses research lab-as-startup practices (open source releases, datasets like synthetic/SNAP, and industry partnerships with NSF, Accenture, NVIDIA), building usable systems to achieve adoption (including a NASA example), and underappreciated advances such as STINGER and streaming graph analytics. He also covers mentorship models, recruiting PhD and MS students, conference strategies, and balancing teaching, research, and service.

    Listen to learn concrete approaches to creating data science programs, practical steps to scale graph analytics with Arkouda, and tactics for turning research into real-world impact +description: "Learn to build data science programs, democratize HPC and scale graph analytics with Arkouda - practical curriculum, performance tips and recruitment tips" +intro: "How do you build effective data science programs, democratize high-performance computing, and scale graph analytics so researchers and practitioners can solve real-world problems? In this episode, David Bader — Director of the Institute for Data Science at NJIT, founder of NJIT’s Department of Data Science, and a distinguished professor with deep expertise in HPC, big data, and analytics — walks through his career, leadership in launching academic units, and practical lessons for curriculum design and regional workforce alignment.

    We explore Arkouda and ARACHNE — interactive, massive-scale Python analytics and graph tools — and the Chapel-backed supercomputing techniques that aim to democratize HPC for broader use. David discusses research lab-as-startup practices (open source releases, datasets like synthetic/SNAP, and industry partnerships with NSF, Accenture, NVIDIA), building usable systems to achieve adoption (including a NASA example), and underappreciated advances such as STINGER and streaming graph analytics. He also covers mentorship models, recruiting PhD and MS students, conference strategies, and balancing teaching, research, and service.

    Listen to learn concrete approaches to creating data science programs, practical steps to scale graph analytics with Arkouda, and tactics for turning research into real-world impact" topics: - data science - data analytics diff --git a/_podcast/building-data-team.md b/_podcast/building-data-team.md index e265dc27..e1f82c6a 100644 --- a/_podcast/building-data-team.md +++ b/_podcast/building-data-team.md @@ -1,6 +1,6 @@ --- -title: 'How to Build and Scale ML Teams: Hiring, MLOps & Product-Driven AI for Startups' -short: Building a Data Science Team +title: "How to Build and Scale ML Teams: Hiring, MLOps & Product-Driven AI for Startups" +short: "Building a Data Science Team" season: 1 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0daFpY1z2J4Uop1XdMNsnY apple: https://podcasts.apple.com/us/podcast/building-a-data-science-team-dat-tran/id1541710331?i=1000502061864 -description: 'Master building ML teams: hiring playbooks, MLOps day-two ops, and product-driven AI for startups—scale with T-shaped engineers, ship robust models.' -intro: 'How do you build and scale an ML team that delivers product-driven AI without getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks through practical strategies for hiring, MLOps, and shaping data teams for startups.

    Dat draws on a decade of production ML experience to unpack the MLOps mindset (day-two operations, model maintenance), how to hire early (T-shaped generalists, take-home assessments, key hiring signals), and when to shift to specialists as you scale. He also explains product-centric practices: aligning hiring to prototype vs. MVP needs, prioritizing impact over technical perfection, and building human-centric AI (augmenting pricing managers at Priceloop). Other topics include open research and open source as strategic advantages, bootstrapping data capabilities, retention through autonomy and interesting work, and educating leadership about realistic AI expectations.

    Listen for actionable guidance on building ML teams, hiring machine learning engineers, and implementing MLOps and product-driven AI in early-stage startups.' +description: "Master building ML teams: hiring playbooks, MLOps day-two ops, and product-driven AI for startups—scale with T-shaped engineers, ship robust models." +intro: "How do you build and scale an ML team that delivers product-driven AI without getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks through practical strategies for hiring, MLOps, and shaping data teams for startups.

    Dat draws on a decade of production ML experience to unpack the MLOps mindset (day-two operations, model maintenance), how to hire early (T-shaped generalists, take-home assessments, key hiring signals), and when to shift to specialists as you scale. He also explains product-centric practices: aligning hiring to prototype vs. MVP needs, prioritizing impact over technical perfection, and building human-centric AI (augmenting pricing managers at Priceloop). Other topics include open research and open source as strategic advantages, bootstrapping data capabilities, retention through autonomy and interesting work, and educating leadership about realistic AI expectations.

    Listen for actionable guidance on building ML teams, hiring machine learning engineers, and implementing MLOps and product-driven AI in early-stage startups." topics: - leadership - team building diff --git a/_podcast/building-domestic-risk-assessment-tool.md b/_podcast/building-domestic-risk-assessment-tool.md index da8d6455..0a86b2c6 100644 --- a/_podcast/building-domestic-risk-assessment-tool.md +++ b/_podcast/building-domestic-risk-assessment-tool.md @@ -1,7 +1,6 @@ --- -title: 'Building a Domestic Risk Assessment Tool: Data Cleaning, Risk Scoring Models - and Privacy Compliance' -short: Building a Domestic Risk Assessment Tool +title: "Building a Domestic Risk Assessment Tool: Data Cleaning, Risk Scoring Models and Privacy Compliance" +short: "Building a Domestic Risk Assessment Tool" season: 18 episode: 7 guests: @@ -15,14 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/building-a-domestic-risk-assessment-tool-sabina-firtala/id1541710331?i=1000662124309 spotify: https://open.spotify.com/episode/7bjORhGzTQoxtbv60mMtzW?si=p6UaBdZJTnGvlwbGb6AsFQ youtube: https://www.youtube.com/watch?v=CpWlBAmD9ok -description: 'Discover building a domestic risk assessment: data cleaning, risk scoring - models, and privacy compliance to improve triage, reduce bias, and ensure compliance.' -intro: 'How do you build an accurate, privacy-compliant domestic risk assessment tool - that frontline teams can actually use? In this episode Sabina Firtala — who leads - Frontline’s AI product development and brings experience in data wrangling, model - validation, and applied analytics from finance, SaaS, and mission-driven projects - — walks through a practical roadmap.

    We cover problem framing and project - scope; sources like case management, public records, and surveys; and hands-on data +description: "Discover building a domestic risk assessment: data cleaning, risk scoring models, and privacy compliance to improve triage, reduce bias, and ensure compliance." +topics: +- data science +- machine learning +- data engineering +- data governance +- MLOps +intro: "How do you build an accurate, privacy-compliant domestic risk assessment tool that frontline teams can actually use? In this episode Sabina Firtala — who leads Frontline’s AI product development and brings experience in data wrangling, model validation, and applied analytics from finance, SaaS, and mission-driven projects — walks through a practical roadmap.

    We cover problem framing and project scope; sources like case management, public records, and surveys; and hands-on data" work: cleaning, linking, and feature engineering. Sabina explains risk scoring approaches and model architecture, evaluation metrics and bias assessment, plus privacy, ethical considerations, and legal data governance. You’ll also hear about deployment into diff --git a/_podcast/building-explainable-and-actionable-ai-ml-systems.md b/_podcast/building-explainable-and-actionable-ai-ml-systems.md index ae75e85d..59f39f3f 100644 --- a/_podcast/building-explainable-and-actionable-ai-ml-systems.md +++ b/_podcast/building-explainable-and-actionable-ai-ml-systems.md @@ -1,6 +1,6 @@ --- -title: 'Build Explainable and Actionable AI/ML Systems: Industrial PhD, Trust Theory & Production Deployment' -short: Build Explainable and Actionable AI/ML Systems +title: "Build Explainable and Actionable AI/ML Systems: Industrial PhD, Trust Theory & Production Deployment" +short: "Build Explainable and Actionable AI/ML Systems" season: 14 episode: 9 guests: diff --git a/_podcast/building-healthcare-machine-learning-systems.md b/_podcast/building-healthcare-machine-learning-systems.md index 21b46d46..92652f14 100644 --- a/_podcast/building-healthcare-machine-learning-systems.md +++ b/_podcast/building-healthcare-machine-learning-systems.md @@ -1,7 +1,6 @@ --- -title: 'Building Healthcare ML Systems: From Sepsis Prediction to Low-Resource Clinical - Deployment' -short: Bridging Data Science and Healthcare +title: "Building Healthcare ML Systems: From Sepsis Prediction to Low-Resource Clinical Deployment" +short: "Bridging Data Science and Healthcare" season: 16 episode: 2 guests: @@ -15,24 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/bridging-data-science-and-healthcare-eleni-stamatelou/id1541710331?i=1000632040444 spotify: https://open.spotify.com/episode/5W6lfZVhjIKEmVzBuexfzE?si=0nUHr66eQa6oPVJDb3d0rw youtube: 
https://www.youtube.com/watch?v=pDOwlulDh0c -description: Learn to build reliable healthcare ML systems for sepsis prediction and - low-resource clinical deployment—improve patient outcomes, scalability, and trust. -intro: 'How do you build machine learning systems that can predict sepsis and actually - work in low-resource clinical settings? In this episode Eleni Stamatelou, a machine - learning researcher and educator focused on using data science to improve healthcare, - walks through the technical and practical steps of turning models into deployed - clinical tools. With expertise in signal processing, deep learning, and data-driven - design, Eleni frames the core challenges of healthcare ML: data quality and preprocessing, - model reliability for sepsis prediction, and the constraints of low-resource deployment. -

    We cover key topics including designing robust models for noisy clinical - signals, evaluation and validation strategies suited to patient safety, and pragmatic - considerations for integrating ML into clinical workflows with limited infrastructure. - Listeners will gain actionable insights on bridging research and practice—how to - prioritize features, manage trade-offs between complexity and reliability, and make - deployment decisions that respect resource limitations.

    If you work on - machine learning in healthcare, clinical AI, or sepsis prediction, this episode - provides concrete perspectives on building systems that are both scientifically - sound and practically deployable in low-resource environments.' +description: "Learn to build reliable healthcare ML systems for sepsis prediction and low-resource clinical deployment—improve patient outcomes, scalability, and trust." +topics: +- machine learning +- data science +- MLOps +- data engineering +- data governance +- healthcare +intro: "How do you build machine learning systems that can predict sepsis and actually work in low-resource clinical settings? In this episode Eleni Stamatelou, a machine learning researcher and educator focused on using data science to improve healthcare, walks through the technical and practical steps of turning models into deployed clinical tools. With expertise in signal processing, deep learning, and data-driven design, Eleni frames the core challenges of healthcare ML: data quality and preprocessing, model reliability for sepsis prediction, and the constraints of low-resource deployment.

    We cover key topics including designing robust models for noisy clinical signals, evaluation and validation strategies suited to patient safety, and pragmatic considerations for integrating ML into clinical workflows with limited infrastructure. Listeners will gain actionable insights on bridging research and practice—how to prioritize features, manage trade-offs between complexity and reliability, and make deployment decisions that respect resource limitations.

    If you work on machine learning in healthcare, clinical AI, or sepsis prediction, this episode provides concrete perspectives on building systems that are both scientifically sound and practically deployable in low-resource environments." dateadded: 2023-10-23 duration: PT00H59M01S quotableClips: diff --git a/_podcast/building-ml-communities-diversity-and-career-growth.md b/_podcast/building-ml-communities-diversity-and-career-growth.md index 7a88fb09..f52a0a70 100644 --- a/_podcast/building-ml-communities-diversity-and-career-growth.md +++ b/_podcast/building-ml-communities-diversity-and-career-growth.md @@ -1,6 +1,6 @@ --- -title: 'How to Build & Scale a Data Science Community: Diversity, ML Deployment & Career Growth' -short: Accelerating the Adoption of AI through Diversity +title: "How to Build & Scale a Data Science Community: Diversity, ML Deployment & Career Growth" +short: "Accelerating the Adoption of AI through Diversity" season: 13 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6pRkAK9Zo2QrXZCAzh2veV?si=ixEmGK5-RemknBcHrChMNA youtube: https://www.youtube.com/watch?v=SRUwwvk_YCk -description: Discover how to build and scale a data science community, boost diversity, deploy ML, and accelerate career growth with mentoring & hiring strategies -intro: 'How do you build and scale a data science community that actually advances diversity, supports machine learning deployment, and accelerates career growth? In this episode, Dânia Meira — AI Guild co-founder, data scientist, teacher and speaker with a Master’s in Computer Science (AI) — walks through her journey from applied math and marketing analytics to founding a global data science community in Berlin.

    We cover practical community building: turning women’s meetups into monthly dinners and an international membership, curating meetup content and the Datalift Summit, and policies like visibility-first speaker invites, codes of conduct, and misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) improves product fit and market reach, how to create psychological safety, and how to source and train diverse talent for regulated industries. She also outlines a vendor-agnostic consulting model for machine learning deployment, community-to-client matching, and scaling from a freelance network to full-time teams.' +description: "Discover how to build and scale a data science community, boost diversity, deploy ML, and accelerate career growth with mentoring & hiring strategies" +intro: "How do you build and scale a data science community that actually advances diversity, supports machine learning deployment, and accelerates career growth? In this episode, Dânia Meira — AI Guild co-founder, data scientist, teacher and speaker with a Master’s in Computer Science (AI) — walks through her journey from applied math and marketing analytics to founding a global data science community in Berlin.

    We cover practical community building: turning women’s meetups into monthly dinners and an international membership, curating meetup content and the Datalift Summit, and policies like visibility-first speaker invites, codes of conduct, and misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) improves product fit and market reach, how to create psychological safety, and how to source and train diverse talent for regulated industries. She also outlines a vendor-agnostic consulting model for machine learning deployment, community-to-client matching, and scaling from a freelance network to full-time teams." topics: - data science - machine learning diff --git a/_podcast/building-mlops-startup.md b/_podcast/building-mlops-startup.md index 0ed65837..310ebbd4 100644 --- a/_podcast/building-mlops-startup.md +++ b/_podcast/building-mlops-startup.md @@ -1,6 +1,6 @@ --- -title: 'How to Build a Successful ML Startup: MLOps, Model Monitoring, Open Source & Founder Fit' -short: I Want to Build a Machine Learning Startup! +title: "How to Build a Successful ML Startup: MLOps, Model Monitoring, Open Source & Founder Fit" +short: "I Want to Build a Machine Learning Startup!" season: 4 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7fwbqo5tDrtakuqWaIuEjc apple: https://podcasts.apple.com/us/podcast/i-want-to-build-a-machine-learning-startup-elena-samuylova/id1541710331?i=1000529106923 -description: 'Discover practical MLOps, model monitoring and founder-fit tactics to build an ML startup: hire, fund, productize, and reach product-market fit faster.' -intro: 'What does it take to build a successful ML startup—especially around MLOps, model monitoring, open source, and founder fit? Elena Samuylova, Co-founder & CEO of Evidently AI, joins to answer that question drawing on her applied machine learning experience since 2014, including roles at Yandex Data Factory and an industrial AI startup.

    This episode walks through practical founder decisions: sourcing problem-first ideas, finding compatible co-founders and establishing pre-launch alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena explains what “AI-first” positioning really means, how developer tools and open source shape go-to-market strategies (open core, cloud, monetization and cloning risks), and how Evidently validated model monitoring as a business. You’ll hear tactical guidance on customer discovery, persuading engineers to adopt your tool, data safety and on-prem deployments, hiring and scaling tradeoffs, funding paths, productizing services for non-technical founders, and normalizing failure and work–life tradeoffs.

    Listen to gain actionable frameworks for building an ML startup—covering model monitoring, MLOps, open source strategy, founder-market fit, and the concrete signals that indicate product–market fit.' +description: "Discover practical MLOps, model monitoring and founder-fit tactics to build an ML startup: hire, fund, productize, and reach product-market fit faster." +intro: "What does it take to build a successful ML startup—especially around MLOps, model monitoring, open source, and founder fit? Elena Samuylova, Co-founder & CEO of Evidently AI, joins to answer that question drawing on her applied machine learning experience since 2014, including roles at Yandex Data Factory and an industrial AI startup.

    This episode walks through practical founder decisions: sourcing problem-first ideas, finding compatible co-founders and establishing pre-launch alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena explains what “AI-first” positioning really means, how developer tools and open source shape go-to-market strategies (open core, cloud, monetization and cloning risks), and how Evidently validated model monitoring as a business. You’ll hear tactical guidance on customer discovery, persuading engineers to adopt your tool, data safety and on-prem deployments, hiring and scaling tradeoffs, funding paths, productizing services for non-technical founders, and normalizing failure and work–life tradeoffs.

    Listen to gain actionable frameworks for building an ML startup—covering model monitoring, MLOps, open source strategy, founder-market fit, and the concrete signals that indicate product–market fit." topics: - startup - machine learning diff --git a/_podcast/building-open-source-data-product-for-identity-resolution.md b/_podcast/building-open-source-data-product-for-identity-resolution.md index e547bb66..90878932 100644 --- a/_podcast/building-open-source-data-product-for-identity-resolution.md +++ b/_podcast/building-open-source-data-product-for-identity-resolution.md @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/54DufG1ZVj0GMSoWTbJsen?si=d7XNSW2_Tfa4qKJxmFQpIA youtube: https://www.youtube.com/watch?v=lpjffCOPxlY -description: Discover how to build an open-source, ML-powered identity resolution tool. Learn about the practical challenges across industries. -intro: 'How do you build an open-source, ML-powered identity resolution tool that becomes the single source of truth in a modern data stack? In this episode Sonal Goyal—founder of Zingg and a 23-year data product veteran—walks through the practical challenges of identity resolution and entity resolution across industries like investment banking, telecom, gaming, and insurance. Sonal explains why ML-powered approaches matter, how an open-source framework like Zingg can fit into your modern data stack, and what it takes to reconcile customer and supplier records into a reliable single source of truth.

    Expect discussion of architecture and integration trade-offs, the role of machine learning in matching and deduplication, and lessons from building production data products at scale. If you manage customer data, data integration, or are evaluating open-source identity resolution solutions, this episode offers concrete insights and pointers—including Zingg’s open-source repository—to help you evaluate adoption, reduce duplicate records, and improve downstream analytics and personalization' +description: "Discover how to build an open-source, ML-powered identity resolution tool. Learn about the practical challenges across industries." +topics: +- machine learning +- MLOps +- data engineering +- open-source +- product management +intro: "How do you build an open-source, ML-powered identity resolution tool that becomes the single source of truth in a modern data stack? In this episode Sonal Goyal—founder of Zingg and a 23-year data product veteran—walks through the practical challenges of identity resolution and entity resolution across industries like investment banking, telecom, gaming, and insurance. Sonal explains why ML-powered approaches matter, how an open-source framework like Zingg can fit into your modern data stack, and what it takes to reconcile customer and supplier records into a reliable single source of truth.

    Expect discussion of architecture and integration trade-offs, the role of machine learning in matching and deduplication, and lessons from building production data products at scale. If you manage customer data, data integration, or are evaluating open-source identity resolution solutions, this episode offers concrete insights and pointers—including Zingg’s open-source repository—to help you evaluate adoption, reduce duplicate records, and improve downstream analytics and personalization." dateadded: 2022-10-29 duration: PT01H23S diff --git a/_podcast/building-open-source-nlp-tool.md b/_podcast/building-open-source-nlp-tool.md index b019be9f..5f20c24a 100644 --- a/_podcast/building-open-source-nlp-tool.md +++ b/_podcast/building-open-source-nlp-tool.md @@ -1,6 +1,6 @@ --- -title: 'Build Open-Source NLP Tools: Weak Supervision, LLM Heuristics & Enterprise ML Product Strategy' -short: Build Open-Source NLP Tools +title: "Build Open-Source NLP Tools: Weak Supervision, LLM Heuristics & Enterprise ML Product Strategy" +short: "Build Open-Source NLP Tools" season: 13 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5SjY4vatlUYFCZUMV7dE7W?si=MC4ZZrKbSTKUEDVEfedGwA youtube: https://www.youtube.com/watch?v=WIpnyiHp4IE -description: Discover weak supervision, NLP labeling & GPT heuristics to build high-quality datasets faster — combine Refinery, Bricks, ensemble heuristics & active learning -intro: 'How can teams scale high-quality NLP labeling without hand-labeling every example? In this episode, Johannes Hötter, data scientist, engineer, and co-founder of kern, explains practical approaches to that problem using weak supervision, heuristics, and open-source tooling. We walk through demos of Refinery and Bricks, with a close look at Refinery’s weak supervision and labeling workflows, and why Jupyter widgets leave a gap for NLP tooling.

    You’ll hear about common NLP challenges—messy labels and text metadata—and how ChatGPT can be used as a labeling heuristic. Johannes outlines combining heuristics: GPT-driven rules, active learning, and crowd labels as an ensemble of “workers,” plus foundations like Hugging Face, embeddings, and robust data management. Bricks is presented as a heuristic library with recipes and ensemble methods to streamline labeling.

    The conversation also covers productization choices (open-source vs commercial), targeting engineers, enterprise workflows, community support, and niche document/PDF NLP issues. Listen to learn actionable strategies to improve NLP labeling quality, adopt weak supervision and GPT heuristics, and make tooling and go-to-market decisions for scalable data labeling and model training.' +description: "Discover weak supervision, NLP labeling & GPT heuristics to build high-quality datasets faster — combine Refinery, Bricks, ensemble heuristics & active learning" +intro: "How can teams scale high-quality NLP labeling without hand-labeling every example? In this episode, Johannes Hötter, data scientist, engineer, and co-founder of kern, explains practical approaches to that problem using weak supervision, heuristics, and open-source tooling. We walk through demos of Refinery and Bricks, with a close look at Refinery’s weak supervision and labeling workflows, and why Jupyter widgets leave a gap for NLP tooling.

    You’ll hear about common NLP challenges—messy labels and text metadata—and how ChatGPT can be used as a labeling heuristic. Johannes outlines combining heuristics: GPT-driven rules, active learning, and crowd labels as an ensemble of “workers,” plus foundations like Hugging Face, embeddings, and robust data management. Bricks is presented as a heuristic library with recipes and ensemble methods to streamline labeling.

    The conversation also covers productization choices (open-source vs commercial), targeting engineers, enterprise workflows, community support, and niche document/PDF NLP issues. Listen to learn actionable strategies to improve NLP labeling quality, adopt weak supervision and GPT heuristics, and make tooling and go-to-market decisions for scalable data labeling and model training." topics: - NLP - machine learning diff --git a/_podcast/building-production-ml-platform-and-mlops-team.md b/_podcast/building-production-ml-platform-and-mlops-team.md index 516d8a22..18cae147 100644 --- a/_podcast/building-production-ml-platform-and-mlops-team.md +++ b/_podcast/building-production-ml-platform-and-mlops-team.md @@ -1,6 +1,6 @@ --- -title: 'Building Production ML Platforms: Infrastructure, Workflows, Teams & Governance That Scale' -short: 'From Scratch to Success: Building an MLOps Team and ML Platform' +title: "Building Production ML Platforms: Infrastructure, Workflows, Teams & Governance That Scale" +short: "From Scratch to Success: Building an MLOps Team and ML Platform" season: 14 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0raudIf9XsKdUfr5m2YlUE?si=x1PuaBqwTVyMlfNlGape2A youtube: https://www.youtube.com/watch?v=CB1YIsxQRtc -description: Discover MLOps strategies to build an ML platform with experiment tracking, improved reproducibility, faster releases and compliance-ready model operations -intro: How do you design an ML platform that reliably deploys models, tracks experiments, and meets regulatory constraints? In this episode, Simon Stiebellehner — Lead MLOps Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining & Data Warehousing — walks through practical MLOps platform design grounded in real-world deployment challenges.

    We cover a clear definition of MLOps as people, processes, and technology, and dig into core platform skills (cloud infrastructure, Kubernetes, Terraform), user-centric design for notebooks and data science workflows, and software engineering fundamentals for production ML. Simon explains experiment tracking, model registry practices, deployment patterns (batch vs online), orchestration choices like Airflow, and stitching SaaS and open-source tools into a coherent ML platform. The episode also addresses compliance and data governance — GDPR, fintech security constraints — plus metadata, lineage, API design, and monitoring. We close with build vs buy trade-offs, staffing and on-call considerations, and how emerging LLM needs affect platforms.

    Listen to learn concrete guidance on model deployment, reproducibility, orchestration, and compliance to help you design a pragmatic, scalable ML platform +description: "Discover MLOps strategies to build an ML platform with experiment tracking, improved reproducibility, faster releases and compliance-ready model operations" +intro: "How do you design an ML platform that reliably deploys models, tracks experiments, and meets regulatory constraints? In this episode, Simon Stiebellehner — Lead MLOps Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining & Data Warehousing — walks through practical MLOps platform design grounded in real-world deployment challenges.

    We cover a clear definition of MLOps as people, processes, and technology, and dig into core platform skills (cloud infrastructure, Kubernetes, Terraform), user-centric design for notebooks and data science workflows, and software engineering fundamentals for production ML. Simon explains experiment tracking, model registry practices, deployment patterns (batch vs online), orchestration choices like Airflow, and stitching SaaS and open-source tools into a coherent ML platform. The episode also addresses compliance and data governance — GDPR, fintech security constraints — plus metadata, lineage, API design, and monitoring. We close with build vs buy trade-offs, staffing and on-call considerations, and how emerging LLM needs affect platforms.

    Listen to learn concrete guidance on model deployment, reproducibility, orchestration, and compliance to help you design a pragmatic, scalable ML platform." topics: - MLOps - machine learning diff --git a/_podcast/building-production-search-systems.md b/_podcast/building-production-search-systems.md index 0fb14dcd..c0fd7465 100644 --- a/_podcast/building-production-search-systems.md +++ b/_podcast/building-production-search-systems.md @@ -1,6 +1,6 @@ --- -title: 'Building Search Systems: Dense Embeddings, MLOps and Evaluation Metrics' -short: Building Production Search Systems +title: "Building Search Systems: Dense Embeddings, MLOps and Evaluation Metrics" +short: "Building Production Search Systems" season: 17 episode: 9 guests: @@ -14,25 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/building-production-search-systems-daniel-svonava/id1541710331?i=1000650138905 spotify: https://open.spotify.com/episode/19R0rLA8hULTBZi9FhZuTs?si=xggb0OzfRHCFSmXtJWm7bA youtube: https://www.youtube.com/watch?v=gEmSrknGKDE -description: Learn dense embeddings, vector databases & MLOps to productionize search—get - indexing, hybrid search, evaluation metrics and deploy tips to boost relevance. -intro: 'How do you build search systems that balance dense embeddings, MLOps, and - meaningful evaluation metrics? In this episode Daniel Svonava — an entrepreneurial - technologist with 20 years of experience (from competitive programming and research - internships to leading ML infrastructure at YouTube Ads) and co-founder of Superlinked/VectorHub - — walks through practical design and operational decisions for modern search and - retrieval.

    We cover core topics: framing search as a decision problem, - representation learning from bag-of-words to dense vector embeddings, inverted index - mechanics, document chunking and ingestion, and when to use Lucene/Elasticsearch - versus dedicated vector databases. Daniel explains vector compute trade-offs (ingestion - vs query-time encoding), model versioning and recomputing embeddings, hybrid search - strategies, CLIP-style cross-modal retrieval, multi-embedding designs, and techniques - for encoding recency and timestamps. He also digs into MLOps concerns — pipeline - brittleness, configuration debt, and deployment trade-offs — plus evaluation: business - metrics, A/B testing, offline evaluation and operational metrics.

    Listeners - will get concrete guidance on embedding strategy, vector database selection, indexing - and ranking trade-offs, and how to measure search impact so teams can prototype - faster and productionize reliable retrieval systems.' +description: "Learn dense embeddings, vector databases & MLOps to productionize search—get indexing, hybrid search, evaluation metrics and deploy tips to boost relevance." +topics: +- information retrieval +- vector databases +- embeddings +- MLOps +- evaluation metrics +- production +- search +intro: "How do you build search systems that balance dense embeddings, MLOps, and meaningful evaluation metrics? In this episode Daniel Svonava — an entrepreneurial technologist with 20 years of experience (from competitive programming and research internships to leading ML infrastructure at YouTube Ads) and co-founder of Superlinked/VectorHub — walks through practical design and operational decisions for modern search and retrieval.

    We cover core topics: framing search as a decision problem, representation learning from bag-of-words to dense vector embeddings, inverted index mechanics, document chunking and ingestion, and when to use Lucene/Elasticsearch versus dedicated vector databases. Daniel explains vector compute trade-offs (ingestion vs query-time encoding), model versioning and recomputing embeddings, hybrid search strategies, CLIP-style cross-modal retrieval, multi-embedding designs, and techniques for encoding recency and timestamps. He also digs into MLOps concerns — pipeline brittleness, configuration debt, and deployment trade-offs — plus evaluation: business metrics, A/B testing, offline evaluation and operational metrics.

    Listeners will get concrete guidance on embedding strategy, vector database selection, indexing and ranking trade-offs, and how to measure search impact so teams can prototype faster and productionize reliable retrieval systems." dateadded: 2024-03-25 duration: PT01H05M23S quotableClips: diff --git a/_podcast/building-scalable-and-reliable-machine-learning-systems.md b/_podcast/building-scalable-and-reliable-machine-learning-systems.md index fe23edc6..fb7ebd05 100644 --- a/_podcast/building-scalable-and-reliable-machine-learning-systems.md +++ b/_podcast/building-scalable-and-reliable-machine-learning-systems.md @@ -1,6 +1,6 @@ --- -title: 'Build Scalable, Reliable ML Systems (MLOps): Design Docs, Data Strategy & Edge Constraints' -short: Building Scalable and Reliable Machine Learning Systems +title: "Build Scalable, Reliable ML Systems (MLOps): Design Docs, Data Strategy & Edge Constraints" +short: "Building Scalable and Reliable Machine Learning Systems" season: 14 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6iDyJuhfXibDB6kXFhvaqG?si=urjDGVl6RrWtjVXIAUgOvQ youtube: https://www.youtube.com/watch?v=i-pIdekjUow -description: Learn MLOps design doc and data strategy to build scalable, reliable machine learning systems; navigate edge constraints, metrics, pipelines, and testing -intro: 'How do you design machine learning systems that scale, stay reliable in production, and meet tight edge and mobile constraints? In this episode, Arseny Kravchenko — a seasoned ML engineer focused on computer vision, active in ML since 2015 and a former Kaggle Master — walks through practical MLOps patterns for turning models into production systems.

    We cover where startups trade off productionization and who owns those decisions; how to define ML system goals, non-goals, and assumptions; and why a lightweight design phase with a problem-first design doc (50/50 problem vs solution) pays off. Arseny breaks down edge and mobile ML constraints (latency, FPS, energy, Core ML), managing known and unknown risks with early tests, and building a solution blueprint: baselines, metrics, pipeline components, and data strategy (availability, processing, features, data lakes). He also explains system diagramming for data flow and real-time vs batch, dataset heuristics, and shares design doc examples (photostock search and retail pricing). Listeners will get concrete guidance on MLOps, design docs, data strategy, and edge ML trade-offs — plus pointers to deeper learning resources and a book offer discussed at the end.' +description: "Learn MLOps design doc and data strategy to build scalable, reliable machine learning systems; navigate edge constraints, metrics, pipelines, and testing" +intro: "How do you design machine learning systems that scale, stay reliable in production, and meet tight edge and mobile constraints? In this episode, Arseny Kravchenko — a seasoned ML engineer focused on computer vision, active in ML since 2015 and a former Kaggle Master — walks through practical MLOps patterns for turning models into production systems.

    We cover where startups trade off productionization and who owns those decisions; how to define ML system goals, non-goals, and assumptions; and why a lightweight design phase with a problem-first design doc (50/50 problem vs solution) pays off. Arseny breaks down edge and mobile ML constraints (latency, FPS, energy, Core ML), managing known and unknown risks with early tests, and building a solution blueprint: baselines, metrics, pipeline components, and data strategy (availability, processing, features, data lakes). He also explains system diagramming for data flow and real-time vs batch, dataset heuristics, and shares design doc examples (photostock search and retail pricing). Listeners will get concrete guidance on MLOps, design docs, data strategy, and edge ML trade-offs — plus pointers to deeper learning resources and a book offer discussed at the end." topics: - machine learning - MLOps diff --git a/_podcast/causal-inference-for-machine-learning.md b/_podcast/causal-inference-for-machine-learning.md index 80fca817..d2236c4f 100644 --- a/_podcast/causal-inference-for-machine-learning.md +++ b/_podcast/causal-inference-for-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'Causal Inference for Real-World ML: Uplift Modeling, Counterfactuals, Treatment Effects & LLM Integration' -short: Democratizing Causality +title: "Causal Inference for Real-World ML: Uplift Modeling, Counterfactuals, Treatment Effects & LLM Integration" +short: "Democratizing Causality" season: 15 episode: 6 guests: diff --git a/_podcast/chief-data-officer-data-strategy-and-org-design.md b/_podcast/chief-data-officer-data-strategy-and-org-design.md index 51e4c02c..63ef1827 100644 --- a/_podcast/chief-data-officer-data-strategy-and-org-design.md +++ b/_podcast/chief-data-officer-data-strategy-and-org-design.md @@ -1,6 +1,6 @@ --- -title: 'Mastering the Chief Data Officer Role: Build Data Strategy, Org Design & AI' -short: Chief Data Officer +title: "Mastering the Chief Data Officer Role: 
Build Data Strategy, Org Design & AI" +short: "Chief Data Officer" season: 4 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/64lEB0Wv0a6DfkDi672Ulk apple: https://podcasts.apple.com/us/podcast/chief-data-officer-marco-de-sa/id1541710331?i=1000533326308 -description: Learn how Chief Data Officers build data strategy, org design and roadmaps—get tactics on governance, KPIs, delegation, career growth and remote leadership -intro: 'How do you move from head of data to an effective Chief Data Officer who builds strategy, designs the org, and delivers AI? In this episode, Marco De Sa — CDO at OLX Group with prior data leadership roles at Yahoo, Facebook, Twitter, and Spotify — lays out what modern data leadership really requires.

    We explore the evolving CDO scope: data strategy, governance, and AI; balancing vision versus tactics; and future-proofing data collection for tomorrow’s products. Marco breaks down organizational design and delegation — when to hire multiple VPs, how CDO responsibilities differ from VP, CTO and CPO roles, and how to structure reporting lines. Practical topics include working backwards from goals to data platform and machine learning investment, measuring progress with meaningful KPIs, time management and productivity for senior data leaders, and managing distributed teams.

    Listeners will walk away with concrete frameworks for data strategy, org design, and building a data-driven culture, plus career guidance for aspiring CDOs on technical breadth, soft skills, interviewing, and overcoming resistance with evidence-based persuasion. Ideal for data leaders and executives shaping data strategy, governance, and AI roadmaps.' +description: "Learn how Chief Data Officers build data strategy, org design and roadmaps—get tactics on governance, KPIs, delegation, career growth and remote leadership" +intro: "How do you move from head of data to an effective Chief Data Officer who builds strategy, designs the org, and delivers AI? In this episode, Marco De Sa — CDO at OLX Group with prior data leadership roles at Yahoo, Facebook, Twitter, and Spotify — lays out what modern data leadership really requires.

    We explore the evolving CDO scope: data strategy, governance, and AI; balancing vision versus tactics; and future-proofing data collection for tomorrow’s products. Marco breaks down organizational design and delegation — when to hire multiple VPs, how CDO responsibilities differ from VP, CTO and CPO roles, and how to structure reporting lines. Practical topics include working backwards from goals to data platform and machine learning investment, measuring progress with meaningful KPIs, time management and productivity for senior data leaders, and managing distributed teams.

    Listeners will walk away with concrete frameworks for data strategy, org design, and building a data-driven culture, plus career guidance for aspiring CDOs on technical breadth, soft skills, interviewing, and overcoming resistance with evidence-based persuasion. Ideal for data leaders and executives shaping data strategy, governance, and AI roadmaps." topics: - data strategy - data governance diff --git a/_podcast/cloud-data-governance.md b/_podcast/cloud-data-governance.md index c41b20a3..400299f3 100644 --- a/_podcast/cloud-data-governance.md +++ b/_podcast/cloud-data-governance.md @@ -1,6 +1,6 @@ --- -title: 'How to Build Data Governance in the Cloud: Classification, Catalogs, Policies & ROI' -short: Data Governance +title: "How to Build Data Governance in the Cloud: Classification, Catalogs, Policies & ROI" +short: "Data Governance" season: 3 episode: 10 guests: @@ -16,8 +16,8 @@ links: spotify: https://open.spotify.com/episode/2zaLMrgbIgVkVEWY09b1Wn apple: https://podcasts.apple.com/us/podcast/data-governance-jessi-ashdown-uri-gilad/id1541710331?i=1000525176805 -description: 'Learn data governance in the cloud: build data classification, catalogs & policies, automate tagging, add stewards and measure ROI for trusted, compliant data' -intro: 'How do you build data governance in the cloud that enables access, meets regulation, and demonstrates ROI? In this episode, Jessi Ashdown, Senior UX Researcher for Google Cloud, and Uri Gilad, Product Manager for Data Governance at Google Cloud, walk through practical approaches to data governance in the cloud—grounded in real user research and product experience.

    They define governance beyond security and PII, explain how GDPR and high-profile events like Cambridge Analytica accelerated adoption, and outline the core components: people, processes, and tools. Key topics include data classification and taxonomy, building scalable data catalogs versus spreadsheets, policy design (retention, freshness, purpose-based access), enforcement models, and access workflows. They cover roles such as data stewards and producers, data quality signals, automation (tagging and requests), and what to measure for ROI—catalog metrics, cost versus usage, and compliance value. You’ll also hear an MVP strategy for minimum viable governance and what to include in a catalog (technical metadata, lineage, business glossary).

    Listen to gain actionable steps to scope a cloud data governance program, prioritize by the “why,” and implement classification, catalogs, and policies that balance control and democratized access.' +description: "Learn data governance in the cloud: build data classification, catalogs & policies, automate tagging, add stewards and measure ROI for trusted, compliant data" +intro: "How do you build data governance in the cloud that enables access, meets regulation, and demonstrates ROI? In this episode, Jessi Ashdown, Senior UX Researcher for Google Cloud, and Uri Gilad, Product Manager for Data Governance at Google Cloud, walk through practical approaches to data governance in the cloud—grounded in real user research and product experience.

    They define governance beyond security and PII, explain how GDPR and high-profile events like Cambridge Analytica accelerated adoption, and outline the core components: people, processes, and tools. Key topics include data classification and taxonomy, building scalable data catalogs versus spreadsheets, policy design (retention, freshness, purpose-based access), enforcement models, and access workflows. They cover roles such as data stewards and producers, data quality signals, automation (tagging and requests), and what to measure for ROI—catalog metrics, cost versus usage, and compliance value. You’ll also hear an MVP strategy for minimum viable governance and what to include in a catalog (technical metadata, lineage, business glossary).

    Listen to gain actionable steps to scope a cloud data governance program, prioritize by the “why,” and implement classification, catalogs, and policies that balance control and democratized access." topics: - data governance - data compliance diff --git a/_podcast/community-building-and-teaching-in-ai-tech.md b/_podcast/community-building-and-teaching-in-ai-tech.md index 7f25df9f..dfc632fc 100644 --- a/_podcast/community-building-and-teaching-in-ai-tech.md +++ b/_podcast/community-building-and-teaching-in-ai-tech.md @@ -1,7 +1,6 @@ --- -title: 'Community Building and Teaching in AI & Tech: Project-to-Course Model for - AI Education' -short: Community Building and Teaching in AI & Tech +title: "Community Building and Teaching in AI & Tech: Project-to-Course Model for AI Education" +short: "Community Building and Teaching in AI & Tech" season: 18 episode: 5 guests: @@ -15,24 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/community-building-and-teaching-in-ai-tech-erum-afzal/id1541710331?i=1000655187649 spotify: https://open.spotify.com/episode/4iAvz4Qu0l28fxXvaHdAPj?si=7MdKKu1fTrqxIGPQBT61Ag youtube: https://www.youtube.com/watch?v=7SLd5V7z3xQ -description: Discover Omdena's project-to-course model for AI education and community - building—master curriculum, instructor pipeline, and pathways to join real AI projects. -intro: 'How can communities turn real-world AI projects into repeatable courses that - scale learning and careers? In this episode Erum Afzal — lead ML engineer, PhD researcher - in AI for teacher training, and head of Omdena Academy — walks through a project-to-course - model for AI education rooted in community collaboration.

    We cover Omdena’s - evolution from global, problem-focused projects into structured courses, the design - of foundational data science curricula (Python, Pandas, NumPy, NLP), and the practical - steps for course development: instructor application, content review, delivery, - and engagement strategies (live sessions, selection, graduation). Erum explains - the instructor pipeline and open applications, access pathways for learners, the - academy’s free-course business model with organizational partnerships, and options - for monetization or volunteer teaching.

    Listeners will get concrete guidance - on curriculum tiers (basic to advanced), community growth tactics (start small, - empower sub-communities), ethical concerns around hiring integrity and responsible - ChatGPT use, and where to apply to teach (Omdena.com/Omdena-Academy). This episode - is for educators, community builders, and early-career practitioners who want actionable - models for teaching, curriculum design, and building inclusive AI learning communities.' +description: "Discover Omdena's project-to-course model for AI education and community building—master curriculum, instructor pipeline, and pathways to join real AI projects." +topics: +- AI +- NLP +- data science +- career growth +- leadership +- community building +- teaching +intro: "How can communities turn real-world AI projects into repeatable courses that scale learning and careers? In this episode Erum Afzal — lead ML engineer, PhD researcher in AI for teacher training, and head of Omdena Academy — walks through a project-to-course model for AI education rooted in community collaboration.

    We cover Omdena’s evolution from global, problem-focused projects into structured courses, the design of foundational data science curricula (Python, Pandas, NumPy, NLP), and the practical steps for course development: instructor application, content review, delivery, and engagement strategies (live sessions, selection, graduation). Erum explains the instructor pipeline and open applications, access pathways for learners, the academy’s free-course business model with organizational partnerships, and options for monetization or volunteer teaching.

    Listeners will get concrete guidance on curriculum tiers (basic to advanced), community growth tactics (start small, empower sub-communities), ethical concerns around hiring integrity and responsible ChatGPT use, and where to apply to teach (Omdena.com/Omdena-Academy). This episode is for educators, community builders, and early-career practitioners who want actionable models for teaching, curriculum design, and building inclusive AI learning communities." dateadded: 2024-05-12 duration: PT00H57M03S quotableClips: diff --git a/_podcast/crisp-dm.md b/_podcast/crisp-dm.md index 6a3c6db5..d359eb7f 100644 --- a/_podcast/crisp-dm.md +++ b/_podcast/crisp-dm.md @@ -1,6 +1,6 @@ --- -title: 'CRISP-DM Methodology for Data Science Projects: Business Understanding, Data Preparation, Modeling, Evaluation & Deployment' -short: Processes in a Data Science Project +title: "CRISP-DM Methodology for Data Science Projects: Business Understanding, Data Preparation, Modeling, Evaluation & Deployment" +short: "Processes in a Data Science Project" season: 1 episode: 2 guests: @@ -15,7 +15,7 @@ links: spotify: TODO apple: TODO -description: Learn the CRISP-DM methodology for managing data science projects. Step-by-step guide covering business understanding, data preparation, modeling, evaluation, and deployment +description: "Learn the CRISP-DM methodology for managing data science projects. 
Step-by-step guide covering business understanding, data preparation, modeling, evaluation, and deployment" topics: - data science - machine learning diff --git a/_podcast/data-centric.md b/_podcast/data-centric-ai.md similarity index 97% rename from _podcast/data-centric.md rename to _podcast/data-centric-ai.md index 4a5aa45a..8dedec92 100644 --- a/_podcast/data-centric.md +++ b/_podcast/data-centric-ai.md @@ -1,6 +1,6 @@ --- -title: 'Data-Centric AI: Improve Label Quality & Edit Datasets to Boost Model Performance' -short: Data-Centric AI +title: "Data-Centric AI: Improve Label Quality & Edit Datasets to Boost Model Performance" +short: "Data-Centric AI" season: 12 episode: 3 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/6q1yago5iyMt8OmCX1abG3?si=-OaRAwjaRfOfyQ7_QZEbBw youtube: https://www.youtube.com/watch?v=t3HDdVWQzNM -description: Discover Data-Centric AI tactics to improve label quality and edit datasets to boost model performance, practical workflows, relabeling, augmentation tips -intro: How much can improving label quality and editing your dataset actually boost model performance? In this episode, Marysia Winkels — Lead Data Scientist at GoDataDriven with a Master’s in Artificial Intelligence and a focus on data-efficient deep learning, and co-organizer of PyData Amsterdam/Global — walks through a practical, data-centric approach to that question.

    We cover why shifting from “more data” to “better data” matters, especially for transfer learning and fine-tuning, and contrast model-centric vs data-centric workflows. Marysia breaks down a data-centric competition that used a fixed ResNet with an editable dataset, strategies for targeted relabeling using model confidence and embeddings, lightweight data versioning and low-tech tooling (Google Sheets + scripts), and when to use synthetic augmentation versus manual fixes. You’ll also hear about validation-split integrity, detecting dataset gaps with UMAP, acceptance criteria for real-world contexts, shadow-mode rollouts, and the trade-offs of automating dataset repairs.

    Listen to learn concrete workflows and heuristics to prioritize impactful data fixes, improve label quality, and make dataset edits that measurably increase model performance. Find additional resources at marysia.nl and PyData +description: "Discover Data-Centric AI tactics to improve label quality and edit datasets to boost model performance, practical workflows, relabeling, augmentation tips" +topics: +- machine learning +- data science +- MLOps +- tools +- data governance +intro: "How much can improving label quality and editing your dataset actually boost model performance? In this episode, Marysia Winkels — Lead Data Scientist at GoDataDriven with a Master’s in Artificial Intelligence and a focus on data-efficient deep learning, and co-organizer of PyData Amsterdam/Global — walks through a practical, data-centric approach to that question.

    We cover why shifting from “more data” to “better data” matters, especially for transfer learning and fine-tuning, and contrast model-centric vs data-centric workflows. Marysia breaks down a data-centric competition that used a fixed ResNet with an editable dataset, strategies for targeted relabeling using model confidence and embeddings, lightweight data versioning and low-tech tooling (Google Sheets + scripts), and when to use synthetic augmentation versus manual fixes. You’ll also hear about validation-split integrity, detecting dataset gaps with UMAP, acceptance criteria for real-world contexts, shadow-mode rollouts, and the trade-offs of automating dataset repairs.

    Listen to learn concrete workflows and heuristics to prioritize impactful data fixes, improve label quality, and make dataset edits that measurably increase model performance. Find additional resources at marysia.nl and PyData" dateadded: 2023-01-07 duration: PT00H57M34S diff --git a/_podcast/data-consulting-business-pricing-and-client-acquisition.md b/_podcast/data-consulting-business-pricing-and-client-acquisition.md index f7de5e57..c1350592 100644 --- a/_podcast/data-consulting-business-pricing-and-client-acquisition.md +++ b/_podcast/data-consulting-business-pricing-and-client-acquisition.md @@ -1,6 +1,6 @@ --- -title: 'Build a Data Consulting Business: Customer Validation, User Interviews & Pricing Strategy' -short: Starting a Consultancy in the Data Space +title: "Build a Data Consulting Business: Customer Validation, User Interviews & Pricing Strategy" +short: "Starting a Consultancy in the Data Space" season: 13 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2Y0mKRHq6wVfr25HJ5Ji3Y?si=kUkmMW2AT6-FeRd6SpXWlg youtube: https://www.youtube.com/watch?v=rh_pE35m3vE -description: 'Learn data consulting: customer validation, user interviews and pricing strategy to validate ideas, win clients, set value-based rates & scale your practice.' -intro: How do you validate customers, run effective user interviews, and set pricing to build a sustainable data consulting business? In this episode, Aleksander Kruszelnicki — ex-Delivery Hero product manager turned co-founder of leukos, a boutique data analytics agency in Berlin — walks through the practical steps he took shifting from product ideas to a consulting model after early startup failures.

    We cover market and technical limits of “data stack as a service,” first-customer stories, customer validation techniques for pre-product ideas, and a repeatable user interview strategy (questions, cadence, roles, and note-taking). Aleksander explains why value often sits in data modeling over infrastructure, the decision to pivot to hands-on consulting, and team composition (PM + engineer). You’ll also hear tactical guidance on client acquisition (network-first outreach), positioning for European customers and VCs, messaging examples for revenue/marketing optimization, marketing mix (networking, content, LinkedIn), pricing frameworks and rate setting, contract models (day rates vs project pricing), and practical legal/admin steps for registering a consultancy in Germany.

    Listen to get actionable methods for customer validation, user interviews, pricing strategy, and building a data consulting business that captures real client value +description: "Learn data consulting: customer validation, user interviews and pricing strategy to validate ideas, win clients, set value-based rates & scale your practice." +intro: "How do you validate customers, run effective user interviews, and set pricing to build a sustainable data consulting business? In this episode, Aleksander Kruszelnicki — ex-Delivery Hero product manager turned co-founder of leukos, a boutique data analytics agency in Berlin — walks through the practical steps he took shifting from product ideas to a consulting model after early startup failures.

    We cover market and technical limits of “data stack as a service,” first-customer stories, customer validation techniques for pre-product ideas, and a repeatable user interview strategy (questions, cadence, roles, and note-taking). Aleksander explains why value often sits in data modeling over infrastructure, the decision to pivot to hands-on consulting, and team composition (PM + engineer). You’ll also hear tactical guidance on client acquisition (network-first outreach), positioning for European customers and VCs, messaging examples for revenue/marketing optimization, marketing mix (networking, content, LinkedIn), pricing frameworks and rate setting, contract models (day rates vs project pricing), and practical legal/admin steps for registering a consultancy in Germany.

    Listen to get actionable methods for customer validation, user interviews, pricing strategy, and building a data consulting business that captures real client value" topics: - consulting - entrepreneurship diff --git a/_podcast/data-engineering-career-path-and-skills.md b/_podcast/data-engineering-career-path-and-skills.md index 4f8e6a3c..88e74269 100644 --- a/_podcast/data-engineering-career-path-and-skills.md +++ b/_podcast/data-engineering-career-path-and-skills.md @@ -1,6 +1,6 @@ --- -title: 'Build a Data Engineering Career: Bootcamp Curriculum, SQL Mastery & Interview Prep' -short: Teaching Data Engineers +title: "Build a Data Engineering Career: Bootcamp Curriculum, SQL Mastery & Interview Prep" +short: "Teaching Data Engineers" season: 8 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0Fo6Y62xaWPy7C24eZKfJw?si=lnjgqHUiRdGiZNxE76QMYQ youtube: https://www.youtube.com/watch?v=dFo10l8B6Go -description: 'Master data engineering and SQL with a bootcamp curriculum: employer-validated projects, cloud basics, SQL window functions & interview prep for junior roles.' -intro: How do you build a data engineering career from zero — what should you learn, how do you master SQL, and how do you pass the interviews? In this episode, Jeff Katz — former lawyer turned developer, founder of Jigsaw Labs, and current ML engineer at AppFolio — walks through practical paths into data engineering and how to design bootcamp curriculum that actually leads to hires.

    We cover curriculum development and pedagogy (active learning, conceptual-first lessons, reinforcement cycles), core skills to prioritize (Python, SQL, cloud fundamentals), and why junior-focused programs drop Spark/Kafka/Kubernetes early. Jeff details analytics engineering tools (DBT, Snowflake, Mode, Fivetran), backend and ETL practices (Flask, codebase navigation, testing), data modeling (OLTP vs OLAP), and SQL mastery (window functions, medium LeetCode problems). You’ll also hear about admissions and screening, mid-program internships for real experience, interview stages (screening calls, SQL tests, on-site expectations), and tactics for transitioning from data analyst to data engineer.

    Listen for actionable guidance on building a bootcamp-ready portfolio, targeted interview prep, and the concrete curriculum choices that employers value in data engineering hires +description: "Master data engineering and SQL with a bootcamp curriculum: employer-validated projects, cloud basics, SQL window functions & interview prep for junior roles." +intro: "How do you build a data engineering career from zero — what should you learn, how do you master SQL, and how do you pass the interviews? In this episode, Jeff Katz — former lawyer turned developer, founder of Jigsaw Labs, and current ML engineer at AppFolio — walks through practical paths into data engineering and how to design bootcamp curriculum that actually leads to hires.

    We cover curriculum development and pedagogy (active learning, conceptual-first lessons, reinforcement cycles), core skills to prioritize (Python, SQL, cloud fundamentals), and why junior-focused programs drop Spark/Kafka/Kubernetes early. Jeff details analytics engineering tools (DBT, Snowflake, Mode, Fivetran), backend and ETL practices (Flask, codebase navigation, testing), data modeling (OLTP vs OLAP), and SQL mastery (window functions, medium LeetCode problems). You’ll also hear about admissions and screening, mid-program internships for real experience, interview stages (screening calls, SQL tests, on-site expectations), and tactics for transitioning from data analyst to data engineer.

    Listen for actionable guidance on building a bootcamp-ready portfolio, targeted interview prep, and the concrete curriculum choices that employers value in data engineering hires" topics: - data engineering - education diff --git a/_podcast/data-engineering-leadership-and-modern-data-platforms.md b/_podcast/data-engineering-leadership-and-modern-data-platforms.md index 9dd76f72..d4131efc 100644 --- a/_podcast/data-engineering-leadership-and-modern-data-platforms.md +++ b/_podcast/data-engineering-leadership-and-modern-data-platforms.md @@ -1,6 +1,6 @@ --- -title: 'Data Engineering Leadership: Scale ETL to ELT, Build Robust Data Platforms & Teams' -short: Becoming a Data Engineering Manager +title: "Data Engineering Leadership: Scale ETL to ELT, Build Robust Data Platforms & Teams" +short: "Becoming a Data Engineering Manager" season: 7 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4nWP18woLTt4a7Wm0CQwhM youtube: https://www.youtube.com/watch?v=FljnbUQ796w -description: Learn to scale ETL to ELT and build resilient data platforms—gain leadership skills, stakeholder management, data quality metrics and hiring tips -intro: 'How do you lead a data engineering team to scale ETL into ELT, build a robust data platform, and maintain data quality as you grow? In this episode, Rahul Jain — a data engineering manager at Siemens with 12+ years in data and three years in management — walks through that transition from ETL developer to IoT data platform lead and what leadership looks like in practice.

    We cover practical topics like migrating ETL to ELT architectures, data lake and data lineage design, and end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul discusses stakeholder management, prioritization, hands-on technical credibility, balancing individual contributor work with people management, and onboarding strategies to build trust and delegate effectively. He shares approaches for measuring success (data culture, consumers served, data quality), detecting data reconciliation issues, GDPR tactics like dynamic data masking and role-based access, and how to evaluate new tools (example: Prefect). Hiring, interview screening, and essential skills (SQL, Python, CI/CD, cloud) are also explored.

    Listen to gain concrete leadership and technical guidance for scaling data platforms, improving throughput, and enabling your team to deliver reliable, compliant data products.' +description: "Learn to scale ETL to ELT and build resilient data platforms—gain leadership skills, stakeholder management, data quality metrics and hiring tips" +intro: "How do you lead a data engineering team to scale ETL into ELT, build a robust data platform, and maintain data quality as you grow? In this episode, Rahul Jain — a data engineering manager at Siemens with 12+ years in data and three years in management — walks through that transition from ETL developer to IoT data platform lead and what leadership looks like in practice.

    We cover practical topics like migrating ETL to ELT architectures, data lake and data lineage design, and end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul discusses stakeholder management, prioritization, hands-on technical credibility, balancing individual contributor work with people management, and onboarding strategies to build trust and delegate effectively. He shares approaches for measuring success (data culture, consumers served, data quality), detecting data reconciliation issues, GDPR tactics like dynamic data masking and role-based access, and how to evaluate new tools (example: Prefect). Hiring, interview screening, and essential skills (SQL, Python, CI/CD, cloud) are also explored.

    Listen to gain concrete leadership and technical guidance for scaling data platforms, improving throughput, and enabling your team to deliver reliable, compliant data products." topics: - data engineering - career growth diff --git a/_podcast/data-engineering-tools-modern-data-stack.md b/_podcast/data-engineering-tools-modern-data-stack.md index aea7fc15..671da9e7 100644 --- a/_podcast/data-engineering-tools-modern-data-stack.md +++ b/_podcast/data-engineering-tools-modern-data-stack.md @@ -1,6 +1,6 @@ --- -title: 'ETL vs ELT & Data Lake vs Warehouse: Airbyte, dbt, CDC for Modern Data Engineering' -short: Making Sense of Data Engineering Acronyms and Buzzwords +title: "ETL vs ELT & Data Lake vs Warehouse: Airbyte, dbt, CDC for Modern Data Engineering" +short: "Making Sense of Data Engineering Acronyms and Buzzwords" season: 5 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1AvtwdcAXGGjdJ7fl0Hsuw apple: https://podcasts.apple.com/us/podcast/making-sense-of-data-engineering-acronyms-and/id1541710331?i=1000534990760 -description: Discover ETL vs ELT, data lake vs data warehouse with Airbyte and dbt—learn CDC, orchestration, and governance to design reliable, fast modern data pipelines -intro: How do you decide between ETL and ELT, or when to keep a data lake versus a warehouse—and where do tools like Airbyte, dbt, and CDC fit into a modern data stack? In this episode, Natalie Kwong, Growth Product Manager at Airbyte with prior analytics and ops roles at Harness, KeepTruckin, and AppDynamics, pulls from hands-on experience scaling analytics teams and systems to unpack these trade-offs.

    We break down core concepts—ETL (traditional extract-transform-load) vs ELT (load then transform), the rise of the analytics engineer, and why ELT favors analyst autonomy with dbt. Natalie explains Airbyte's role as a connector/ingestion layer, CDC for row-level change syncing, and orchestration with Airflow. We also cover data lake vs data warehouse purposes, preventing data swamps through governance, schema evolution, operational reverse data flows, and when hybrid architectures make sense.

    If you're designing a modern data platform or refining pipelines, this episode offers practical guidance on ETL vs ELT decisions, choosing lakes vs warehouses, leveraging Airbyte and dbt, and operational considerations like data quality, orchestration, and cleanup practices +description: "Discover ETL vs ELT, data lake vs data warehouse with Airbyte and dbt—learn CDC, orchestration, and governance to design reliable, fast modern data pipelines" +intro: "How do you decide between ETL and ELT, or when to keep a data lake versus a warehouse—and where do tools like Airbyte, dbt, and CDC fit into a modern data stack? In this episode, Natalie Kwong, Growth Product Manager at Airbyte with prior analytics and ops roles at Harness, KeepTruckin, and AppDynamics, pulls from hands-on experience scaling analytics teams and systems to unpack these trade-offs.

    We break down core concepts—ETL (traditional extract-transform-load) vs ELT (load then transform), the rise of the analytics engineer, and why ELT favors analyst autonomy with dbt. Natalie explains Airbyte's role as a connector/ingestion layer, CDC for row-level change syncing, and orchestration with Airflow. We also cover data lake vs data warehouse purposes, preventing data swamps through governance, schema evolution, operational reverse data flows, and when hybrid architectures make sense.

    If you're designing a modern data platform or refining pipelines, this episode offers practical guidance on ETL vs ELT decisions, choosing lakes vs warehouses, leveraging Airbyte and dbt, and operational considerations like data quality, orchestration, and cleanup practices" topics: - data engineering - tools diff --git a/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md index c7f4a0c5..1ccc2b31 100644 --- a/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md +++ b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md @@ -1,6 +1,6 @@ --- title: "Building a Sustainable Data Freelancing Career: Market Validation, Client Acquisition & Strategic Positioning" -short: Taking your Freelance Career to the Next Level +short: "Taking your Freelance Career to the Next Level" season: 20 episode: 9 guests: @@ -14,25 +14,8 @@ links: apple: https://podcasts.apple.com/us/podcast/can-you-quit-your-job-and-still-succeed-as-a-data-freelancer/id1541710331?i=1000718997257 spotify: https://open.spotify.com/episode/3BknrKqhLggx1G5ZbrfgFc youtube: https://www.youtube.com/watch?v=S93V8RgwBig -description: Master data freelancer tactics, pricing strategies and AI tools to land - clients, price services confidently, and boost productivity for higher income -intro: How do you move from employed data pro to a sustainable data freelancer who - consistently lands clients, prices services well, and uses AI to boost productivity? - In this episode, Dimitri Visnadi — an independent data consultant focused on data - strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles - at HP and a Google-partnered firm, and holds a Masters in Business Analytics & Computer - Science from UCL — walks through a practical playbook for data freelancers.

    - Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus - problem-solving, and how to validate freelance viability with financial targets. - He explains how to land initial clients through recruiters and LinkedIn, the idea - behind a data-freelancer job board, market-driven specialization, and insights on - rates, top skills and data management. You’ll hear about scaling choices (lifestyle - business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course - and community approaches for branding and marketing, subscription models and client - relationship management, high-impact small analyses, pricing strategies (hourly - vs packages), and transition planning.

    Listen to get concrete guidance - on landing clients, setting prices, structuring offers, and using AI tools to increase - productivity as a freelance data consultant +description: "Master data freelancer tactics, pricing strategies and AI tools to land clients, price services confidently, and boost productivity for higher income" +intro: "How do you move from employed data pro to a sustainable data freelancer who consistently lands clients, prices services well, and uses AI to boost productivity? In this episode, Dimitri Visnadi — an independent data consultant focused on data strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles at HP and a Google-partnered firm, and holds a Masters in Business Analytics & Computer Science from UCL — walks through a practical playbook for data freelancers.

    Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus problem-solving, and how to validate freelance viability with financial targets. He explains how to land initial clients through recruiters and LinkedIn, the idea behind a data-freelancer job board, market-driven specialization, and insights on rates, top skills and data management. You’ll hear about scaling choices (lifestyle business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course and community approaches for branding and marketing, subscription models and client relationship management, high-impact small analyses, pricing strategies (hourly vs packages), and transition planning.

    Listen to get concrete guidance on landing clients, setting prices, structuring offers, and using AI tools to increase productivity as a freelance data consultant" topics: - Freelance - Career Growth @@ -73,7 +56,7 @@ quotableClips: startOffset: 1431 url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1431 endOffset: 1508 -- name: 'Job Board Insights: Rates, Top Skills & "Data Management"' +- name: 'Job Board Insights: Rates, Top Skills & "Data Management"' startOffset: 1508 url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1508 endOffset: 1968 @@ -315,8 +298,8 @@ transcript: sec: 1469 time: '24:29' who: Alexey -- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' -- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' +- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' +- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' - line: It really depends on the skills you have. If you're a data analyst, you likely won't become a software engineer overnight—it takes time to learn new skills. 
I could pull up numbers on how many software engineering roles get filtered out, diff --git a/_podcast/data-governance-data-access-management.md b/_podcast/data-governance-data-access-management.md index f116b3a0..3740b6fe 100644 --- a/_podcast/data-governance-data-access-management.md +++ b/_podcast/data-governance-data-access-management.md @@ -1,6 +1,6 @@ --- -title: 'Data Governance & Data Access Management: Access Controls, Data Catalogs & Access-as-Code' -short: Data Governance & Data Access Management +title: "Data Governance & Data Access Management: Access Controls, Data Catalogs & Access-as-Code" +short: "Data Governance & Data Access Management" season: 14 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5PDgK1FsGNtKAAyiXOppRs?si=QZDP8k38Q0e4LaZtl4lCMA youtube: https://www.youtube.com/watch?v=IiPOIiUy5b4 -description: Master Data Access Management with data catalog, access controls & access-as-code to stop privilege creep, speed investigations and ensure compliance -intro: 'How do you scale data access management—from access controls and data catalogs to access-as-code—without slowing innovation? In this episode, Bart Vandekerckhove, co-founder and CEO at Raito and former PM of Privacy at Collibra, walks through practical approaches born from consulting with banks (BCBS 239) and tackling early data governance pain.

    We explore what effective data governance looks like for building trust in data, the differences between data catalogs, dictionaries and lineage, and how cloud consolidation and Chinese walls shape access management. Bart covers ownership models (data teams, governance teams, data mesh), common skill gaps for data engineers, and core processes: access requests, approvals, reviews and revocation. You’ll hear actionable tactics for preventing privilege creep—time-bound access, revocation workflows—and guidance on GDPR, privacy vs security roles, and debugging with temporary access.

    Later segments dive into DataOps patterns (active metadata, automated tagging), avoiding role explosion, and the rise of access-as-code with Terraform and IAM. Listeners will gain a clear, incremental strategy for implementing access controls, leveraging data catalogs, and evaluating build vs buy or open source options to scale data access management.' +description: "Master Data Access Management with data catalog, access controls & access-as-code to stop privilege creep, speed investigations and ensure compliance" +intro: "How do you scale data access management—from access controls and data catalogs to access-as-code—without slowing innovation? In this episode, Bart Vandekerckhove, co-founder and CEO at Raito and former PM of Privacy at Collibra, walks through practical approaches born from consulting with banks (BCBS 239) and tackling early data governance pain.

    We explore what effective data governance looks like for building trust in data, the differences between data catalogs, dictionaries and lineage, and how cloud consolidation and Chinese walls shape access management. Bart covers ownership models (data teams, governance teams, data mesh), common skill gaps for data engineers, and core processes: access requests, approvals, reviews and revocation. You’ll hear actionable tactics for preventing privilege creep—time-bound access, revocation workflows—and guidance on GDPR, privacy vs security roles, and debugging with temporary access.

    Later segments dive into DataOps patterns (active metadata, automated tagging), avoiding role explosion, and the rise of access-as-code with Terraform and IAM. Listeners will gain a clear, incremental strategy for implementing access controls, leveraging data catalogs, and evaluating build vs buy or open source options to scale data access management." topics: - data governance dateadded: 2023-06-03 diff --git a/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md index 181e0325..ef379075 100644 --- a/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md +++ b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md @@ -1,6 +1,6 @@ --- -title: 'Ace Data Interviews: Behavioral STARs, Case Strategy, Portfolios & Cold Emails' -short: Ace Non-Technical Data Science Interviews +title: "Ace Data Interviews: Behavioral STARs, Case Strategy, Portfolios & Cold Emails" +short: "Ace Non-Technical Data Science Interviews" season: 6 episode: 2 guests: @@ -15,7 +15,13 @@ links: spotify: https://open.spotify.com/episode/7tO8GmqAcFUUk4fLqxEXy1 apple: https://podcasts.apple.com/us/podcast/ace-non-technical-data-science-interviews-nick-singh/id1541710331?i=1000541631687 -description: 'Master behavioral interviews & prep to break into data roles: build an impact portfolio, use STAR stories, nail case interviews and cold emails.' +description: "Master behavioral interviews & prep to break into data roles: build an impact portfolio, use STAR stories, nail case interviews and cold emails." 
+topics: +- data science +- machine learning +- MLOps +- product management +- job search dateadded: 2021-11-12 duration: PT01H01M38S diff --git a/_podcast/data-journalism-python-visualization-storytelling.md b/_podcast/data-journalism-python-visualization-storytelling.md index b51074f9..87c5d052 100644 --- a/_podcast/data-journalism-python-visualization-storytelling.md +++ b/_podcast/data-journalism-python-visualization-storytelling.md @@ -1,6 +1,6 @@ --- -title: 'Practical Data Journalism: Sourcing, Storytelling, Visualization & Tools (Python, Tableau)' -short: Technical Writing and Data Journalism +title: "Practical Data Journalism: Sourcing, Storytelling, Visualization & Tools (Python, Tableau)" +short: "Technical Writing and Data Journalism" season: 11 episode: 8 guests: @@ -15,7 +15,7 @@ links: spotify: https://open.spotify.com/episode/38b2Y9KgxSFlIHPZ3jqheK?si=SPiURO1bTamVKrKV_laVDQ youtube: https://www.youtube.com/watch?v=uO_lk12q02A -description: 'Discover data journalism: sourcing, storytelling & visualization with Python and Tableau—learn tools, workflows and publishing tips to craft compelling stories.' +description: "Discover data journalism: sourcing, storytelling & visualization with Python and Tableau—learn tools, workflows and publishing tips to craft compelling stories." intro: "How do you transform raw data into compelling, trustworthy journalism that readers can understand and act upon? In this episode, Angelica Lo Duca—researcher at the Institute of Informatics and Telematics (CNR) and Data Journalism professor at the University of Pisa—shares practical frameworks for data journalism covering sourcing, storytelling, visualization, and essential tools like Python and Tableau.

    Drawing from her journey through cryptography, web development, and data science, Angelica tackles real-world challenges: finding reliable small datasets on the web, working with query engines like Presto and Trino, and learning from investigative work like Washington Post projects. She distinguishes data journalism from data science, reveals teaching approaches for digital humanities students, and breaks down an effective writer's workflow: problem identification → solution development → clear results presentation, complete with code repositories and step-by-step clarity.

    You'll gain concrete strategies for converting dense reports and survey PDFs into engaging narratives, visualization best practices (one concept per chart, choosing tables over confusing pie charts), tool selection guidance between Python scripting and Tableau, plus curated learning resources. Whether you're a journalist exploring data tools, a data professional interested in storytelling, or an educator teaching interdisciplinary skills, this episode delivers actionable methods for reliable sourcing, effective narrative construction, and clear data visualization that makes complex information accessible and impactful." topics: - data journalism diff --git a/_podcast/data-leadership-coaching.md b/_podcast/data-leadership-coaching.md index 9538bcd1..3a733c96 100644 --- a/_podcast/data-leadership-coaching.md +++ b/_podcast/data-leadership-coaching.md @@ -1,6 +1,6 @@ --- -title: 'Data Leadership Coaching: Transition to Manager, Stakeholder Skills and Team Impact' -short: Inclusive Data Leadership Coaching +title: "Data Leadership Coaching: Transition to Manager, Stakeholder Skills and Team Impact" +short: "Inclusive Data Leadership Coaching" season: 18 episode: 1 guests: @@ -12,25 +12,14 @@ links: apple: https://podcasts.apple.com/us/podcast/inclusive-data-leadership-coaching-tereza-iofciu/id1541710331?i=1000650865043 spotify: https://open.spotify.com/episode/3zVzlQ0NmAVCtaFQXbqvHE?si=sSZhU-KXRamv2x5YZCDxAg youtube: https://www.youtube.com/watch?v=Z4vOTgzLkJQ -description: 'Master data leadership coaching: transition to manager, build stakeholder - skills, and boost team impact with feedback, visibility, and influence strategies.' -intro: How do you move from a strong individual contributor into a data leader who - can influence stakeholders, grow team impact, and build inclusive practices? 
In - this episode Tereza Iofciu—data science manager, data scientist, data engineer, - product manager, coach and community organizer—walks through her transition from - a PhD in computer science to leading teams and running data leadership coaching. -

    We cover the practical challenges of the manager transition, experiments - that shaped her coaching approach, and ways to scale manager bandwidth using the - “pizza” span-of-control metaphor. Tereza breaks down feedback skills, psychological - safety, and routines for team feedback training, plus leadership learning through - workshops and frameworks. You’ll hear actionable guidance on increasing impact and - promotions, making foundational data work visible with product mindsets and KPIs, - and influencing without authority by framing projects to stakeholders’ priorities. - The conversation also explores cross-functional and inclusive leadership, self-promotion - versus bragging, and concrete coaching formats like one-shot sessions, CV reviews, - and Calendly-driven delivery.

    Listen if you want practical data leadership - coaching on managing the IC-to-manager shift, stakeholder skills, and boosting your - team’s measurable impact. +description: "Master data leadership coaching: transition to manager, build stakeholder skills, and boost team impact with feedback, visibility, and influence strategies." +topics: +- leadership +- career transition +- communication +- team building +- data strategy +intro: "How do you move from a strong individual contributor into a data leader who can influence stakeholders, grow team impact, and build inclusive practices? In this episode Tereza Iofciu—data science manager, data scientist, data engineer, product manager, coach and community organizer—walks through her transition from a PhD in computer science to leading teams and running data leadership coaching.

    We cover the practical challenges of the manager transition, experiments that shaped her coaching approach, and ways to scale manager bandwidth using the “pizza” span-of-control metaphor. Tereza breaks down feedback skills, psychological safety, and routines for team feedback training, plus leadership learning through workshops and frameworks. You’ll hear actionable guidance on increasing impact and promotions, making foundational data work visible with product mindsets and KPIs, and influencing without authority by framing projects to stakeholders’ priorities. The conversation also explores cross-functional and inclusive leadership, self-promotion versus bragging, and concrete coaching formats like one-shot sessions, CV reviews, and Calendly-driven delivery.

    Listen if you want practical data leadership coaching on managing the IC-to-manager shift, stakeholder skills, and boosting your team’s measurable impact." dateadded: 2024-03-31 duration: PT00H56M35S quotableClips: diff --git a/_podcast/data-led-growth-event-tracking-and-reverse-etl.md b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md index eda167d6..b9c8c3fa 100644 --- a/_podcast/data-led-growth-event-tracking-and-reverse-etl.md +++ b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md @@ -1,6 +1,6 @@ --- -title: 'How to Build a Data-Led Growth Stack: Event Tracking, Tracking Plans & Reverse ETL' -short: Becoming a Data-led Professional +title: "How to Build a Data-Led Growth Stack: Event Tracking, Tracking Plans & Reverse ETL" +short: "Becoming a Data-led Professional" season: 3 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2hg3Gi3h5OfdedXENwZwnU apple: https://podcasts.apple.com/us/podcast/becoming-a-data-led-professional-arpit-choudhury/id1541710331?i=1000523422699 -description: Build a data-led growth stack with event tracking, tracking plans & Reverse ETL to activate product data for precise experimentation, personalization, and ops -intro: 'How do you design a data-led growth stack that reliably powers personalization, activation, and operational workflows? In this episode, Arpit Choudhury, founder of Data-led Academy, walks through the practical steps of building a data-led growth stack focused on event tracking, documented tracking plans, and reverse ETL.

    Arpit — who runs Data-led Academy to teach data skills for non-technical and technical teams alike — breaks down the full data flow: collection (client- vs server-side events), storage (warehouses like Snowflake, BigQuery, Redshift), transformation (DBT), analysis (product analytics), and activation (reverse ETL to support, sales, and engagement tools). He covers how to create tracking plans and instrument events (signup, project created, invite, invoice), common tooling (Segment, RudderStack, MetaRouter, Freshpaint, AVO, Iteratively, TrackPlan), and reverse ETL platforms (Census, Hightouch, Grouparoo). You’ll also hear trade-offs around CDPs versus warehouse-centric approaches, buy vs build decisions, and the team roles and documentation practices needed to democratize data.

    Listen to learn concrete patterns for event tracking, tracking-plan ownership, anomaly investigation, and activating product data to drive growth without sacrificing data quality.' +description: "Build a data-led growth stack with event tracking, tracking plans & Reverse ETL to activate product data for precise experimentation, personalization, and ops" +intro: "How do you design a data-led growth stack that reliably powers personalization, activation, and operational workflows? In this episode, Arpit Choudhury, founder of Data-led Academy, walks through the practical steps of building a data-led growth stack focused on event tracking, documented tracking plans, and reverse ETL.

    Arpit — who runs Data-led Academy to teach data skills for non-technical and technical teams alike — breaks down the full data flow: collection (client- vs server-side events), storage (warehouses like Snowflake, BigQuery, Redshift), transformation (DBT), analysis (product analytics), and activation (reverse ETL to support, sales, and engagement tools). He covers how to create tracking plans and instrument events (signup, project created, invite, invoice), common tooling (Segment, RudderStack, MetaRouter, Freshpaint, AVO, Iteratively, TrackPlan), and reverse ETL platforms (Census, Hightouch, Grouparoo). You’ll also hear trade-offs around CDPs versus warehouse-centric approaches, buy vs build decisions, and the team roles and documentation practices needed to democratize data.

    Listen to learn concrete patterns for event tracking, tracking-plan ownership, anomaly investigation, and activating product data to drive growth without sacrificing data quality." topics: - data engineering - tools diff --git a/_podcast/data-mesh-architecture-decentralized-data-products.md b/_podcast/data-mesh-architecture-decentralized-data-products.md index 4f77c1a0..31107916 100644 --- a/_podcast/data-mesh-architecture-decentralized-data-products.md +++ b/_podcast/data-mesh-architecture-decentralized-data-products.md @@ -1,6 +1,6 @@ --- -title: 'Data Mesh Implementation: Build Decentralized Data Products, Contracts & Federated Governance' -short: Data Mesh 101 +title: "Data Mesh Implementation: Build Decentralized Data Products, Contracts & Federated Governance" +short: "Data Mesh 101" season: 10 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5uX5sfRPvC9WAXOM9fRCup?si=FQYB7cpuSOyzq7022xU3Tg youtube: https://www.youtube.com/watch?v=346N_pCtYZU -description: Discover Data Mesh strategies, data contracts and federated governance to build decentralized data products, improve data quality, and scale adoption -intro: 'How do you scale data architecture so teams deliver value without centralized bottlenecks? In this episode, Zhamak Dehghani — director of technology at Thoughtworks and founder of the Data Mesh concept — walks through practical steps for Data Mesh implementation: building decentralized data products, defining data contracts, and establishing federated governance.

    We cover why enterprises face long pipelines to value and how a socio-technical, domain-oriented approach decouples pipelines with clear contracts and ownership. Zhamak explains the mesh-as-graph view, streaming examples of domain producers/consumers and schemas, and the maturity spectrum from tight warehouse schemas to loose coupling. You’ll hear about minimal guarantees and metadata for discoverability, decentralized interoperability (identity and auth), and how to define data product contracts (quality, SLAs, ownership).

    The episode also digs into self-serve data platforms, platform federation with shared standards, governance primitives such as retention and automated validation, and an adoption roadmap including assessment, pilots, and executive buy-in. Listeners will gain concrete guidance on applying Data Mesh principles, designing data products and contracts, and operationalizing federated governance in their organizations.' +description: "Discover Data Mesh strategies, data contracts and federated governance to build decentralized data products, improve data quality, and scale adoption" +intro: "How do you scale data architecture so teams deliver value without centralized bottlenecks? In this episode, Zhamak Dehghani — director of technology at Thoughtworks and founder of the Data Mesh concept — walks through practical steps for Data Mesh implementation: building decentralized data products, defining data contracts, and establishing federated governance.

    We cover why enterprises face long pipelines to value and how a socio-technical, domain-oriented approach decouples pipelines with clear contracts and ownership. Zhamak explains the mesh-as-graph view, streaming examples of domain producers/consumers and schemas, and the maturity spectrum from tight warehouse schemas to loose coupling. You’ll hear about minimal guarantees and metadata for discoverability, decentralized interoperability (identity and auth), and how to define data product contracts (quality, SLAs, ownership).

    The episode also digs into self-serve data platforms, platform federation with shared standards, governance primitives such as retention and automated validation, and an adoption roadmap including assessment, pilots, and executive buy-in. Listeners will gain concrete guidance on applying Data Mesh principles, designing data products and contracts, and operationalizing federated governance in their organizations." topics: - data mesh - data engineering diff --git a/_podcast/data-privacy-engineering-gdpr-machine-learning.md b/_podcast/data-privacy-engineering-gdpr-machine-learning.md index 934e9de2..b6e14d12 100644 --- a/_podcast/data-privacy-engineering-gdpr-machine-learning.md +++ b/_podcast/data-privacy-engineering-gdpr-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'Data Privacy Playbook: Differential Privacy, Federated Learning, PETs & Consent UX' -short: Practical Data Privacy +title: "Data Privacy Playbook: Differential Privacy, Federated Learning, PETs & Consent UX" +short: "Practical Data Privacy" season: 14 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/137H2M9qU5lFqb4hLyMBvg?si=b0KXeubVSpa3bfsuZaS6pQ youtube: https://www.youtube.com/watch?v=gbjoFfrm4iw -description: Discover differential privacy, federated learning and PETs - privacy engineering, consent UX fixes and compliance to reduce re-identification risk -intro: 'How can teams build useful machine learning while respecting user privacy, compliance, and re-identification risk? In this episode, Katharine Jarmul — privacy activist and Principal Data Scientist at ThoughtWorks Germany — walks through a practical Data Privacy Playbook focused on differential privacy, federated learning, privacy-enhancing technologies (PETs) and consent UX.

    Katharine draws on a career from data journalism and NLP to startup work at KI Protect and enterprise ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies for pseudonymisation, encrypted ML and federated architectures. We cover consent and opt-out UX, legal vs technical definitions of privacy, profiling and fingerprinting risks, and privacy-friendly personalization like session-based intent and ephemeral inference.

    You’ll get concrete takeaways: why differential privacy matters (formal definition, use cases, Tumult and other libraries), common anonymization pitfalls (hashing, k-anonymity, Netflix lessons), how PETs fit into system design, and generative AI privacy considerations including retention and localized model deployment. Listeners leave with actionable guidance on privacy engineering, data minimization, consent design, and resources to continue learning.' +description: "Discover differential privacy, federated learning and PETs - privacy engineering, consent UX fixes and compliance to reduce re-identification risk" +intro: "How can teams build useful machine learning while respecting user privacy, compliance, and re-identification risk? In this episode, Katharine Jarmul — privacy activist and Principal Data Scientist at ThoughtWorks Germany — walks through a practical Data Privacy Playbook focused on differential privacy, federated learning, privacy-enhancing technologies (PETs) and consent UX.

    Katharine draws on a career from data journalism and NLP to startup work at KI Protect and enterprise ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies for pseudonymisation, encrypted ML and federated architectures. We cover consent and opt-out UX, legal vs technical definitions of privacy, profiling and fingerprinting risks, and privacy-friendly personalization like session-based intent and ephemeral inference.

    You’ll get concrete takeaways: why differential privacy matters (formal definition, use cases, Tumult and other libraries), common anonymization pitfalls (hashing, k-anonymity, Netflix lessons), how PETs fit into system design, and generative AI privacy considerations including retention and localized model deployment. Listeners leave with actionable guidance on privacy engineering, data minimization, consent design, and resources to continue learning." topics: - data governance - data privacy diff --git a/_podcast/data-professionals-business-skills-in-saas.md b/_podcast/data-professionals-business-skills-in-saas.md index 84717f1f..acb484df 100644 --- a/_podcast/data-professionals-business-skills-in-saas.md +++ b/_podcast/data-professionals-business-skills-in-saas.md @@ -1,6 +1,6 @@ --- -title: 'Practical Skills for Data Professionals in SaaS: Bridging the Gap between Data and Business' -short: Practical Skills for Data Professionals in SaaS +title: "Practical Skills for Data Professionals in SaaS: Bridging the Gap between Data and Business" +short: "Practical Skills for Data Professionals in SaaS" season: 12 episode: 2 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/5tw3qs1XHETDPYrxdEaVbK?si=QIclWOT_QhKhIGrcl-KQXg youtube: https://www.youtube.com/watch?v=xMYRUiTu960 -description: 'Discover practical data science for SaaS: deploy ML, build marketing automation, define metrics and reduce churn—stakeholder tactics, tooling, and storytelling insights.' -intro: 'How do you move data science from experiments to measurable impact in a SaaS business? In this episode, Loris Marini — CEO and founder of Discovering Data and host of the Discovering Data podcast — walks through practical approaches to deploying models, building marketing automation, and turning metrics into persuasive stories.

    Loris covers production challenges for model deployment in SaaS, a marketing automation use case (recommendations and reporting), and how applied research like reinforcement learning maps to real problems. We dig into semantic alignment — defining "customer" and core metrics — plus lead indicators, stickiness, churn, and causal thinking for product metrics. Loris also shares tactics for onboarding stakeholders: stakeholder mapping, CRM-style context capture, meeting immersion, and Notion-based note systems. He emphasizes pragmatic tools (Excel, pivots), prioritizing high-connectivity opportunities, and a conversation-first diagnostic before ML. Finally, learn data storytelling techniques, building trust through active listening and business literacy, and where to find further resources and community.

    Listen to gain concrete strategies for model deployment, marketing automation, measurement, and communicating data-driven outcomes in SaaS.' +description: "Discover practical data science for SaaS: deploy ML, build marketing automation, define metrics and reduce churn—stakeholder tactics, tooling, and storytelling insights." +topics: +- data science +- MLOps +- communication +- tools +- career transition +intro: "How do you move data science from experiments to measurable impact in a SaaS business? In this episode, Loris Marini — CEO and founder of Discovering Data and host of the Discovering Data podcast — walks through practical approaches to deploying models, building marketing automation, and turning metrics into persuasive stories.

    Loris covers production challenges for model deployment in SaaS, a marketing automation use case (recommendations and reporting), and how applied research like reinforcement learning maps to real problems. We dig into semantic alignment — defining \"customer\" and core metrics — plus lead indicators, stickiness, churn, and causal thinking for product metrics. Loris also shares tactics for onboarding stakeholders: stakeholder mapping, CRM-style context capture, meeting immersion, and Notion-based note systems. He emphasizes pragmatic tools (Excel, pivots), prioritizing high-connectivity opportunities, and a conversation-first diagnostic before ML. Finally, learn data storytelling techniques, building trust through active listening and business literacy, and where to find further resources and community.

    Listen to gain concrete strategies for model deployment, marketing automation, measurement, and communicating data-driven outcomes in SaaS." dateadded: 2022-12-17 duration: PT01H15S diff --git a/_podcast/data-quality-data-observability-data-reliability.md b/_podcast/data-quality-data-observability-data-reliability.md index df44bbcd..79356839 100644 --- a/_podcast/data-quality-data-observability-data-reliability.md +++ b/_podcast/data-quality-data-observability-data-reliability.md @@ -1,6 +1,6 @@ --- -title: 'Data Observability Explained: 5 Pillars to Prevent Downtime, Drift & False Positives' -short: 'Data Observability: The Next Frontier of Data Engineering' +title: "Data Observability Explained: 5 Pillars to Prevent Downtime, Drift & False Positives" +short: "Data Observability: The Next Frontier of Data Engineering" season: 3 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/48QcLAw2I1apC1jeo8e1sd apple: https://podcasts.apple.com/us/podcast/data-observability-barr-moses/id1541710331?i=1000518351217 -description: Discover data observability, freshness, lineage and schema detection to prevent downtime, stop model drift and cut false positives in pipelines -intro: How do you prevent data downtime, drift, and false positives before they break analytics and models? In this episode, Barr Moses, CEO and co-founder of Monte Carlo and former VP of Customer Operations at Gainsight, walks through a practical framework for data observability grounded in real-world incidents and DevOps principles.

    Barr explains why batch data needs different approaches than app monitoring and outlines the Five Pillars of Data Observability—freshness, volume, distribution, schema, and lineage. You’ll hear a schema-change case study, learn how silent failures and model drift occur, and how to move from monitoring to true observability for faster root cause analysis using correlation, logs, and lineage. The conversation covers accountability models (RACI), defining and automating data SLAs, operational runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria for end-to-end platforms versus point tools.

    Listeners will get actionable guidance on reducing false positives, prioritizing pipeline fixes, implementing auto lineage, and applying anomaly detection with contextual alerts—practical steps to improve data quality, reliability, and observability across cloud-agnostic environments +description: "Discover data observability, freshness, lineage and schema detection to prevent downtime, stop model drift and cut false positives in pipelines" +intro: "How do you prevent data downtime, drift, and false positives before they break analytics and models? In this episode, Barr Moses, CEO and co-founder of Monte Carlo and former VP of Customer Operations at Gainsight, walks through a practical framework for data observability grounded in real-world incidents and DevOps principles.

    Barr explains why batch data needs different approaches than app monitoring and outlines the Five Pillars of Data Observability—freshness, volume, distribution, schema, and lineage. You’ll hear a schema-change case study, learn how silent failures and model drift occur, and how to move from monitoring to true observability for faster root cause analysis using correlation, logs, and lineage. The conversation covers accountability models (RACI), defining and automating data SLAs, operational runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria for end-to-end platforms versus point tools.

    Listeners will get actionable guidance on reducing false positives, prioritizing pipeline fixes, implementing auto lineage, and applying anomaly detection with contextual alerts—practical steps to improve data quality, reliability, and observability across cloud-agnostic environments" topics: - MLOps - data observability diff --git a/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md index 47d4b8b0..515014bb 100644 --- a/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md +++ b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md @@ -1,6 +1,6 @@ --- -title: 'Analytics for Nonprofits: Build Data Maturity, Teams, Tools & Optimization Strategies' -short: Analytics for a Better World +title: "Analytics for Nonprofits: Build Data Maturity, Teams, Tools & Optimization Strategies" +short: "Analytics for a Better World" season: 13 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5Xiuu4jMBCMuwkokXbwhE2?si=nGRQrMUaRNa5EINbtJadBA youtube: https://www.youtube.com/watch?v=b6x5zZ3C6sQ -description: Learn nonprofit analytics and data maturity strategies to build teams, choose tools and optimize programs-practical roadmaps, case studies, and open resources -intro: How can nonprofits move from basic reporting to optimization using analytics while building the right teams, tools, and governance? In this episode, Parvathy Krishnan, CTO at Analytics for a Better World and professional doctorate in data science, walks through practical steps for building data maturity in the social sector. Drawing on discovery workshops, fellowship pilots (including a waste-collection optimization project in Nairobi), and partnerships with academic and industry groups, Parvathy explains how to assess needs, design maturity roadmaps, and prioritize short- and long-term goals.

    Listen to learn how to structure nonprofit data teams (analysts, data scientists, engineers, and blended roles), select technology (KoboToolbox, PostgreSQL, dashboards, Python/R, cloud deployment), and implement process and governance practices including privacy, SOPs, and version control. The episode also covers curriculum progression—from descriptive to diagnostic, predictive, and optimization—academy programs for practitioners and executives, open resources on YouTube and GitHub, and real-world optimization use cases like healthcare access and COVID testing lab placement. Ideal for nonprofit leaders, data practitioners, and funders seeking actionable guidance on analytics for nonprofits, data maturity, and optimization strategies +description: "Learn nonprofit analytics and data maturity strategies to build teams, choose tools and optimize programs-practical roadmaps, case studies, and open resources" +intro: "How can nonprofits move from basic reporting to optimization using analytics while building the right teams, tools, and governance? In this episode, Parvathy Krishnan, CTO at Analytics for a Better World and professional doctorate in data science, walks through practical steps for building data maturity in the social sector. Drawing on discovery workshops, fellowship pilots (including a waste-collection optimization project in Nairobi), and partnerships with academic and industry groups, Parvathy explains how to assess needs, design maturity roadmaps, and prioritize short- and long-term goals.

    Listen to learn how to structure nonprofit data teams (analysts, data scientists, engineers, and blended roles), select technology (KoboToolbox, PostgreSQL, dashboards, Python/R, cloud deployment), and implement process and governance practices including privacy, SOPs, and version control. The episode also covers curriculum progression—from descriptive to diagnostic, predictive, and optimization—academy programs for practitioners and executives, open resources on YouTube and GitHub, and real-world optimization use cases like healthcare access and COVID testing lab placement. Ideal for nonprofit leaders, data practitioners, and funders seeking actionable guidance on analytics for nonprofits, data maturity, and optimization strategies" topics: - nonprofit - data maturity diff --git a/_podcast/data-science-career-abc-framework.md b/_podcast/data-science-career-abc-framework.md index 2847dc66..26b269de 100644 --- a/_podcast/data-science-career-abc-framework.md +++ b/_podcast/data-science-career-abc-framework.md @@ -1,6 +1,6 @@ --- -title: 'Data Science Career Guide: ABC Framework (Analyst, Builder, Consultant) & Transition Tips' -short: The ABC’s of Data Science +title: "Data Science Career Guide: ABC Framework (Analyst, Builder, Consultant) & Transition Tips" +short: "The ABC’s of Data Science" season: 2 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5T1Nm3HvrS9oIMH6C2AWcf apple: https://podcasts.apple.com/us/podcast/the-abcs-of-data-science-danny-ma/id1541710331?i=1000510794953 -description: 'Master the Data Science ABC Framework: Analyst, Builder, Consultant. Get SQL, Python, MLOps career tips, project roadmap, transition strategies to land roles.' -intro: 'How do you pick the right data science path—and actually make the transition? 
In this episode, Danny Ma, a recovering data scientist now focused on ML and data engineering, walks through his ABC Framework (Analyst, Builder, Consultant) and pragmatic steps for career moves. Danny, who runs the #DataWithDanny community (4,500+ members) and specializes in analytics, supervised ML, data architecture and digital customer experiments, traces his own shift from SQL/SAS/Excel workflows to Python, Kaggle projects and production systems.

    We cover the ABC Framework origins and definitions: Type A (Analyst) — data exploration, visualization and storytelling; Type B (Builder) — ML engineering, MLOps and production mindset; Type C (Consultant/Leader) — stakeholder persuasion and strategy. Danny shares transition tactics: build projects first, learn theory as needed, core tools (Git, Docker, cloud), practicing engineering via mini-projects and mentorship, portfolio and referral strategies, and when advanced degrees matter. Tune in to get concrete guidance on skills to prioritize, how to gain production experience, and a clear roadmap from SQL → visualization → ML → deep learning to advance your data science career.' +description: "Master the Data Science ABC Framework: Analyst, Builder, Consultant. Get SQL, Python, MLOps career tips, project roadmap, transition strategies to land roles." +intro: "How do you pick the right data science path—and actually make the transition? In this episode, Danny Ma, a recovering data scientist now focused on ML and data engineering, walks through his ABC Framework (Analyst, Builder, Consultant) and pragmatic steps for career moves. Danny, who runs the #DataWithDanny community (4,500+ members) and specializes in analytics, supervised ML, data architecture and digital customer experiments, traces his own shift from SQL/SAS/Excel workflows to Python, Kaggle projects and production systems.

    We cover the ABC Framework origins and definitions: Type A (Analyst) — data exploration, visualization and storytelling; Type B (Builder) — ML engineering, MLOps and production mindset; Type C (Consultant/Leader) — stakeholder persuasion and strategy. Danny shares transition tactics: build projects first, learn theory as needed, core tools (Git, Docker, cloud), practicing engineering via mini-projects and mentorship, portfolio and referral strategies, and when advanced degrees matter. Tune in to get concrete guidance on skills to prioritize, how to gain production experience, and a clear roadmap from SQL → visualization → ML → deep learning to advance your data science career." topics: - career transition - data science diff --git a/_podcast/data-science-failures-and-mlops-lessons.md b/_podcast/data-science-failures-and-mlops-lessons.md index 04c6447a..b3f36837 100644 --- a/_podcast/data-science-failures-and-mlops-lessons.md +++ b/_podcast/data-science-failures-and-mlops-lessons.md @@ -1,6 +1,6 @@ --- -title: 'Turn Data Science Project Failures into Career Wins: Production Lessons, MLOps Fixes & Framing Failures on LinkedIn' -short: What Data Scientists Don’t Mention in Their LinkedIn Profiles +title: "Turn Data Science Project Failures into Career Wins: Production Lessons, MLOps Fixes & Framing Failures on LinkedIn" +short: "What Data Scientists Don’t Mention in Their LinkedIn Profiles" season: 3 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3KR6zErxqeDuQ2jo8NDvNx apple: https://podcasts.apple.com/us/podcast/what-data-scientists-dont-mention-in-their-linkedin/id1541710331?i=1000524260842 -description: 'Discover how to turn data science project failures into career wins: practical MLOps fixes, production lessons, LinkedIn framing tips to boost hiring outcomes.' -intro: 'How do you turn data science project failures into tangible career wins — and how should you talk about them on LinkedIn? 
In this episode, Yury Kashnitsky, Ph.D. in applied math, Kaggle Master and Senior ML Scientist at Elsevier who also leads the open course mlcourse.ai, walks through real production ML lessons and MLOps fixes learned across academia, startups and industry.

    We dig into common data science pitfalls and a concrete case study (a BERT-based proofreading regression stopped early), stakeholder communication for when to kill a project, and the missing role of a data product manager. Yury breaks down engineering vs research trade-offs in deployment, production fixes like reducing re-ranking scope to meet latency, when gradient boosting beats CTR heuristics, and DevOps anti-patterns such as SSH deploys and no CI/CD. We also cover practical topics: data labeling cost/quality, going from notebooks to production, multilingual telco NLP, resume choices, interview questions about revenue-producing ML, and how to frame failed projects on LinkedIn with honesty and lessons learned.

    Listen to get actionable MLOps and production-ML strategies, communication tactics for stakeholders, and guidance on reframing failures into career momentum.' +description: "Discover how to turn data science project failures into career wins: practical MLOps fixes, production lessons, LinkedIn framing tips to boost hiring outcomes." +intro: "How do you turn data science project failures into tangible career wins — and how should you talk about them on LinkedIn? In this episode, Yury Kashnitsky, Ph.D. in applied math, Kaggle Master and Senior ML Scientist at Elsevier who also leads the open course mlcourse.ai, walks through real production ML lessons and MLOps fixes learned across academia, startups and industry.

    We dig into common data science pitfalls and a concrete case study (a BERT-based proofreading regression stopped early), stakeholder communication for when to kill a project, and the missing role of a data product manager. Yury breaks down engineering vs research trade-offs in deployment, production fixes like reducing re-ranking scope to meet latency, when gradient boosting beats CTR heuristics, and DevOps anti-patterns such as SSH deploys and no CI/CD. We also cover practical topics: data labeling cost/quality, going from notebooks to production, multilingual telco NLP, resume choices, interview questions about revenue-producing ML, and how to frame failed projects on LinkedIn with honesty and lessons learned.

    Listen to get actionable MLOps and production-ML strategies, communication tactics for stakeholders, and guidance on reframing failures into career momentum." topics: - machine learning - MLOps diff --git a/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md index ee90c2da..8ba849d4 100644 --- a/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md +++ b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md @@ -1,6 +1,6 @@ --- -title: Data Science for Public Policy — Ethical AI, Climate Justice & Impact Projects -short: Data Science for Social Impact +title: "Data Science for Public Policy — Ethical AI, Climate Justice & Impact Projects" +short: "Data Science for Social Impact" season: 10 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7fzBhDrfVfylnBLCJGwUHC?si=3b03d59083804346 youtube: https://www.youtube.com/watch?v=xWC1HAfekRk -description: 'Learn data science for public policy: ethical AI, climate justice & impact project strategies, career tips and actionable project design for social good.' -intro: How can data science meaningfully shape public policy without becoming a tech-first solution or creating new ethical harms? In this episode, Christine Cepelak, a writer and researcher of tech and social issues who’s studying Data Science for Public Policy and has years of experience managing social programs, walks through the practical realities of data science for public policy. We cover career paths and sector differences, a community organizing case study on electronics recycling, and real-world use cases like drone computer vision for refugee aid and rooftop sustainability. Christine digs into ethical AI concerns — including the EU AI Act and social scoring risks — plus project design for long-term impact, stakeholder collaboration with NGOs, and building data pipelines amid limited IT infrastructure. 
Listeners will also hear about public data gaps (recycling programs, corporate transparency), research applications such as satellite imagery for poverty estimation, and future priorities like climate justice and gender equality. Tune in to get concrete guidance on starting volunteer impact projects, where demand for impact data scientists lies, and how to design responsible, policy-driven data work +description: "Learn data science for public policy: ethical AI, climate justice & impact project strategies, career tips and actionable project design for social good." +intro: "How can data science meaningfully shape public policy without becoming a tech-first solution or creating new ethical harms? In this episode, Christine Cepelak, a writer and researcher of tech and social issues who’s studying Data Science for Public Policy and has years of experience managing social programs, walks through the practical realities of data science for public policy. We cover career paths and sector differences, a community organizing case study on electronics recycling, and real-world use cases like drone computer vision for refugee aid and rooftop sustainability. Christine digs into ethical AI concerns — including the EU AI Act and social scoring risks — plus project design for long-term impact, stakeholder collaboration with NGOs, and building data pipelines amid limited IT infrastructure. Listeners will also hear about public data gaps (recycling programs, corporate transparency), research applications such as satellite imagery for poverty estimation, and future priorities like climate justice and gender equality. 
Tune in to get concrete guidance on starting volunteer impact projects, where demand for impact data scientists lies, and how to design responsible, policy-driven data work" topics: - data science - public policy diff --git a/_podcast/data-science-interview-and-cv-guide.md b/_podcast/data-science-interview-and-cv-guide.md index 02e171bc..1d6167d1 100644 --- a/_podcast/data-science-interview-and-cv-guide.md +++ b/_podcast/data-science-interview-and-cv-guide.md @@ -1,6 +1,6 @@ --- -title: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation' -short: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation' +title: "Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation" +short: "Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation" season: 3 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/406wN6xDkYPyLS8i9fUJL5 apple: https://podcasts.apple.com/us/podcast/what-i-learned-after-interviewing-300-data-scientists/id1541710331?i=1000520681105 -description: Master CV optimization, take-home projects and mock interviews to land data science offers—learn SQL/ML prep, negotiation tactics and measurable project impact -intro: How do you make your data science application stand out, ace take-home projects, and negotiate an offer without leaving money on the table? In this episode, Oleg Novikov — creator of NextRound and former data science manager at Uber with a background in data and software engineering — walks through a practical data science interview guide covering CV optimization, take-home projects, mock interviews, and negotiation.

    We dig into career trajectory from engineering to product data science, building projects that differentiate your application, and concrete product work like forecasting and LTV. Oleg demonstrates NextRound's mock-interview chatbot and personalized feedback, explains common hiring funnels (recruiter screen → take-home → interviews), and contrasts product data scientist vs. machine learning engineer expectations. You'll hear specific advice on treating your CV as a landing page, highlighting personal contributions, crafting case-study narratives from business goals to evaluation metrics, and preparing for technical assessments (ML fundamentals, SQL window functions, coding). We also cover handling rejection, replying graciously, evaluating offers, negotiation tactics when your current salary is low, and practical steps for PhDs breaking into industry.

    Listen for actionable steps to refine your data science resume, prioritize take-home ROI, and use mock interviews to iterate faster +description: "Master CV optimization, take-home projects and mock interviews to land data science offers—learn SQL/ML prep, negotiation tactics and measurable project impact" +intro: "How do you make your data science application stand out, ace take-home projects, and negotiate an offer without leaving money on the table? In this episode, Oleg Novikov — creator of NextRound and former data science manager at Uber with a background in data and software engineering — walks through a practical data science interview guide covering CV optimization, take-home projects, mock interviews, and negotiation.

    We dig into career trajectory from engineering to product data science, building projects that differentiate your application, and concrete product work like forecasting and LTV. Oleg demonstrates NextRound's mock-interview chatbot and personalized feedback, explains common hiring funnels (recruiter screen → take-home → interviews), and contrasts product data scientist vs. machine learning engineer expectations. You'll hear specific advice on treating your CV as a landing page, highlighting personal contributions, crafting case-study narratives from business goals to evaluation metrics, and preparing for technical assessments (ML fundamentals, SQL window functions, coding). We also cover handling rejection, replying graciously, evaluating offers, negotiation tactics when your current salary is low, and practical steps for PhDs breaking into industry.

    Listen for actionable steps to refine your data science resume, prioritize take-home ROI, and use mock interviews to iterate faster" topics: - data science - software engineering diff --git a/_podcast/data-science-job-red-flags-and-mismatched-roles.md b/_podcast/data-science-job-red-flags-and-mismatched-roles.md index 388c3347..047ef030 100644 --- a/_podcast/data-science-job-red-flags-and-mismatched-roles.md +++ b/_podcast/data-science-job-red-flags-and-mismatched-roles.md @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4v6h48B0c0Je8xLMo5zMs5?si=hcMUqpPPQYm2vrdi2py1UQ youtube: https://www.youtube.com/watch?v=bqxBiIwtmX4 -description: Discover how to spot misleading job titles, hiring red flags and build stronger data teams-assess tech stacks, interview rigor, salary ranges and career fit -intro: 'How can you tell if a "data scientist" job is really a data engineering role — or a mismatched hire waiting to happen? In this episode, Tereza Iofciu, PhD and seasoned data practitioner, walks through practical ways to spot misleading data job titles, hiring red flags, and how to build clearer, healthier data teams. Tereza brings experience across data science manager, data scientist, data engineer and product manager roles, plus teaching and community leadership (neuefische, PyLadies Hamburg, PSF community award), grounding her advice in real hiring and team-building work.

    We cover why companies rename roles, examples from Scala, Elasticsearch, ETL and Airflow stacks, and the costs of vague job descriptions. You’ll get a role-clarity checklist (team structure, objectives, responsibilities vs. tech lists), signals of data maturity, interview pitfalls (time-consuming take-home tasks, syntax-focused tests), red flags in descriptions (long tech lists, “rockstar” language), and tactics for researching employers (LinkedIn, team pages, conference talks). Also discussed: salary transparency, remote-work fit, retention and career ladders.

    Listen to learn concrete signals and questions to evaluate job descriptions, interviews, and shape better data hiring and team design.' +description: "Discover how to spot misleading job titles, hiring red flags and build stronger data teams-assess tech stacks, interview rigor, salary ranges and career fit" +intro: "How can you tell if a \"data scientist\" job is really a data engineering role — or a mismatched hire waiting to happen? In this episode, Tereza Iofciu, PhD and seasoned data practitioner, walks through practical ways to spot misleading data job titles, hiring red flags, and how to build clearer, healthier data teams. Tereza brings experience across data science manager, data scientist, data engineer and product manager roles, plus teaching and community leadership (neuefische, PyLadies Hamburg, PSF community award), grounding her advice in real hiring and team-building work.

    We cover why companies rename roles, examples from Scala, Elasticsearch, ETL and Airflow stacks, and the costs of vague job descriptions. You’ll get a role-clarity checklist (team structure, objectives, responsibilities vs. tech lists), signals of data maturity, interview pitfalls (time-consuming take-home tasks, syntax-focused tests), red flags in descriptions (long tech lists, “rockstar” language), and tactics for researching employers (LinkedIn, team pages, conference talks). Also discussed: salary transparency, remote-work fit, retention and career ladders.

    Listen to learn concrete signals and questions to evaluate job descriptions, interviews, and shape better data hiring and team design." topics: - data science - data engineering diff --git a/_podcast/data-science-leadership-hiring-mlops.md b/_podcast/data-science-leadership-hiring-mlops.md index ad7cc2c3..a9497be3 100644 --- a/_podcast/data-science-leadership-hiring-mlops.md +++ b/_podcast/data-science-leadership-hiring-mlops.md @@ -1,6 +1,6 @@ --- -title: 'Data Science Leadership: Product-First ML, Recommenders & RTB, MLOps, Hiring & Mentoring' -short: Becoming a Data Science Manager +title: "Data Science Leadership: Product-First ML, Recommenders & RTB, MLOps, Hiring & Mentoring" +short: "Becoming a Data Science Manager" season: 6 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/28Sy4owRwvSJRFTeKAamz2 apple: https://podcasts.apple.com/us/podcast/becoming-a-data-science-manager-mariano-semelman/id1541710331?i=1000547222296 -description: Discover data science leadership, recommender systems & MLOps tactics—hire, mentor and deploy models faster with practical frameworks and tips -intro: How do you lead a data science team that prioritizes product impact while building recommender systems, real-time bidding (RTB) solutions, and maintainable MLOps? In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 years of experience, walks through practical leadership decisions that bridge models and products.

    Mariano describes his shift from software development to data science leadership, daily responsibilities (meetings, mentoring, planning), and how he structures teams of data scientists and ML engineers. Key topics include product-first ML, search and recommender systems, advertising and RTB campaign optimization, CRISP-DM in production, diagnosing overfitting and feature issues, and pragmatic deployment patterns like start simple, fail fast, and iterative experiments. He also shares onboarding tactics (30-60-90 plans), feedback techniques ("ask permission, care, offer options"), one-on-ones, handling departures, code reviews as a manager, delegation through senior engineers, and hiring/remediation practices.

    Listen to learn concrete approaches for prioritizing modeling time, running experiments in production, improving MLOps and NLP practices, and mentoring engineers to deliver measurable product outcomes +description: "Discover data science leadership, recommender systems & MLOps tactics—hire, mentor and deploy models faster with practical frameworks and tips" +intro: "How do you lead a data science team that prioritizes product impact while building recommender systems, real-time bidding (RTB) solutions, and maintainable MLOps? In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 years of experience, walks through practical leadership decisions that bridge models and products.

    Mariano describes his shift from software development to data science leadership, daily responsibilities (meetings, mentoring, planning), and how he structures teams of data scientists and ML engineers. Key topics include product-first ML, search and recommender systems, advertising and RTB campaign optimization, CRISP-DM in production, diagnosing overfitting and feature issues, and pragmatic deployment patterns like start simple, fail fast, and iterative experiments. He also shares onboarding tactics (30-60-90 plans), feedback techniques (\"ask permission, care, offer options\"), one-on-ones, handling departures, code reviews as a manager, delegation through senior engineers, and hiring/remediation practices.

    Listen to learn concrete approaches for prioritizing modeling time, running experiments in production, improving MLOps and NLP practices, and mentoring engineers to deliver measurable product outcomes" topics: - data science - machine learning diff --git a/_podcast/data-science-management-and-agile-machine-learning.md b/_podcast/data-science-management-and-agile-machine-learning.md index b065a4c0..8d9960e6 100644 --- a/_podcast/data-science-management-and-agile-machine-learning.md +++ b/_podcast/data-science-management-and-agile-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'Master Data Science Management: Agile ML, Debrief Culture, Metrics & Scale to Production' -short: The Secret Sauce of Data Science Management +title: "Master Data Science Management: Agile ML, Debrief Culture, Metrics & Scale to Production" +short: "The Secret Sauce of Data Science Management" season: 13 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4kzcUCVPVN1Opq7XI1Dibd?si=f7GlEOs-TFiC9dxTJlXVyw youtube: https://www.youtube.com/watch?v=gcxP0qRO-MY -description: 'Master data science management: learn Agile ML, debrief culture, metrics and POC-to-production strategies to scale teams, boost impact and ship reliable models.' -intro: How do you run data science teams so experiments become reliable, measurable products? In this episode, Shir Meir Lador, a data science group manager at Intuit who builds machine and deep learning models for document intelligence in TurboTax and QuickBooks, walks through practical approaches to data science management and agile ML.

    We explore the origins of debrief culture from military pilot training and how pre/post debriefs drive continuous improvement; concrete practices for agile ML including two-week sprints, exploration sprints, design stories and grooming; and how to scope work, handle AI project uncertainty, and use rapid experimentation to mitigate data risks. Shir also digs into metrics for production ML—business impact, A/B testing, customer-focused KPIs—and people metrics like pulse surveys, manager score and skip-level feedback. You’ll hear about leadership pillars (vision, driving results, culture), team development, goal alignment, cross-functional product partnerships, and tactics for fostering innovation (hackathons, paper clubs).

    Listen for actionable guidance on measuring success, scaling ML to production, and building the managerial skills to lead high-performance data science teams. This episode is for managers and technical leads focused on production ML, machine learning operations, and team-driven impact +description: "Master data science management: learn Agile ML, debrief culture, metrics and POC-to-production strategies to scale teams, boost impact and ship reliable models." +intro: "How do you run data science teams so experiments become reliable, measurable products? In this episode, Shir Meir Lador, a data science group manager at Intuit who builds machine and deep learning models for document intelligence in TurboTax and QuickBooks, walks through practical approaches to data science management and agile ML.

    We explore the origins of debrief culture from military pilot training and how pre/post debriefs drive continuous improvement; concrete practices for agile ML including two-week sprints, exploration sprints, design stories and grooming; and how to scope work, handle AI project uncertainty, and use rapid experimentation to mitigate data risks. Shir also digs into metrics for production ML—business impact, A/B testing, customer-focused KPIs—and people metrics like pulse surveys, manager score and skip-level feedback. You’ll hear about leadership pillars (vision, driving results, culture), team development, goal alignment, cross-functional product partnerships, and tactics for fostering innovation (hackathons, paper clubs).

    Listen for actionable guidance on measuring success, scaling ML to production, and building the managerial skills to lead high-performance data science teams. This episode is for managers and technical leads focused on production ML, machine learning operations, and team-driven impact" topics: - management - machine learning diff --git a/_podcast/data-science-manager-vs-expert-hiring-guide.md b/_podcast/data-science-manager-vs-expert-hiring-guide.md index be5180ed..2a4e565d 100644 --- a/_podcast/data-science-manager-vs-expert-hiring-guide.md +++ b/_podcast/data-science-manager-vs-expert-hiring-guide.md @@ -1,6 +1,6 @@ --- -title: 'Data Science Manager vs Expert: Hiring Strategy, Skills, Team Building & When to Use ML' -short: Data Science Manager vs Data Science Expert +title: "Data Science Manager vs Expert: Hiring Strategy, Skills, Team Building & When to Use ML" +short: "Data Science Manager vs Data Science Expert" season: 6 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5Ug8YA3hKY9Kr5hVFDqZ77 apple: https://podcasts.apple.com/us/podcast/data-science-manager-vs-data-science-expert-barbara/id1541710331?i=1000542496818 -description: Learn hiring strategies for Data Science Manager vs Data Science Expert—when to hire experts, build teams, assess ML needs, and boost business impact -intro: 'When should you hire a data science manager versus a deep technical expert, and how do you decide whether machine learning is actually the right solution? In this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, and manager by passion — walks through her career from GIS → SQL → BI to leading teams, and tackles hiring strategy, role design, and practical ML use cases like mental health monitoring and demand forecasting.

    We cover common pitfalls (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager skill balance between technical literacy and soft skills, and what “hands-on” really means for managers: high-level understanding, code review, and time allocation. Learn when to hire a data science expert for complex models or domain knowledge, and when a manager-plus-generalist approach or a startup “unicorn” makes sense. Barbara also discusses team building (learning plans, pairing), project prioritization, model monitoring, feasibility checks (data quality and baselines), and measuring impact with KPIs and client discovery.

    Listen to gain practical hiring criteria, role profiles, and decision frameworks for when to use machine learning and how to build teams that deliver.' +description: "Learn hiring strategies for Data Science Manager vs Data Science Expert—when to hire experts, build teams, assess ML needs, and boost business impact" +intro: "When should you hire a data science manager versus a deep technical expert, and how do you decide whether machine learning is actually the right solution? In this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, and manager by passion — walks through her career from GIS → SQL → BI to leading teams, and tackles hiring strategy, role design, and practical ML use cases like mental health monitoring and demand forecasting.

    We cover common pitfalls (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager skill balance between technical literacy and soft skills, and what “hands-on” really means for managers: high-level understanding, code review, and time allocation. Learn when to hire a data science expert for complex models or domain knowledge, and when a manager-plus-generalist approach or a startup “unicorn” makes sense. Barbara also discusses team building (learning plans, pairing), project prioritization, model monitoring, feasibility checks (data quality and baselines), and measuring impact with KPIs and client discovery.

    Listen to gain practical hiring criteria, role profiles, and decision frameworks for when to use machine learning and how to build teams that deliver." topics: - data science - machine learning diff --git a/_podcast/data-science-team-structure-and-org-design.md b/_podcast/data-science-team-structure-and-org-design.md index ff85cf8e..b9c32fbd 100644 --- a/_podcast/data-science-team-structure-and-org-design.md +++ b/_podcast/data-science-team-structure-and-org-design.md @@ -1,6 +1,6 @@ --- -title: 'Designing High-Impact Data Science Teams: Centralized vs Embedded Models, Experimentation & Staffing' -short: Designing a Data Science Team +title: "Designing High-Impact Data Science Teams: Centralized vs Embedded Models, Experimentation & Staffing" +short: "Designing a Data Science Team" season: 9 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/62ZzHBEuOLbm6ft0u9dlh7?si=182bea5ac49243af youtube: https://www.youtube.com/watch?v=F_rJ4fg5ZEA -description: 'Discover how to design high-impact data science orgs: centralized vs embedded models, staffing ratios and experimentation to speed decisions and scale impact.' -intro: 'How should you structure a data science organization to maximize product impact: centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director of Data Science at Twitter who leads 70 data scientists and previously led Azure Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation patterns for designing high-impact data science orgs.

    We cover centralized vs embedded models and what “embedding” really means for reporting lines and day-to-day integration with feature teams; Twitter’s hybrid per-division approach for product and ads; staffing guidance (including an engineers-to-data-scientist ratio reference); and rhythms for cross-functional planning, OKRs, and dependency management. Lisa also discusses experimentation and experiment review, defining success metrics and ship criteria, knowledge sharing practices, differences between analytics and ML-heavy data science, and how to partner with product, engineering, design, and research.

    Listen to gain actionable guidance on choosing an org model, setting staffing expectations, establishing experiment and metrics practices, and aligning data pipelines, data quality, and OKRs to drive data-driven product decisions.' +description: "Discover how to design high-impact data science orgs: centralized vs embedded models, staffing ratios and experimentation to speed decisions and scale impact." +intro: "How should you structure a data science organization to maximize product impact: centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director of Data Science at Twitter who leads 70 data scientists and previously led Azure Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation patterns for designing high-impact data science orgs.

    We cover centralized vs embedded models and what “embedding” really means for reporting lines and day-to-day integration with feature teams; Twitter’s hybrid per-division approach for product and ads; staffing guidance (including an engineers-to-data-scientist ratio reference); and rhythms for cross-functional planning, OKRs, and dependency management. Lisa also discusses experimentation and experiment review, defining success metrics and ship criteria, knowledge sharing practices, differences between analytics and ML-heavy data science, and how to partner with product, engineering, design, and research.

    Listen to gain actionable guidance on choosing an org model, setting staffing expectations, establishing experiment and metrics practices, and aligning data pipelines, data quality, and OKRs to drive data-driven product decisions." topics: - data science - data teams diff --git a/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md index 9615cedf..d981ffc5 100644 --- a/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md +++ b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md @@ -1,6 +1,6 @@ --- -title: 'Indie Hacking and Bootstrapping Side Projects for Data Scientists: Build, Launch & Monetize Indie Hacker Products' -short: Indie Hacking and Bootstrapping Side Projects for Data Scientists +title: "Indie Hacking and Bootstrapping Side Projects for Data Scientists: Build, Launch & Monetize Indie Hacker Products" +short: "Indie Hacking and Bootstrapping Side Projects for Data Scientists" season: 12 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2DlD756csrDFAxfuTjSKwY?si=_H2G3bJtQIuJMAe8daEIYg youtube: https://www.youtube.com/watch?v=KsV_SVXlTo8 -description: 'Build indie-hacking products: launch crypto alerts & generative AI apps, validate ideas, choose tech, price effectively and monetize for sustainable growth.' -intro: 'How do you build, launch, and actually monetize indie-hacker products in crypto alerts and generative AI while keeping a day job? In this episode, Pauline Clavelloux — an IBM data science manager and consultant with eight years’ experience who also ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired selfie-to-art tool) — walks through the practical steps.

    We cover Pauline’s career path and an ML production case study (money-laundering detection), then move into indie-hacking essentials: bootstrapping, splitting time between a full-time role and side projects, and validating ideas. You’ll hear how she productized projects (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API fine-tuning vs self-hosted GPUs), managed operating costs, and launched via Twitter and niche listings. The conversation also tackles customer acquisition, pricing constraints, marketing and content strategy, and skills gained across GCP, data engineering, web dev, and growth.

    Listen for actionable guidance on product launch, monetization, and time management for indie hackers working on crypto alerts and generative AI—concrete steps to validate, build, and grow side products without external funding.' +description: "Build indie-hacking products: launch crypto alerts & generative AI apps, validate ideas, choose tech, price effectively and monetize for sustainable growth." +intro: "How do you build, launch, and actually monetize indie-hacker products in crypto alerts and generative AI while keeping a day job? In this episode, Pauline Clavelloux — an IBM data science manager and consultant with eight years’ experience who also ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired selfie-to-art tool) — walks through the practical steps.

    We cover Pauline’s career path and an ML production case study (money-laundering detection), then move into indie-hacking essentials: bootstrapping, splitting time between a full-time role and side projects, and validating ideas. You’ll hear how she productized projects (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API fine-tuning vs self-hosted GPUs), managed operating costs, and launched via Twitter and niche listings. The conversation also tackles customer acquisition, pricing constraints, marketing and content strategy, and skills gained across GCP, data engineering, web dev, and growth.

    Listen for actionable guidance on product launch, monetization, and time management for indie hackers working on crypto alerts and generative AI—concrete steps to validate, build, and grow side products without external funding." topics: - indie hacking - bootstrapping diff --git a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md index 5388e59f..84b8f2e7 100644 --- a/_podcast/data-strategy-and-dataops-for-ai-powered-products.md +++ b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md @@ -1,6 +1,6 @@ --- -title: 'Actionable Data Strategy & DataOps for AI-Powered Products: Pitch, Measure, Use GPT' -short: 'Data Strategy: Key Principles and Best Practices' +title: "Actionable Data Strategy & DataOps for AI-Powered Products: Pitch, Measure, Use GPT" +short: "Data Strategy: Key Principles and Best Practices" season: 14 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7tITQ4nLypogRLUjjK75mx?si=722BlhoLSGuxZlE9ia7VhA youtube: https://www.youtube.com/watch?v=jGbfeYdlCiQ -description: 'Master actionable data strategy, DataOps & GPT: learn to pitch small AI use cases, set baselines, apply CI/CD and deliver measurable AI-powered products.' -intro: How do you turn AI ambitions into measurable, deliverable data products? In this episode Boyan Angelov — author of Elements of Data Strategy and leader of data strategy at Exxeta AG — walks through practical steps to make data strategy actionable for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused plan and explains the due diligence needed to align business goals with feasible use cases.

    Topics covered include use case ideation, feasibility and prioritization, managing influence cascades and scope creep, impact assessment and portfolio management, and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for data — and clarify platform, AI and BI roles and the core skills required for strategists. Boyan also shows how GPT and ChatGPT can be used as a writing co-pilot for outlines, pitches and technical guidance, and recommends starting small with budgeted use cases plus baseline and post-implementation metrics to measure success.

    Listen to get concrete guidance on pitching, measuring and operationalizing a data strategy for AI-powered products — including practical DataOps and GPT workflows you can apply right away +description: "Master actionable data strategy, DataOps & GPT: learn to pitch small AI use cases, set baselines, apply CI/CD and deliver measurable AI-powered products." +intro: "How do you turn AI ambitions into measurable, deliverable data products? In this episode Boyan Angelov — author of Elements of Data Strategy and leader of data strategy at Exxeta AG — walks through practical steps to make data strategy actionable for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused plan and explains the due diligence needed to align business goals with feasible use cases.

    Topics covered include use case ideation, feasibility and prioritization, managing influence cascades and scope creep, impact assessment and portfolio management, and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for data — and clarify platform, AI and BI roles and the core skills required for strategists. Boyan also shows how GPT and ChatGPT can be used as a writing co-pilot for outlines, pitches and technical guidance, and recommends starting small with budgeted use cases plus baseline and post-implementation metrics to measure success.

    Listen to get concrete guidance on pitching, measuring and operationalizing a data strategy for AI-powered products — including practical DataOps and GPT workflows you can apply right away" topics: - data strategy - dataops diff --git a/_podcast/data-team-roles.md b/_podcast/data-team-roles.md index 1562a5c5..c131ace3 100644 --- a/_podcast/data-team-roles.md +++ b/_podcast/data-team-roles.md @@ -1,6 +1,6 @@ --- -title: 'Data Team Roles Explained: Skills, Responsibilities, and How Teams Ship ML Products' -short: Roles in a Data Team +title: "Data Team Roles Explained: Skills, Responsibilities, and How Teams Ship ML Products" +short: "Roles in a Data Team" season: 1 episode: 1 guests: diff --git a/_podcast/data-translator-role-and-data-strategy.md b/_podcast/data-translator-role-and-data-strategy.md index 5639b204..8e4e4deb 100644 --- a/_podcast/data-translator-role-and-data-strategy.md +++ b/_podcast/data-translator-role-and-data-strategy.md @@ -1,6 +1,6 @@ --- -title: 'Data Strategist Guide: Effective Communication to Bridge Data Teams & Management for Data-Driven Growth' -short: 'Data Strategist Guide to Driving Growth: Prototypes, MVPs & Building Data Trust' +title: "Data Strategist Guide: Effective Communication to Bridge Data Teams & Management for Data-Driven Growth" +short: "Data Strategist Guide to Driving Growth: Prototypes, MVPs & Building Data Trust" season: 3 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4RF592cRWxHgcXbx6pV0Ja apple: https://podcasts.apple.com/us/podcast/effective-communication-business-for-data-professionals/id1541710331?i=1000519463715 -description: Discover how a data translator bridges management and tech to drive data-driven growth—practical data strategy, forecasts, prototypes, and team alignment -intro: 'How do you bridge the gap between data teams and management so analytics actually drives growth? 
In this episode, Lior Barak — author of "Data is Like a Plate of Hummus," co-host of WHAT the Data?! and founder of Tale About Data with 12+ years building data teams — lays out the role of a data translator: a product-minded strategist who converts technical outputs into business-aligned action.

    We explore practical tactics for building data trust (proactive alerts, QA dashboards, and confidence intervals for forecasts), embedding with business teams to learn workflows, and using data-led growth to improve recruitment, marketing, and operations. Lior walks through ways to overcome resistance — hackathons and side projects — and advocates lean delivery: MVPs, prototype-first development, clear handover strategies, and scaling with OKRs. He also covers how to explain effort to non-technical stakeholders, break silos through co-working, and use chat-driven remote collaboration effectively.

    Listen to learn concrete approaches for data strategy, data communication, and production-ready delivery that help your organization move from data chaos to measurable, data-driven growth.' +description: "Discover how a data translator bridges management and tech to drive data-driven growth—practical data strategy, forecasts, prototypes, and team alignment" +intro: "How do you bridge the gap between data teams and management so analytics actually drives growth? In this episode, Lior Barak — author of \"Data is Like a Plate of Hummus,\" co-host of WHAT the Data?! and founder of Tale About Data with 12+ years building data teams — lays out the role of a data translator: a product-minded strategist who converts technical outputs into business-aligned action.

    We explore practical tactics for building data trust (proactive alerts, QA dashboards, and confidence intervals for forecasts), embedding with business teams to learn workflows, and using data-led growth to improve recruitment, marketing, and operations. Lior walks through ways to overcome resistance — hackathons and side projects — and advocates lean delivery: MVPs, prototype-first development, clear handover strategies, and scaling with OKRs. He also covers how to explain effort to non-technical stakeholders, break silos through co-working, and use chat-driven remote collaboration effectively.

    Listen to learn concrete approaches for data strategy, data communication, and production-ready delivery that help your organization move from data chaos to measurable, data-driven growth." topics: - data strategy - communication @@ -108,7 +108,7 @@ quotableClips: startOffset: 3050 url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3050 endOffset: 3096 -- name: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' +- name: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' startOffset: 3096 url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3096 endOffset: 3200 @@ -907,7 +907,7 @@ transcript: sec: 3078 time: '51:18' who: Alexey -- header: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' +- header: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' - line: Why did you call your book ”Data is Like a Plate of Hummus”? I think I am getting some ideas from our conversation. But maybe you have a short answer to that question? diff --git a/_podcast/dataops-and-gitops-best-practices-for-data-teams.md b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md index 371de265..df40aded 100644 --- a/_podcast/dataops-and-gitops-best-practices-for-data-teams.md +++ b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md @@ -1,6 +1,6 @@ --- -title: 'DataOps & GitOps for Data Teams: Onboarding, IaC, Reproducibility & Production Best Practices' -short: From Data Science to DataOps +title: "DataOps & GitOps for Data Teams: Onboarding, IaC, Reproducibility & Production Best Practices" +short: "From Data Science to DataOps" season: 11 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6jLgdl59sVCdVNJezdIqJY?si=NXasnXtFQVO0KAcCFbvUtQ youtube: https://www.youtube.com/watch?v=lem7knxqNzg -description: Master DataOps, GitOps and IaC best practices for reproducibility, onboarding and production reliability — actionable Git workflows, Terraform, Docker tips -intro: How do you make data work less fragile 
and easier to onboard while keeping production safe and reproducible? In this episode, Tomasz Hinc, a DataOps practitioner from Poznań with roots in econometrics, product analytics, data engineering and ML, walks through practical DataOps and GitOps patterns for data teams. We cover platform onboarding (requesting infra vs. merge requests), Infrastructure as Code with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis dry-run and apply. Tomasz explains reproducibility strategies—fixed versions, Docker, dependency management—and common production pitfalls like silent failures and Airflow caveats. You’ll hear about reducing onboarding friction for data scientists, the minimal operational skills every data role benefits from (Git, CLI, IAM), and platform team responsibilities for review, enablement and proactive support. If you’re focused on Infrastructure as Code, GitOps, reproducible pipelines, or practical production best practices for batch workloads and CI migrations, this episode delivers hands-on advice, learning paths and tooling choices to make your data work faster, safer and more maintainable +description: "Master DataOps, GitOps and IaC best practices for reproducibility, onboarding and production reliability — actionable Git workflows, Terraform, Docker tips" +intro: "How do you make data work less fragile and easier to onboard while keeping production safe and reproducible? In this episode, Tomasz Hinc, a DataOps practitioner from Poznań with roots in econometrics, product analytics, data engineering and ML, walks through practical DataOps and GitOps patterns for data teams. We cover platform onboarding (requesting infra vs. merge requests), Infrastructure as Code with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis dry-run and apply. 
Tomasz explains reproducibility strategies—fixed versions, Docker, dependency management—and common production pitfalls like silent failures and Airflow caveats. You’ll hear about reducing onboarding friction for data scientists, the minimal operational skills every data role benefits from (Git, CLI, IAM), and platform team responsibilities for review, enablement and proactive support. If you’re focused on Infrastructure as Code, GitOps, reproducible pipelines, or practical production best practices for batch workloads and CI migrations, this episode delivers hands-on advice, learning paths and tooling choices to make your data work faster, safer and more maintainable" topics: - DataOps - GitOps diff --git a/_podcast/dataops-automation-and-reliable-data-pipelines.md b/_podcast/dataops-automation-and-reliable-data-pipelines.md index f2bd2694..80341010 100644 --- a/_podcast/dataops-automation-and-reliable-data-pipelines.md +++ b/_podcast/dataops-automation-and-reliable-data-pipelines.md @@ -1,6 +1,6 @@ --- -title: 'Mastering DataOps: Automation, Observability & CI/CD for Reliable Data Pipelines' -short: Storytime for DataOps +title: "Mastering DataOps: Automation, Observability & CI/CD for Reliable Data Pipelines" +short: "Storytime for DataOps" season: 8 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2PcBsHslUVnjXFhC9hv6zk youtube: https://www.youtube.com/watch?v=0Fx5PCoLkf4 -description: 'Master DataOps: automate pipelines, data observability and CI/CD to cut errors, speed deployments, and deliver reliable, testable data pipelines.' -intro: 'How do you build reliable data pipelines that move fast without breaking production? 
In this episode, Christopher Bergh — CEO and Head Chef at DataKitchen, co-author of the DataOps Cookbook and Manifesto, and a 25+-year veteran across research, engineering, analytics, and leadership — walks through practical approaches to mastering DataOps: automation, observability, and CI/CD for dependable data delivery.

    We cover core targets like error reduction, deployment cycle time, and team productivity; the role of data observability and monitoring in catching production errors; and the trade-offs between “done” and “good.” Chris explains the shift from runbooks to automated playbooks, an automation-first mindset (“code that acts on data”), and seven practical steps for healthier pipelines—VC, tests, CI/CD, and more. He contrasts DataOps with MLOps, argues for end-to-end versioning, and discusses tooling choices including dbt, Great Expectations, and SQL tests, plus platform orchestration and governance.

    Listen to learn concrete tactics for improving data quality, shrinking incident toil, proving systems with end-to-end testing, and where to focus time and tooling to accelerate reliable analytics delivery.' +description: "Master DataOps: automate pipelines, data observability and CI/CD to cut errors, speed deployments, and deliver reliable, testable data pipelines." +intro: "How do you build reliable data pipelines that move fast without breaking production? In this episode, Christopher Bergh — CEO and Head Chef at DataKitchen, co-author of the DataOps Cookbook and Manifesto, and a 25+-year veteran across research, engineering, analytics, and leadership — walks through practical approaches to mastering DataOps: automation, observability, and CI/CD for dependable data delivery.

    We cover core targets like error reduction, deployment cycle time, and team productivity; the role of data observability and monitoring in catching production errors; and the trade-offs between “done” and “good.” Chris explains the shift from runbooks to automated playbooks, an automation-first mindset (“code that acts on data”), and seven practical steps for healthier pipelines—VC, tests, CI/CD, and more. He contrasts DataOps with MLOps, argues for end-to-end versioning, and discusses tooling choices including dbt, Great Expectations, and SQL tests, plus platform orchestration and governance.

    Listen to learn concrete tactics for improving data quality, shrinking incident toil, proving systems with end-to-end testing, and where to focus time and tooling to accelerate reliable analytics delivery." topics: - dataops - practices diff --git a/_podcast/dataops-for-data-engineering.md b/_podcast/dataops-for-data-engineering.md index 1d88d826..bb2bda4a 100644 --- a/_podcast/dataops-for-data-engineering.md +++ b/_podcast/dataops-for-data-engineering.md @@ -1,7 +1,6 @@ --- -title: 'DataOps for Data Engineering: Automation, Observability, CI/CD & Reliable - ML Deployments' -short: DataOps, Observability, and The Cure for Data Team Blues +title: "DataOps for Data Engineering: Automation, Observability, CI/CD & Reliable ML Deployments" +short: "DataOps, Observability, and The Cure for Data Team Blues" season: 18 episode: 9 guests: @@ -15,24 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/dataops-observability-and-the-cure-for-data-team/id1541710331?i=1000665429770 spotify: https://open.spotify.com/episode/02VoOk5UkMcvfq7VkSOegb youtube: https://www.youtube.com/watch?v=HzGpIxV8HtA -description: Master DataOps, data engineering, and CI/CD to deploy reliable ML, cut - cycle time, reduce rework, and build production-ready tests for on-call readiness. -intro: How do you transform fragile data pipelines and unreliable ML deployments into - automated, observable, production-ready systems? In this episode Christopher Bergh, - CEO of DataKitchen and co-author of the DataOps Cookbook and DataOps Manifesto, - walks through practical DataOps for data engineering—drawing on 25+ years across - research, software engineering, and analytics.

    We trace his career from - pre-cloud SQL Server scaling challenges to early DevOps lessons, then dig into what - DataOps means for teams facing burnout, deployment fear, and inconsistent processes. - Key topics include automation, observability, CI/CD pipelines, regression tests - and test data for analytics, model reliability and on-call readiness, end-to-end - deployment automation, data versioning, and the differences between containers and - serverless. The episode also clarifies MLOps and LLM buzzwords, explores day-one/day-two/day-three - operational lifecycle practices, and outlines concrete steps to reduce rework and - cycle time.

    If you’re a data engineer, data scientist, or engineering leader - looking to improve analytics delivery, this conversation offers actionable guidance - on implementing DataOps practices — automation, monitoring, CI/CD, and culture changes - — to make ML deployments more reliable and repeatable. +description: "Master DataOps, data engineering, and CI/CD to deploy reliable ML, cut cycle time, reduce rework, and build production-ready tests for on-call readiness." +topics: +- DataOps +- MLOps +- data engineering +- production +- practices +intro: "How do you transform fragile data pipelines and unreliable ML deployments into automated, observable, production-ready systems? In this episode Christopher Bergh, CEO of DataKitchen and co-author of the DataOps Cookbook and DataOps Manifesto, walks through practical DataOps for data engineering—drawing on 25+ years across research, software engineering, and analytics.

    We trace his career from pre-cloud SQL Server scaling challenges to early DevOps lessons, then dig into what DataOps means for teams facing burnout, deployment fear, and inconsistent processes. Key topics include automation, observability, CI/CD pipelines, regression tests and test data for analytics, model reliability and on-call readiness, end-to-end deployment automation, data versioning, and the differences between containers and serverless. The episode also clarifies MLOps and LLM buzzwords, explores day-one/day-two/day-three operational lifecycle practices, and outlines concrete steps to reduce rework and cycle time.

    If you’re a data engineer, data scientist, or engineering leader looking to improve analytics delivery, this conversation offers actionable guidance on implementing DataOps practices — automation, monitoring, CI/CD, and culture changes — to make ML deployments more reliable and repeatable." dateadded: 2024-09-04 duration: PT01H01M55S quotableClips: diff --git a/_podcast/dataops-principles-and-scalable-data-platforms.md b/_podcast/dataops-principles-and-scalable-data-platforms.md index 1f414e51..cd90c42b 100644 --- a/_podcast/dataops-principles-and-scalable-data-platforms.md +++ b/_podcast/dataops-principles-and-scalable-data-platforms.md @@ -1,6 +1,6 @@ --- -title: 'DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self-Service Lakehouse & Reproducibility' -short: DataOps 101 +title: "DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self-Service Lakehouse & Reproducibility" +short: "DataOps 101" season: 2 episode: 11 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5c2m4FVq4KPCfSXndCAzNd apple: https://podcasts.apple.com/us/podcast/dataops-101-lars-albertsson/id1541710331?i=1000514542438 -description: Discover DataOps strategies, immutable pipelines & a self-service lakehouse to boost reproducibility, scale data platforms, enable analysts and speed delivery -intro: How do you scale a data platform that supports self-service analytics while keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, founder of Scling and former Google, Spotify and Schibsted engineer, walks through pragmatic DataOps principles for building scalable data platforms.

    We dig into building self-service at Spotify, orchestration with Luigi, and the core platform components—storage, compute and workflow engines—plus compute choices like Spark, Flink, containers and managed services. Lars explains immutable, functional pipeline design to solve reproducibility problems, contrasts data lakes and warehouses (raw dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, CDC and database versioning strategies. He also explores batch vs streaming trade-offs, micro-batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, and risks around data mesh and decentralization.

    Listeners will come away with concrete architectural trade-offs, patterns for immutable pipelines and self-service lakehouse design, and recommended readings from the Scling list to deepen expertise in DataOps, lineage, versioning and practical data engineering +description: "Discover DataOps strategies, immutable pipelines & a self-service lakehouse to boost reproducibility, scale data platforms, enable analysts and speed delivery" +intro: "How do you scale a data platform that supports self-service analytics while keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, founder of Scling and former Google, Spotify and Schibsted engineer, walks through pragmatic DataOps principles for building scalable data platforms.

    We dig into building self-service at Spotify, orchestration with Luigi, and the core platform components—storage, compute and workflow engines—plus compute choices like Spark, Flink, containers and managed services. Lars explains immutable, functional pipeline design to solve reproducibility problems, contrasts data lakes and warehouses (raw dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, CDC and database versioning strategies. He also explores batch vs streaming trade-offs, micro-batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, and risks around data mesh and decentralization.

    Listeners will come away with concrete architectural trade-offs, patterns for immutable pipelines and self-service lakehouse design, and recommended readings from the Scling list to deepen expertise in DataOps, lineage, versioning and practical data engineering" topics: - DataOps - date engineering diff --git a/_podcast/datatalksclub-building-scaling-data-community.md b/_podcast/datatalksclub-building-scaling-data-community.md index d469ab3a..c290ba4a 100644 --- a/_podcast/datatalksclub-building-scaling-data-community.md +++ b/_podcast/datatalksclub-building-scaling-data-community.md @@ -1,6 +1,6 @@ --- -title: 'DataTalks.Club Behind the Scenes: Alexey Grigorev on Scaling and Growing the Community' -short: DataTalks.Club Behind the Scenes +title: "DataTalks.Club Behind the Scenes: Alexey Grigorev on Scaling and Growing the Community" +short: "DataTalks.Club Behind the Scenes" season: 7 episode: 1 guests: @@ -16,8 +16,8 @@ links: spotify: https://open.spotify.com/episode/3ltAxUsCE8EAf0pRb9zxDK apple: https://podcasts.apple.com/us/podcast/datatalks-club-behind-the-scenes-eugene-yan-alexey/id1541710331?i=1000548608967 -description: Discover how to scale a 9k+ data science community, automate events, and advance your machine learning career with deployment, mentorship and growth tactics. -intro: 'How do you scale a grassroots machine learning community from a few forum posts to thousands of active members? In this episode, Alexey Grigorev — founder of DataTalks.Club — sits down with Eugene Yan to walk through the real-world steps behind scaling and growing a machine learning community. Alexey shares his origins (forums, landing page, early events), the growth inflection that led to ~9k members, and practical event formats that work: Open Source Spotlight, Minis, Book of the Week, live coding and office hours.

    We cover tactical topics listeners can apply: content production and automation (planning, Zapier, Eventbrite), monetization and sponsorship models, and how project-based offerings like ML Bookcamp and Machine Learning Zoomcamp emphasize end-to-end deployment (Flask, AWS Lambda, Kubernetes, Kubeflow). Alexey also discusses community management lessons — mentoring, product mindset, avoiding tool churn — plus career and productivity advice such as learning-by-projects, public deadlines, and maintaining motivation.

    If you’re building or scaling a machine learning community, this episode offers concrete strategies for community growth, event design, content automation, and running project-focused training.' +description: "Discover how to scale a 9k+ data science community, automate events, and advance your machine learning career with deployment, mentorship and growth tactics." +intro: "How do you scale a grassroots machine learning community from a few forum posts to thousands of active members? In this episode, Alexey Grigorev — founder of DataTalks.Club — sits down with Eugene Yan to walk through the real-world steps behind scaling and growing a machine learning community. Alexey shares his origins (forums, landing page, early events), the growth inflection that led to ~9k members, and practical event formats that work: Open Source Spotlight, Minis, Book of the Week, live coding and office hours.

    We cover tactical topics listeners can apply: content production and automation (planning, Zapier, Eventbrite), monetization and sponsorship models, and how project-based offerings like ML Bookcamp and Machine Learning Zoomcamp emphasize end-to-end deployment (Flask, AWS Lambda, Kubernetes, Kubeflow). Alexey also discusses community management lessons — mentoring, product mindset, avoiding tool churn — plus career and productivity advice such as learning-by-projects, public deadlines, and maintaining motivation.

    If you’re building or scaling a machine learning community, this episode offers concrete strategies for community growth, event design, content automation, and running project-focused training." topics: - community building - machine learning diff --git a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md index 6dc26817..0b32fa3d 100644 --- a/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md +++ b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md @@ -1,7 +1,6 @@ --- -title: 'Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth and - Evolution' -short: DataTalks.Club Anniversary Interview +title: "Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth and Evolution" +short: "DataTalks.Club Anniversary Interview" season: 16 episode: 1 guests: @@ -16,21 +15,7 @@ links: apple: https://podcasts.apple.com/us/podcast/datatalks-club-anniversary-interview-alexey-grigorev/id1541710331?i=1000631114088 spotify: https://open.spotify.com/episode/0j1eKj9NbK3oAXHXHyaNae?si=M7rw9WixTvWw-BfKPXPwVg youtube: https://www.youtube.com/watch?v=nCqwZT9zA0M -intro: 'How do you build a sustainable data community that endures beyond meetup hype - and founder energy? In this episode Alexey Grigorev, founder of DataTalks.Club, - and Johanna Bayer, a researcher about to complete her PhD in machine learning for - clinical neuroimaging at the University of Melbourne, discuss three years of community - growth and evolution.

    Alexey brings the perspective of launching and running - a global data community, while Johanna contributes her background in psychology, - computational neuroscience, and research software engineering, plus advocacy for - open source and open science. Together they explore core topics around sustainable - data community building: membership growth, volunteer and contributor roles, the - intersection of research software engineering with community practice, and how open - source and open science principles support longevity.

    Listeners will come - away with concrete considerations for creating and maintaining a data-focused community—practical - lessons on community governance, contributor engagement, and aligning technical - and social infrastructure—making this episode valuable for anyone building a dataTalks-style - group, open source project, or research software community.' +intro: "How do you build a sustainable data community that endures beyond meetup hype and founder energy? In this episode Alexey Grigorev, founder of DataTalks.Club, and Johanna Bayer, a researcher about to complete her PhD in machine learning for clinical neuroimaging at the University of Melbourne, discuss three years of community growth and evolution.

    Alexey brings the perspective of launching and running a global data community, while Johanna contributes her background in psychology, computational neuroscience, and research software engineering, plus advocacy for open source and open science. Together they explore core topics around sustainable data community building: membership growth, volunteer and contributor roles, the intersection of research software engineering with community practice, and how open source and open science principles support longevity.

    Listeners will come away with concrete considerations for creating and maintaining a data-focused community—practical lessons on community governance, contributor engagement, and aligning technical and social infrastructure—making this episode valuable for anyone building a dataTalks-style group, open source project, or research software community." topics: - community building - machine learning @@ -1221,6 +1206,5 @@ transcript: who: Johanna context: 'Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth and Evolution' -description: Discover DataTalks.Club's 3-year playbook to build a sustainable data - community—scaling, engagement & retention tactics that grow your network. +description: "Discover DataTalks.Club's 3-year playbook to build a sustainable data community—scaling, engagement & retention tactics that grow your network." --- diff --git a/_podcast/datatalksclub-scaling-and-free-courses.md b/_podcast/datatalksclub-scaling-and-free-courses.md index e56caeed..6663799c 100644 --- a/_podcast/datatalksclub-scaling-and-free-courses.md +++ b/_podcast/datatalksclub-scaling-and-free-courses.md @@ -1,7 +1,6 @@ --- -title: 'Inside Scaling DataTalks.Club: How We Built Free Data Engineering, MLOps & - LLM Courses' -short: DataTalks.Club Anniversary Podcast +title: "Inside Scaling DataTalks.Club: How We Built Free Data Engineering, MLOps & LLM Courses" +short: "DataTalks.Club Anniversary Podcast" season: 19 episode: 3 guests: @@ -15,8 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/datatalks-club-4th-anniversary-ama-podcast-alexey-grigorev/id1541710331?i=1000674473200 spotify: https://open.spotify.com/episode/50wIZxjq6goREu9pwXYITP?si=mPW0v5fBQxuBpg622CpCEA youtube: https://www.youtube.com/watch?v=GHbeXIKnkLQ -description: 'Discover how DataTalks.Club built free Data Engineering, MLOps & LLM - courses: scaling open-source curriculum, community growth, and career-ready projects.' 
+description: "Discover how DataTalks.Club built free Data Engineering, MLOps & LLM courses: scaling open-source curriculum, community growth, and career-ready projects." +topics: +- MLOps +- LLMs +- data engineering +- machine learning +- career transition +- community building +- teaching dateadded: 2024-11-08 duration: PT01H03M17S quotableClips: @@ -310,7 +316,7 @@ transcript: - line: 'Yeah, I didn’t know about the Upwork scam. That’s really good to know. Alright, let’s take some questions from the community. Here’s one: "Why did you decide to create a free-to-learn community? What keeps you motivated, and have you ever - thought about stopping or leaving the community?"' + thought about stopping or leaving the community?"' sec: 694 time: '11:34' who: Johanna @@ -1198,19 +1204,5 @@ context: 'Context: Born during COVID as a volunteer meetup, DataTalks.Club scale events, accessible learning) create lasting impact and resilience—allowing thoughtful stewardship to adapt to AI-driven change, achieve product-market fit, and scale education without sacrificing community values.' -intro: How do you scale a volunteer-run learning community into a sustainable platform offering free data engineering, MLOps, and LLM courses? In this episode Alexey Grigorev, founder of DataTalks.Club, walks through the origin story of the project, the leap to running it full-time, and the practical tradeoffs of building free data engineering courses at scale.

    Alexey’s background as the founder guides discussions - on course portfolio decisions (Machine Learning, Data Engineering, MLOps, LLMs, - Stock Analytics), organic growth strategies like Zoomcamp word-of-mouth, and technical - choices—building the course platform in Django. We cover community safety and moderation, - revenue volatility from sponsorships, tax and cashflow considerations in Germany, - and how staying technical through pet projects and LLM experiments informed their - curriculum (including RAG and LLM course development).

    Listeners will get - concrete takeaways on scaling online education, community-driven learning, course - product work, and practical ways to help—mentoring, guesting, or joining projects - and events. Useful for educators, course builders, and data practitioners wondering - how to create and sustain free, high-quality data science and MLOps training. +intro: "How do you scale a volunteer-run learning community into a sustainable platform offering free data engineering, MLOps, and LLM courses? In this episode Alexey Grigorev, founder of DataTalks.Club, walks through the origin story of the project, the leap to running it full-time, and the practical tradeoffs of building free data engineering courses at scale.

    Alexey’s background as the founder guides discussions on course portfolio decisions (Machine Learning, Data Engineering, MLOps, LLMs, Stock Analytics), organic growth strategies like Zoomcamp word-of-mouth, and technical choices—building the course platform in Django. We cover community safety and moderation, revenue volatility from sponsorships, tax and cashflow considerations in Germany, and how staying technical through pet projects and LLM experiments informed their curriculum (including RAG and LLM course development).

    Listeners will get concrete takeaways on scaling online education, community-driven learning, course product work, and practical ways to help—mentoring, guesting, or joining projects and events. Useful for educators, course builders, and data practitioners wondering how to create and sustain free, high-quality data science and MLOps training." --- diff --git a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md index 155e4dfa..b1e72485 100644 --- a/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md +++ b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md @@ -1,6 +1,6 @@ --- -title: 'Deploying LLMs in Production: Fine-Tuning, Retrieval & Open-Source vs API Tradeoffs' -short: LLMs for Everyone +title: "Deploying LLMs in Production: Fine-Tuning, Retrieval & Open-Source vs API Tradeoffs" +short: "LLMs for Everyone" season: 15 episode: 3 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/0tmi2ytNk1bEPldcbhkvhN?si=DtU2OM3RTFmPBdY8sFCv5g youtube: https://www.youtube.com/watch?v=6dn6uZFkk04 -description: 'Discover LLM deployment tactics: fine-tuning, retrieval and open-source vs API tradeoffs to cut latency, control costs, and ground production models.' -intro: 'How do you take large language models from experiment to reliable production—balancing fine-tuning, retrieval strategies, and the tradeoffs between open-source models and API services? In this episode, Meryem Arik, a recovering physicist and co-founder of TitanML, walks through practical choices for LLM deployment based on her pivot from computer vision to building tools that make models smaller, cheaper, and easier to run in production.

    We cover model fundamentals and selection (classification vs generative tasks), open-source model options like LLaMA, FLAN-T5, Falcon and MPT, and the operational realities of serving: model size, compression, inference optimization, latency and cost tradeoffs. Meryem explains when to prototype with GPT-3.5/4 APIs versus self-hosting, the risks of API model drift, and why fine-tuning or retrieval-augmented generation often beats continuous retraining. You’ll also get a clear breakdown of retrieval patterns, vector databases for semantic search, dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing architectures, reducing costs, and grounding answers reliably with retrieval.' +description: "Discover LLM deployment tactics: fine-tuning, retrieval and open-source vs API tradeoffs to cut latency, control costs, and ground production models." +topics: +- LLMs +- MLOps +- open-source +- production +- retrieval-augmented generation +intro: "How do you take large language models from experiment to reliable production—balancing fine-tuning, retrieval strategies, and the tradeoffs between open-source models and API services? In this episode, Meryem Arik, a recovering physicist and co-founder of TitanML, walks through practical choices for LLM deployment based on her pivot from computer vision to building tools that make models smaller, cheaper, and easier to run in production.

    We cover model fundamentals and selection (classification vs generative tasks), open-source model options like LLaMA, FLAN-T5, Falcon and MPT, and the operational realities of serving: model size, compression, inference optimization, latency and cost tradeoffs. Meryem explains when to prototype with GPT-3.5/4 APIs versus self-hosting, the risks of API model drift, and why fine-tuning or retrieval-augmented generation often beats continuous retraining. You’ll also get a clear breakdown of retrieval patterns, vector databases for semantic search, dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing architectures, reducing costs, and grounding answers reliably with retrieval." dateadded: 2023-07-29 duration: PT00H59M31S diff --git a/_podcast/developer-personal-brand-learn-in-public.md b/_podcast/developer-personal-brand-learn-in-public.md index 3666afec..240ca073 100644 --- a/_podcast/developer-personal-brand-learn-in-public.md +++ b/_podcast/developer-personal-brand-learn-in-public.md @@ -1,6 +1,6 @@ --- -title: 'Learn in Public: Personal Branding & Career Marketing for Developers' -short: 'Learn in Public: Personal Branding & Career Marketing for Developers' +title: "Learn in Public: Personal Branding & Career Marketing for Developers" +short: "Learn in Public: Personal Branding & Career Marketing for Developers" season: 3 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6uLyKxpVZv0wItCNyGPdAN apple: https://podcasts.apple.com/us/podcast/how-to-market-yourself-without-being-celebrity-shawn/id1541710331?i=1000522670386 -description: 'Discover personal branding & career marketing for devs: learn-in-public tactics, niche choice and internal promotion to boost visibility and land promotions.' -intro: 'How do developers build visibility, earn promotions, and steer their careers by learning in public? 
In this episode, Shawn Swyx Wang — Senior Developer Advocate for AWS Amplify, author of The Coding Career Handbook, and former engineer at Netlify and Temporal — walks through a practical framework for personal branding and career marketing for developers. We unpack why self-marketing matters beyond job hunting and the five-part personal marketing framework: brand, domain, value, skills, and channel.

    You''ll hear concrete guidance on choosing and validating a niche (meetups, conferences, community signals), building an owned platform (blog, newsletter, mailing list), and distribution tactics from early social growth to the engagement move "pick up what they put down." Swyx also covers career transition strategies, hiring portfolios and case studies, internal pathways like lateral moves and signature initiatives, and creating reusable talks and demos. Practical tools discussed include brag documents, demos for internal promotion, and open knowledge projects as visibility builders. Tune in to get actionable steps to craft a developer personal brand, grow influence, and apply learn-in-public tactics to advance your career and job opportunities.' +description: "Discover personal branding & career marketing for devs: learn-in-public tactics, niche choice and internal promotion to boost visibility and land promotions." +intro: "How do developers build visibility, earn promotions, and steer their careers by learning in public? In this episode, Shawn Swyx Wang — Senior Developer Advocate for AWS Amplify, author of The Coding Career Handbook, and former engineer at Netlify and Temporal — walks through a practical framework for personal branding and career marketing for developers. We unpack why self-marketing matters beyond job hunting and the five-part personal marketing framework: brand, domain, value, skills, and channel.

    You'll hear concrete guidance on choosing and validating a niche (meetups, conferences, community signals), building an owned platform (blog, newsletter, mailing list), and distribution tactics from early social growth to the engagement move \"pick up what they put down.\" Swyx also covers career transition strategies, hiring portfolios and case studies, internal pathways like lateral moves and signature initiatives, and creating reusable talks and demos. Practical tools discussed include brag documents, demos for internal promotion, and open knowledge projects as visibility builders. Tune in to get actionable steps to craft a developer personal brand, grow influence, and apply learn-in-public tactics to advance your career and job opportunities." topics: - personal brand - career growth diff --git a/_podcast/devrel-data-science-open-source-tools.md b/_podcast/devrel-data-science-open-source-tools.md index 1f4fd893..b542f4c2 100644 --- a/_podcast/devrel-data-science-open-source-tools.md +++ b/_podcast/devrel-data-science-open-source-tools.md @@ -1,6 +1,6 @@ --- -title: 'DevRel for Data Science: Build Community, Create Content, and Grow Your Career' -short: Developer Advocacy for Data Science +title: "DevRel for Data Science: Build Community, Create Content, and Grow Your Career" +short: "Developer Advocacy for Data Science" season: 2 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6Hq0ZGPTkDk1h8orfCU78I apple: https://podcasts.apple.com/us/podcast/developer-advocacy-for-data-science-elle-obrien/id1541710331?i=1000506315396 -description: 'Discover DevRel tactics for Data Science: community growth, reproducibility, and content strategy—practical metrics, safety practices, and career growth tips.' -intro: How do you practice developer relations for data science while balancing reproducibility, community growth, and content strategy?
In this episode, Elle O’Brien — a data scientist at Iterative (working on DVC and CML) and a lecturer at the University of Michigan with a PhD in neuroscience and computational modeling from UW — walks through practical DevRel for data-focused tools and teaching.

    We cover her shift from a viral StyleGAN project into DevRel, the scope of a solo developer advocate (product work, docs, PRs, videos, hiring), and how she prioritizes releases versus evergreen content. Elle shares promotion tactics (Hacker News, Reddit, social), approaches to community safety and moderation, and the emotional realities of online work. She explains community metrics, role distinctions between DevRel/advocate/evangelist, and core skills like technical credibility and rapid learning. We also dig into content strategy for teaching—curriculum design, reusable video content, recording lectures as open educational resources, and practical ways to get started blogging and building a developer portfolio.

    Listen to gain actionable guidance on community growth, reproducibility best practices, content planning, and the trade-offs of DevRel work in open source data science +description: "Discover DevRel tactics for Data Science: community growth, reproducibility, and content strategy—practical metrics, safety practices, and career growth tips." +intro: "How do you practice developer relations for data science while balancing reproducibility, community growth, and content strategy? In this episode, Elle O’Brien — a data scientist at Iterative (working on DVC and CML) and a lecturer at the University of Michigan with a PhD in neuroscience and computational modeling from UW — walks through practical DevRel for data-focused tools and teaching.

    We cover her shift from a viral StyleGAN project into DevRel, the scope of a solo developer advocate (product work, docs, PRs, videos, hiring), and how she prioritizes releases versus evergreen content. Elle shares promotion tactics (Hacker News, Reddit, social), approaches to community safety and moderation, and the emotional realities of online work. She explains community metrics, role distinctions between DevRel/advocate/evangelist, and core skills like technical credibility and rapid learning. We also dig into content strategy for teaching—curriculum design, reusable video content, recording lectures as open educational resources, and practical ways to get started blogging and building a developer portfolio.

    Listen to gain actionable guidance on community growth, reproducibility best practices, content planning, and the trade-offs of DevRel work in open source data science" topics: - developer relations - data science diff --git a/_podcast/devrel-open-source-machine-learning.md b/_podcast/devrel-open-source-machine-learning.md index baa82839..86a3aa44 100644 --- a/_podcast/devrel-open-source-machine-learning.md +++ b/_podcast/devrel-open-source-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'DevRel Role for Machine Learning: ML Ecosystems, Open-Source Governance & Developer Experience with Metaflow' -short: DevRel Role for Machine Learning +title: "DevRel Role for Machine Learning: ML Ecosystems, Open-Source Governance & Developer Experience with Metaflow" +short: "DevRel Role for Machine Learning" season: 14 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7bVCKqn9fLt6ETq8hxId5V?si=GZSC3NbvRuyXD85iOQo51Q youtube: https://www.youtube.com/watch?v=z7BvslwVRbQ -description: 'Explore the role of developer relations for machine learning: ML ecosystems, open-source governance and developer experience with Metaflow.' -intro: How do you build effective developer relations for machine learning ecosystems while navigating open-source governance and enhancing developer experience? In this episode, Hugo Bowne-Anderson — Head of Developer Relations at Outerbounds, longtime educator and podcast host — demonstrates Metaflow's capabilities and shares practical guidance for building reproducible ML workflows. Drawing on his background at Coiled and DataCamp and his experience teaching and creating courses, Hugo explores ML ecosystem integrations (AWS, Kubernetes, Argo), interoperability considerations, and company support models for open-source projects like Dask and Metaflow.

    We dive into the DevRel career path, essential skills (technical fluency, writing, community building), organizational structures, and how developer feedback and dogfooding enhance documentation and reproducibility. Hugo discusses generative AI's impact on ML infrastructure and DevRel practices, AI-assisted content creation tools like Whisper and ChatGPT, and strategic approaches to tutorials, blogs, and conference talks. Listen to gain actionable insights on ML ecosystem development, improving developer experience with Metaflow, and aligning DevRel strategies with open-source governance to support scalable machine learning infrastructure +description: "Explore the role of developer relations for machine learning: ML ecosystems, open-source governance and developer experience with Metaflow." +intro: "How do you build effective developer relations for machine learning ecosystems while navigating open-source governance and enhancing developer experience? In this episode, Hugo Bowne-Anderson — Head of Developer Relations at Outerbounds, longtime educator and podcast host — demonstrates Metaflow's capabilities and shares practical guidance for building reproducible ML workflows. Drawing on his background at Coiled and DataCamp and his experience teaching and creating courses, Hugo explores ML ecosystem integrations (AWS, Kubernetes, Argo), interoperability considerations, and company support models for open-source projects like Dask and Metaflow.

    We dive into the DevRel career path, essential skills (technical fluency, writing, community building), organizational structures, and how developer feedback and dogfooding enhance documentation and reproducibility. Hugo discusses generative AI's impact on ML infrastructure and DevRel practices, AI-assisted content creation tools like Whisper and ChatGPT, and strategic approaches to tutorials, blogs, and conference talks. Listen to gain actionable insights on ML ecosystem development, improving developer experience with Metaflow, and aligning DevRel strategies with open-source governance to support scalable machine learning infrastructure" dateadded: 2023-06-17 topics: - developer relations @@ -57,7 +57,7 @@ quotableClips: startOffset: 874 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=874 endOffset: 1083 -- name: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' +- name: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' startOffset: 1083 url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1083 endOffset: 1372 @@ -419,7 +419,7 @@ transcript: sec: 917 time: '15:17' who: Hugo -- header: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' +- header: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' - line: What is actually DevRel? What is it?
sec: 1083 time: '18:03' diff --git a/_podcast/fairness-in-ai-ml-engineering.md b/_podcast/fairness-in-ai-ml-engineering.md index 8dacb4bf..7f0c4011 100644 --- a/_podcast/fairness-in-ai-ml-engineering.md +++ b/_podcast/fairness-in-ai-ml-engineering.md @@ -1,7 +1,6 @@ --- -title: 'Fairness in AI/ML Engineering: Interpretability, Metrics and Sociotechnical - Design' -short: Linguistics and Fairness +title: "Fairness in AI/ML Engineering: Interpretability, Metrics and Sociotechnical Design" +short: "Linguistics and Fairness" season: 19 episode: 9 guests: @@ -15,25 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/linguistics-and-fairness-tamara-atanasoska/id1541710331?i=1000684411354 spotify: https://open.spotify.com/episode/6S4a85iiRzl7NU1HykXeKT?si=FNoDtj74T2ujQKzKdDWwzA youtube: https://www.youtube.com/watch?v=sXU9vMDBjmk -description: Learn fairness, interpretability, and metrics in AI/ML engineering—practical - sociotechnical design steps to evaluate bias, improve transparency, protect users. -intro: How do you reduce bias in credit scoring models without sacrificing explainability? - In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., - Fairlearn maintainer, and contributor to scikit-learn and skops with a background - in software engineering and computational linguistics — walks through practical - approaches to fairness in AI. We dig into a real credit scoring use case, empirical - findings on gender disparities, and the societal harms of biased models such as - debt and repossession.

    Tamara explains Fairlearn’s group fairness tools, - visualization and mitigation methods, and the tradeoffs between false positives, - false negatives, and demographic parity. She discusses how to choose sensitive groups - in domain-specific settings, the limits of automation, the need for human-in-the-loop - systems, and who in an organization should decide fairness tradeoffs. The episode - also covers interpretability and explainable models — inspection tools, partial - dependence, and cross-library integration with scikit-learn and estimator APIs — - plus practical concerns like secure model serialization and community contribution - paths.

    Listen to learn actionable guidance on auditing and mitigating credit - scoring bias, building explainable models, and integrating Fairlearn into real-world - ML workflows +description: "Learn fairness, interpretability, and metrics in AI/ML engineering—practical sociotechnical design steps to evaluate bias, improve transparency, protect users." +topics: +- machine learning +- LLMs +- open-source +- tools +- data governance +- fairness +intro: "How do you reduce bias in credit scoring models without sacrificing explainability? In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., Fairlearn maintainer, and contributor to scikit-learn and skops with a background in software engineering and computational linguistics — walks through practical approaches to fairness in AI. We dig into a real credit scoring use case, empirical findings on gender disparities, and the societal harms of biased models such as debt and repossession.

    Tamara explains Fairlearn’s group fairness tools, visualization and mitigation methods, and the tradeoffs between false positives, false negatives, and demographic parity. She discusses how to choose sensitive groups in domain-specific settings, the limits of automation, the need for human-in-the-loop systems, and who in an organization should decide fairness tradeoffs. The episode also covers interpretability and explainable models — inspection tools, partial dependence, and cross-library integration with scikit-learn and estimator APIs — plus practical concerns like secure model serialization and community contribution paths.

    Listen to learn actionable guidance on auditing and mitigating credit scoring bias, building explainable models, and integrating Fairlearn into real-world ML workflows" dateadded: 2025-02-24 duration: PT00H59M14S quotableClips: diff --git a/_podcast/feature-engineering-model-monitoring-and-data-governance.md b/_podcast/feature-engineering-model-monitoring-and-data-governance.md index c19062d9..c2058de7 100644 --- a/_podcast/feature-engineering-model-monitoring-and-data-governance.md +++ b/_podcast/feature-engineering-model-monitoring-and-data-governance.md @@ -1,6 +1,6 @@ --- -title: 'Practical Data Science & ML: Feature Engineering, Model Monitoring, Data Governance & Storytelling' -short: Building Business Acumen for Data Professionals +title: "Practical Data Science & ML: Feature Engineering, Model Monitoring, Data Governance & Storytelling" +short: "Building Business Acumen for Data Professionals" season: 5 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4dFbkQI9pF4wUDueZFqxGY apple: https://podcasts.apple.com/us/podcast/building-business-acumen-for-data-professionals-thom-ives/id1541710331?i=1000540181044 -description: 'Master feature engineering, model monitoring & data governance: ML tactics to prevent drift, boost performance, and sharpen data storytelling.' -intro: How do you move from models that look good on paper to reliable machine learning in production—while keeping data clean and stakeholders aligned? In this episode Thom Ives, founder of Integrated Machine Learning & AI and a veteran data scientist, walks through practical approaches to feature engineering, model monitoring, data governance, and data storytelling. Thom draws on a career spanning industry roles and mentoring to contrast concept-focused learning versus specialist detail work, and to explain why business acumen and role clarity matter for data teams.

    You’ll hear concrete guidance on ETL reliability, closing data collection gaps, and shared responsibility for data governance and literacy. Thom breaks down the ML pipeline—from feature conditioning, scaling, selection, and engineered features to addressing collinearity with PCA and pursuing model parsimony. He also covers model selection trade-offs, spotting data drift and concept drift in production, and the maintenance needed for long-term generalizability. Finally, he emphasizes analytical storytelling and persuasion skills for influencing decisions, plus community resources and mentoring through Integrated ML & AI. Tune in for actionable tactics to improve data quality, monitoring, and stakeholder communication in real-world data science +description: "Master feature engineering, model monitoring & data governance: ML tactics to prevent drift, boost performance, and sharpen data storytelling." +intro: "How do you move from models that look good on paper to reliable machine learning in production—while keeping data clean and stakeholders aligned? In this episode Thom Ives, founder of Integrated Machine Learning & AI and a veteran data scientist, walks through practical approaches to feature engineering, model monitoring, data governance, and data storytelling. Thom draws on a career spanning industry roles and mentoring to contrast concept-focused learning versus specialist detail work, and to explain why business acumen and role clarity matter for data teams.

    You’ll hear concrete guidance on ETL reliability, closing data collection gaps, and shared responsibility for data governance and literacy. Thom breaks down the ML pipeline—from feature conditioning, scaling, selection, and engineered features to addressing collinearity with PCA and pursuing model parsimony. He also covers model selection trade-offs, spotting data drift and concept drift in production, and the maintenance needed for long-term generalizability. Finally, he emphasizes analytical storytelling and persuasion skills for influencing decisions, plus community resources and mentoring through Integrated ML & AI. Tune in for actionable tactics to improve data quality, monitoring, and stakeholder communication in real-world data science." topics: - data science - machine learning diff --git a/_podcast/finops-for-data-engineers.md b/_podcast/finops-for-data-engineers.md index d2b46ea5..11b1871d 100644 --- a/_podcast/finops-for-data-engineers.md +++ b/_podcast/finops-for-data-engineers.md @@ -1,6 +1,6 @@ --- -title: 'FinOps for Data Engineers: Optimize Cloud Costs, BigQuery & Modern Data Stack' -short: From Supply Chain Management to Digital Warehousing and FinOps +title: "FinOps for Data Engineers: Optimize Cloud Costs, BigQuery & Modern Data Stack" +short: "From Supply Chain Management to Digital Warehousing and FinOps" season: 20 episode: 6 guests: @@ -14,23 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/from-supply-chain-management-to-digital-warehousing/id1541710331?i=1000702233986 spotify: https://open.spotify.com/episode/33YZpX7zE6YcBGbQK9Iclp youtube: https://www.youtube.com/watch?v=7ePp6wuxM5s -description: 'Master FinOps for data engineers: optimize cloud costs with BigQuery - best practices, query tuning and governance to cut spend and boost performance.' -intro: How can data engineers bring FinOps practices into their day-to-day work to - control cloud spend across BigQuery and the modern data stack? 
In this episode, - Eddy Zulkifly — Staff Data Engineer at Kinaxis with a decade of experience building - data platforms on Google Cloud, Azure, and AWS — breaks down practical ways to make - cost optimization part of platform design and operations.

    We explore core - topics including cloud cost optimization for data teams, BigQuery cost controls - and query efficiency, cost-aware architecture in the modern data stack, multi-cloud - considerations, and monitoring and governance for predictable spend. Eddy draws - on experience from Home Depot e-commerce and supply chain analytics, mentoring and - teaching roles, and his work on open-source data projects to translate FinOps principles - into engineering choices.

    If you’re a data engineer or platform owner responsible - for budgets and performance, you’ll get actionable guidance on reducing unnecessary - cloud costs, improving visibility into usage, and designing pipelines that balance - performance with price. Listen to learn practical steps to align data engineering - practices with FinOps goals and make cloud spend more predictable. +description: "Master FinOps for data engineers: optimize cloud costs with BigQuery best practices, query tuning and governance to cut spend and boost performance." +topics: +- data engineering +- finops +- modern data stack +- tools +- career growth +intro: "How can data engineers bring FinOps practices into their day-to-day work to control cloud spend across BigQuery and the modern data stack? In this episode, Eddy Zulkifly — Staff Data Engineer at Kinaxis with a decade of experience building data platforms on Google Cloud, Azure, and AWS — breaks down practical ways to make cost optimization part of platform design and operations.

    We explore core topics including cloud cost optimization for data teams, BigQuery cost controls and query efficiency, cost-aware architecture in the modern data stack, multi-cloud considerations, and monitoring and governance for predictable spend. Eddy draws on experience from Home Depot e-commerce and supply chain analytics, mentoring and teaching roles, and his work on open-source data projects to translate FinOps principles into engineering choices.

    If you’re a data engineer or platform owner responsible for budgets and performance, you’ll get actionable guidance on reducing unnecessary cloud costs, improving visibility into usage, and designing pipelines that balance performance with price. Listen to learn practical steps to align data engineering practices with FinOps goals and make cloud spend more predictable." dateadded: 2025-04-30 duration: PT00H59M54S quotableClips: diff --git a/_podcast/freelance-data-engineering-pricing-and-clients.md b/_podcast/freelance-data-engineering-pricing-and-clients.md index d3f0180c..b1246386 100644 --- a/_podcast/freelance-data-engineering-pricing-and-clients.md +++ b/_podcast/freelance-data-engineering-pricing-and-clients.md @@ -1,6 +1,6 @@ --- -title: 'Freelance Data Engineering Playbook: Pricing, Client Acquisition & Tools' -short: Freelancing and Consulting with Data Engineering +title: "Freelance Data Engineering Playbook: Pricing, Client Acquisition & Tools" +short: "Freelancing and Consulting with Data Engineering" season: 9 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5M9HFWt8xmqf5HyGu40RmJ?si=C95h0CMkRiazs_ft8Z_gRg youtube: https://www.youtube.com/watch?v=9DTTrN-khCk -description: 'Master freelance data engineering: pricing, client acquisition & tools to negotiate rates, scope projects, build reusable portfolios and win repeat clients.' -intro: 'How do you price freelance data engineering work, win steady clients, and pick the right tools for messy production problems? In this episode, Adrian Brudaru — an economist-turned-business analyst who moved to Berlin, left corporate/startup cycles to freelance for five years, and now co-founds a data company releasing open source tooling — walks through a practical playbook for freelance data engineers.

    We cover pricing models (hourly rates, negotiation, occupancy and income variability), client acquisition (networking, repeat business, recruiters vs. direct contracts, Upwork pros and cons), and scoping techniques (spikes, scope documents, managing expectations). Adrian also digs into technical topics: legacy cleanup, Airflow work, and a data loading tool for volatile schemas and automatic unpacking. Along the way, he explains building a reusable portfolio, transitioning from freelancing to product or investing, working remotely vs. on-site, and how to create opportunities in local markets like Berlin.

    Listen to learn concrete approaches to freelance data engineering pricing, client acquisition strategies, scoping projects, and practical tools to handle unstable schemas — so you can evaluate projects, set rates, and grow a sustainable freelance practice.' +description: "Master freelance data engineering: pricing, client acquisition & tools to negotiate rates, scope projects, build reusable portfolios and win repeat clients." +intro: "How do you price freelance data engineering work, win steady clients, and pick the right tools for messy production problems? In this episode, Adrian Brudaru — an economist-turned-business analyst who moved to Berlin, left corporate/startup cycles to freelance for five years, and now co-founds a data company releasing open source tooling — walks through a practical playbook for freelance data engineers.

    We cover pricing models (hourly rates, negotiation, occupancy and income variability), client acquisition (networking, repeat business, recruiters vs. direct contracts, Upwork pros and cons), and scoping techniques (spikes, scope documents, managing expectations). Adrian also digs into technical topics: legacy cleanup, Airflow work, and a data loading tool for volatile schemas and automatic unpacking. Along the way, he explains building a reusable portfolio, transitioning from freelancing to product or investing, working remotely vs. on-site, and how to create opportunities in local markets like Berlin.

    Listen to learn concrete approaches to freelance data engineering pricing, client acquisition strategies, scoping projects, and practical tools to handle unstable schemas — so you can evaluate projects, set rates, and grow a sustainable freelance practice." topics: - data engineering - freelance diff --git a/_podcast/freelancing-in-machine-learning.md b/_podcast/freelancing-in-machine-learning.md index 56659d22..8a592776 100644 --- a/_podcast/freelancing-in-machine-learning.md +++ b/_podcast/freelancing-in-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'Freelancing in Machine Learning: Pricing, Client Acquisition & Proposals' -short: Freelancing in Machine Learning +title: "Freelancing in Machine Learning: Pricing, Client Acquisition & Proposals" +short: "Freelancing in Machine Learning" season: 4 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2oE13mUEa9k4AO5qogYdqv apple: https://podcasts.apple.com/us/podcast/freelancing-in-machine-learning-mikio-braun/id1541710331?i=1000532612872 -description: 'Learn freelancing in machine learning: pricing, client acquisition, and proposals to win ML consulting gigs, scale sustainably, and secure steady income' -intro: 'How do you move from academic research or in-house ML engineering to a sustainable freelance career in machine learning — getting clients, pricing your work, and delivering production systems? In this episode, Mikio Braun, who transitioned from TU Berlin into ML roles at Zalando and GetYourGuide and now consults on machine learning production, infrastructure, and teams, walks through that path step by step.

    We cover the practical parts of freelancing in machine learning: launching first clients, sourcing leads through network and referrals, and demand generation with LinkedIn, talks, and podcasts; pre-sales tactics like free intro calls, problem discovery, and clear proposals; pricing models and rate-setting strategies; financial planning, capacity management, and avoiding burnout; plus specialization, productizing consulting, and scaling options (agency, product, or return to employment). The episode also addresses administrative essentials for freelancers in Germany (registration, VAT, payments), accounting choices, professional liability, and how to compete in a global remote market. Listen for concrete advice on client-finding, scope discipline, and deliverables so you can evaluate whether freelancing in machine learning is the right next step and how to start with a safety net.' +description: "Learn freelancing in machine learning: pricing, client acquisition, and proposals to win ML consulting gigs, scale sustainably, and secure steady income" +intro: "How do you move from academic research or in-house ML engineering to a sustainable freelance career in machine learning — getting clients, pricing your work, and delivering production systems? In this episode, Mikio Braun, who transitioned from TU Berlin into ML roles at Zalando and GetYourGuide and now consults on machine learning production, infrastructure, and teams, walks through that path step by step.

    We cover the practical parts of freelancing in machine learning: launching first clients, sourcing leads through network and referrals, and demand generation with LinkedIn, talks, and podcasts; pre-sales tactics like free intro calls, problem discovery, and clear proposals; pricing models and rate-setting strategies; financial planning, capacity management, and avoiding burnout; plus specialization, productizing consulting, and scaling options (agency, product, or return to employment). The episode also addresses administrative essentials for freelancers in Germany (registration, VAT, payments), accounting choices, professional liability, and how to compete in a global remote market. Listen for concrete advice on client-finding, scope discipline, and deliverables so you can evaluate whether freelancing in machine learning is the right next step and how to start with a safety net." topics: - freelance - consulting diff --git a/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md index 71f2d489..10f1af46 100644 --- a/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md +++ b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md @@ -1,6 +1,6 @@ --- -title: 'Transitioning from Academia to Industry as a Staff AI Engineer: Interview Prep, MLOps & Onboarding' -short: Transitioning from Academia to Industry as a Staff AI Engineer +title: "Transitioning from Academia to Industry as a Staff AI Engineer: Interview Prep, MLOps & Onboarding" +short: "Transitioning from Academia to Industry as a Staff AI Engineer" season: 12 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4o52jMRR2cctCD8LuFFLdD?si=tBO_9KkiSWySHu7jaM-McQ youtube: https://www.youtube.com/watch?v=_xr1_xb736E -description: Discover Staff AI Engineer interview prep, MLOps & onboarding tactics to transition from academia—coding 
strategies, system design, mentorship for impact -intro: 'How do you transition from academia into a Staff AI Engineer role while nailing interview prep, MLOps, and onboarding? In this episode, Tatiana Gabruseva — a computer vision/deep learning engineer, Kaggle Competitions Master, and Senior ML Engineer at Cork University Hospital — walks through her shift from physics and healthcare research into industry engineering leadership.

    We cover practical, concrete topics listeners can use: the onboarding shock she experienced at LinkedIn and how to prioritize learning; ramping up production stacks (Scala, Spark, Kubernetes); the Staff AI Engineer remit of opinion-setting, cross-functional influence, and strategy; and staff engineer archetypes (deep specialist, cross-team advisor, hands-on mentor). Tatiana explains how to translate academic leadership, grants, and research into industry roadmaps, how mentorship accelerates onboarding, and how to convince employers with applied projects and collaborations.

    For candidates she shares interview prep tactics — LeetCode coding plans, ML and system design prep, mock interviews, networking and referrals, and reframing rejections — plus real-world involvement in MLOps, ETL pipelines, and heavy code review. Tune in to learn actionable steps for moving from academia to a staff engineering role and succeeding in interviews, onboarding, and production ML.' +description: "Discover Staff AI Engineer interview prep, MLOps & onboarding tactics to transition from academia—coding strategies, system design, mentorship for impact" +intro: "How do you transition from academia into a Staff AI Engineer role while nailing interview prep, MLOps, and onboarding? In this episode, Tatiana Gabruseva — a computer vision/deep learning engineer, Kaggle Competitions Master, and Senior ML Engineer at Cork University Hospital — walks through her shift from physics and healthcare research into industry engineering leadership.

    We cover practical, concrete topics listeners can use: the onboarding shock she experienced at LinkedIn and how to prioritize learning; ramping up production stacks (Scala, Spark, Kubernetes); the Staff AI Engineer remit of opinion-setting, cross-functional influence, and strategy; and staff engineer archetypes (deep specialist, cross-team advisor, hands-on mentor). Tatiana explains how to translate academic leadership, grants, and research into industry roadmaps, how mentorship accelerates onboarding, and how to convince employers with applied projects and collaborations.

    For candidates she shares interview prep tactics — LeetCode coding plans, ML and system design prep, mock interviews, networking and referrals, and reframing rejections — plus real-world involvement in MLOps, ETL pipelines, and heavy code review. Tune in to learn actionable steps for moving from academia to a staff engineering role and succeeding in interviews, onboarding, and production ML." topics: - machine learning - career transition diff --git a/_podcast/from-academic-research-to-data-engineering-freelancing.md b/_podcast/from-academic-research-to-data-engineering-freelancing.md index efed3052..0fe670d8 100644 --- a/_podcast/from-academic-research-to-data-engineering-freelancing.md +++ b/_podcast/from-academic-research-to-data-engineering-freelancing.md @@ -1,7 +1,6 @@ --- -title: 'From Academic Research to Lean Data Consulting: MVP Strategy, Problem-First - Thinking & Freelance Practice Building' -short: From Simulation Algorithms to Production-Grade Data Systems +title: "From Academic Research to Lean Data Consulting: MVP Strategy, Problem-First Thinking & Freelance Practice Building" +short: "From Simulation Algorithms to Production-Grade Data Systems" season: 21 episode: 1 guests: @@ -15,24 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/from-simulations-to-freelance-data-engineering-orells/id1541710331?i=1000720245457 spotify: https://open.spotify.com/episode/5HCSIO0mO8Pr5Yv9puZ72R youtube: https://www.youtube.com/watch?v=pkcpH5N-GP8 -description: Learn lean data, MVP strategy, and problem-first thinking to build a - freelance consulting practice—turn research into actionable services and win clients - fast. -intro: How do you turn academic research and simulation expertise into a lean data - consulting practice without getting bogged down in perfect solutions? 
In this episode - we talk with Orell Garten, an electrical engineering graduate who focused on simulation - algorithms, left a PhD during COVID, and learned through a government-funded startup - program how to translate scientific research into real products.

    Orell - breaks down problem-first thinking, MVP strategy for data and simulation projects, - and the practical steps involved in freelance practice building after academia. - We explore how to apply rigorous simulation methods to client problems, prioritize - minimal viable products over perfection, and navigate the transition from lab-based - research to lean data consulting.

    Listeners will come away with a clearer - framework for deciding what to build first, how to validate assumptions with lightweight - experiments, and how to position technical skills for consulting engagements. This - episode is for researchers and engineers considering freelance work, consultants - refining their MVP approach, and anyone interested in applying simulation methods - and problem-first thinking to deliver practical data-driven solutions. +description: "Learn lean data, MVP strategy, and problem-first thinking to build a freelance consulting practice—turn research into actionable services and win clients fast." +topics: +- data engineering +- academia +- AI +- MLOps +- computer vision +- freelance +- career transition +intro: "How do you turn academic research and simulation expertise into a lean data consulting practice without getting bogged down in perfect solutions? In this episode we talk with Orell Garten, an electrical engineering graduate who focused on simulation algorithms, left a PhD during COVID, and learned through a government-funded startup program how to translate scientific research into real products.

    Orell breaks down problem-first thinking, MVP strategy for data and simulation projects, and the practical steps involved in freelance practice building after academia. We explore how to apply rigorous simulation methods to client problems, prioritize minimal viable products over perfection, and navigate the transition from lab-based research to lean data consulting.

    Listeners will come away with a clearer framework for deciding what to build first, how to validate assumptions with lightweight experiments, and how to position technical skills for consulting engagements. This episode is for researchers and engineers considering freelance work, consultants refining their MVP approach, and anyone interested in applying simulation methods and problem-first thinking to deliver practical data-driven solutions." dateadded: 2025-08-05 duration: PT01H03M31S quotableClips: diff --git a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md index c209204b..57fd1c71 100644 --- a/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md +++ b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md @@ -1,7 +1,6 @@ --- -title: 'From Biology to ML: Build a Data Science Portfolio with Open-Source, Computer - Vision & Transformers' -short: Career advice, learning, and featuring women in ML and AI +title: "From Biology to ML: Build a Data Science Portfolio with Open-Source, Computer Vision & Transformers" +short: "Career advice, learning, and featuring women in ML and AI" season: 19 episode: 7 guests: @@ -15,24 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/career-advice-learning-and-featuring-women-in-ml-and/id1541710331?i=1000680294201 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=rx69Xf98QZqGqgpEQgzX2w youtube: https://www.youtube.com/watch?v=GifY8Zn-pnU -description: Build a data science portfolio with open-source computer vision & transformers—gain - hands-on projects, GitHub proof, and interview-ready ML skills. 
-intro: How do you move from a biology background into machine learning and build a - data science portfolio that actually gets noticed? In this episode, Isabella Bicalho - — a machine learning engineer and data scientist with three years of hands-on AI - development and roots in computational research — walks through practical approaches - for showcasing skills with open-source, computer vision, and transformer projects. -

    We cover how to translate domain knowledge from biology into ML problem - framing, the role of open-source contributions in a data science portfolio, and - project ideas that demonstrate computer vision and transformer expertise. Isabella - also discusses how to document work, choose reproducible experiments, and highlight - impact for hiring managers or collaborators. She runs a newsletter dedicated to - showcasing women’s accomplishments in data science, bringing an equity-minded perspective - to building visible work.

    If you’re building a data science portfolio, - shifting careers into ML, or want concrete ways to leverage open-source and modern - architectures like transformers and computer vision models, this conversation offers - practical guidance, realistic project priorities, and tips for making your work - discoverable to recruiters and the community. +description: "Build a data science portfolio with open-source computer vision & transformers—gain hands-on projects, GitHub proof, and interview-ready ML skills." +topics: +- machine learning +- computer vision +- open-source +- bioinformatics +- career transition +intro: "How do you move from a biology background into machine learning and build a data science portfolio that actually gets noticed? In this episode, Isabella Bicalho — a machine learning engineer and data scientist with three years of hands-on AI development and roots in computational research — walks through practical approaches for showcasing skills with open-source, computer vision, and transformer projects.

    We cover how to translate domain knowledge from biology into ML problem framing, the role of open-source contributions in a data science portfolio, and project ideas that demonstrate computer vision and transformer expertise. Isabella also discusses how to document work, choose reproducible experiments, and highlight impact for hiring managers or collaborators. She runs a newsletter dedicated to showcasing women’s accomplishments in data science, bringing an equity-minded perspective to building visible work.

    If you’re building a data science portfolio, shifting careers into ML, or want concrete ways to leverage open-source and modern architectures like transformers and computer vision models, this conversation offers practical guidance, realistic project priorities, and tips for making your work discoverable to recruiters and the community." dateadded: 2024-12-17 duration: PT01H03M42S quotableClips: diff --git a/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md index abfc5d52..6d2ceb03 100644 --- a/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md +++ b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md @@ -1,7 +1,6 @@ --- -title: Applying Computer Vision Research to Building Production-Ready AI Systems for - Real-World Deployment -short: 'Lessons from Applied AI: Tesla, Waymo, and Beyond' +title: "Applying Computer Vision Research to Building Production-Ready AI Systems for Real-World Deployment" +short: "Lessons from Applied AI: Tesla, Waymo, and Beyond" season: 22 episode: 2 guests: @@ -15,24 +14,17 @@ links: apple: https://podcasts.apple.com/us/podcast/lessons-from-applied-ai-tesla-waymo-and-beyond/id1541710331?i=1000731200298 spotify: https://open.spotify.com/episode/0h9eX7m6H2TPqOjUwb3Jw6?si=I4rKrHXpQTmS7cJBMJbUMA youtube: https://www.youtube.com/watch?v=vK_SxyqIfwk -description: Master computer vision to build production-ready AI systems - learn deployment, - scaling, validation and monitoring to launch reliable real-world models. -intro: 'How do you take computer vision research out of the lab and turn it into production-ready - AI that actually works in the real world? In this episode Aishwarya Jadhav, a Machine - Learning Engineer with over four years of industry experience and a Master’s from - Carnegie Mellon University, walks through the challenges of applying computer vision - research to production systems. 
Her background spans multimodal LLMs, generative - AI, and computer vision, with research experience in multimodal deep learning and - text information extraction and projects including assistive technologies for the - visually impaired.

    We cover the bridge between applied research and engineering: - translating prototypes into robust, deployable models, integrating multimodal pipelines, - balancing model accuracy with latency and scalability, and practical considerations - for production-ready AI and real-world deployment. Listeners will gain concrete - perspectives on how research informs product choices, what to prioritize when deploying - computer vision systems, and how multimodal approaches and generative models fit - into end-to-end solutions. This episode is useful for ML engineers, researchers, - and product teams focused on building reliable, deployable computer vision and multimodal - AI systems.' +description: "Master computer vision to build production-ready AI systems - learn deployment, scaling, validation and monitoring to launch reliable real-world models." +topics: +- computer vision +- academia +- autonomous driving +- MLOps +- LLMs +- production +- career growth +- career transition +intro: "How do you take computer vision research out of the lab and turn it into production-ready AI that actually works in the real world? In this episode Aishwarya Jadhav, a Machine Learning Engineer with over four years of industry experience and a Master’s from Carnegie Mellon University, walks through the challenges of applying computer vision research to production systems. Her background spans multimodal LLMs, generative AI, and computer vision, with research experience in multimodal deep learning and text information extraction and projects including assistive technologies for the visually impaired.

    We cover the bridge between applied research and engineering: translating prototypes into robust, deployable models, integrating multimodal pipelines, balancing model accuracy with latency and scalability, and practical considerations for production-ready AI and real-world deployment. Listeners will gain concrete perspectives on how research informs product choices, what to prioritize when deploying computer vision systems, and how multimodal approaches and generative models fit into end-to-end solutions. This episode is useful for ML engineers, researchers, and product teams focused on building reliable, deployable computer vision and multimodal AI systems." dateadded: 2025-10-21 duration: PT00H59M01S quotableClips: diff --git a/_podcast/from-data-freelancer-to-startup-open-source-products.md b/_podcast/from-data-freelancer-to-startup-open-source-products.md index 435d01fc..eabef276 100644 --- a/_podcast/from-data-freelancer-to-startup-open-source-products.md +++ b/_podcast/from-data-freelancer-to-startup-open-source-products.md @@ -1,6 +1,6 @@ --- -title: 'From Data Freelancer to Startup: Open-Source Products and Bottom-Up Adoption' -short: 'The Entrepreneurship Journey: From Freelancing to Starting a Company' +title: "From Data Freelancer to Startup: Open-Source Products and Bottom-Up Adoption" +short: "The Entrepreneurship Journey: From Freelancing to Starting a Company" season: 17 episode: 1 guests: @@ -14,23 +14,8 @@ links: apple: https://podcasts.apple.com/us/podcast/the-entrepreneurship-journey-from-freelancing-to/id1541710331?i=1000638715212 spotify: https://open.spotify.com/episode/7wBmJHSXPHoW0mEIbNDgqr?si=z7klLtveT1ioGi6bg8hR7Q youtube: https://www.youtube.com/watch?v=vOpEQiCsaLw -description: 'Discover how to build an open-source data product for Python devs: bootstrap, - ship DLT transforms, and drive bottom-up adoption to find PMF.' 
-intro: How do you move from freelancing to building an open-source data company that - wins via bottom-up adoption? In this episode Adrian Brudaru — an economics graduate - who pivoted to business analysis in Berlin, then spent years freelancing before - co-founding a data startup — walks through that transition and the practical tradeoffs - he encountered.

    We cover lessons from freelancing and agency work, why - they chose product over agency growth, and the recurring pain of stakeholder alignment - versus technical setup. Adrian explains DLT — a declarative JSON→relational transformation - for data pipelines — and why the product targets Python users as a developer-focused - library. Hear how workshops, documentation, and live support doubled as product - validation, how scrappy bootstrapping and consulting revenue funded early payroll, - and what signals indicate product–market fit for open-source tooling.

    If - you’re building open-source data tools, developer tooling, or plotting a bottom-up - go-to-market, this episode offers concrete tactics on iteration, docs-as-product, - ecosystem partnerships, and positioning against platforms like Airbyte/Fivetran - — helping you prioritize engineering, adoption, and sustainable monetization. +description: "Discover how to build an open-source data product for Python devs: bootstrap, ship DLT transforms, and drive bottom-up adoption to find PMF." +intro: "How do you move from freelancing to building an open-source data company that wins via bottom-up adoption? In this episode Adrian Brudaru — an economics graduate who pivoted to business analysis in Berlin, then spent years freelancing before co-founding a data startup — walks through that transition and the practical tradeoffs he encountered.

    We cover lessons from freelancing and agency work, why they chose product over agency growth, and the recurring pain of stakeholder alignment versus technical setup. Adrian explains DLT — a declarative JSON→relational transformation for data pipelines — and why the product targets Python users as a developer-focused library. Hear how workshops, documentation, and live support doubled as product validation, how scrappy bootstrapping and consulting revenue funded early payroll, and what signals indicate product–market fit for open-source tooling.

    If you’re building open-source data tools, developer tooling, or plotting a bottom-up go-to-market, this episode offers concrete tactics on iteration, docs-as-product, ecosystem partnerships, and positioning against platforms like Airbyte/Fivetran — helping you prioritize engineering, adoption, and sustainable monetization." topics: - entrepreneurship - freelance diff --git a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md index 066519b7..7947e092 100644 --- a/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md +++ b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md @@ -1,7 +1,6 @@ --- -title: 'From DevOps to Data Engineering: Automation, Open Source & Volunteering for - Career Transitions' -short: Career choices, transitions and promotions in and out of tech +title: "From DevOps to Data Engineering: Automation, Open Source & Volunteering for Career Transitions" +short: "Career choices, transitions and promotions in and out of tech" season: 19 episode: 8 guests: @@ -15,23 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/career-choices-transitions-and-promotions-in-and-out/id1541710331?i=1000683499310 spotify: https://open.spotify.com/episode/0UW7fLgm9fqMG64GQwvgIN?si=ZixbzDcZT2mNkVrJjZVbeA youtube: https://www.youtube.com/watch?v=QKWu5-6_6TE -description: 'Discover DevOps to Data Engineering strategies: open source contributions - and volunteering to build pipelines, projects and a hireable portfolio.' -intro: How do you pivot from DevOps to data engineering without starting over? In - this episode Agita Jaunzeme — a DevOps/DataOps engineer, manager, community builder - and NGO founder — breaks down practical strategies for career transitions that center - on automation, open source participation, and volunteering.

    Agita draws - on experience across corporate, startup, open source and non-governmental sectors - and shares how automation and DevOps practices translate to data engineering and - DataOps. We discuss using open source projects to build credibility, volunteering - and community work to gain hands-on experience and networks, and concrete approaches - to getting promoted or making purposeful career pivots. Agita also talks about designing - work that aligns with passion and purpose, including founding an NGO to support - expats and locals in Porto.

    Listeners will come away with actionable ideas - for bridging skill gaps, leveraging automation and open source contributions, and - using volunteering as a pathway into data engineering roles. This episode is for - DevOps professionals, aspiring data engineers, and career changers seeking pragmatic - advice on transitions, promotions, and aligning work with meaningful impact. +description: "Discover DevOps to Data Engineering strategies: open source contributions and volunteering to build pipelines, projects and a hireable portfolio." +topics: +- open-source +- data engineering +- software engineering +- practices +- career transition +- DevOps +intro: "How do you pivot from DevOps to data engineering without starting over? In this episode Agita Jaunzeme — a DevOps/DataOps engineer, manager, community builder and NGO founder — breaks down practical strategies for career transitions that center on automation, open source participation, and volunteering.

    Agita draws on experience across corporate, startup, open source and non-governmental sectors and shares how automation and DevOps practices translate to data engineering and DataOps. We discuss using open source projects to build credibility, volunteering and community work to gain hands-on experience and networks, and concrete approaches to getting promoted or making purposeful career pivots. Agita also talks about designing work that aligns with passion and purpose, including founding an NGO to support expats and locals in Porto.

    Listeners will come away with actionable ideas for bridging skill gaps, leveraging automation and open source contributions, and using volunteering as a pathway into data engineering roles. This episode is for DevOps professionals, aspiring data engineers, and career changers seeking pragmatic advice on transitions, promotions, and aligning work with meaningful impact." dateadded: 2025-01-12 duration: PT01H01M46S quotableClips: @@ -115,7 +106,7 @@ quotableClips: startOffset: 2423 url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2423 endOffset: 2584 -- name: 'Career Coaching Course: "Align Your Career With Who You Are"' +- name: 'Career Coaching Course: "Align Your Career With Who You Are"' startOffset: 2584 url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2584 endOffset: 2704 @@ -822,7 +813,7 @@ transcript: sec: 2555 time: '42:35' who: Alexey -- header: 'Career Coaching Course: "Align Your Career With Who You Are"' +- header: 'Career Coaching Course: "Align Your Career With Who You Are"' - line: Starting an NGO is not easy, at least not in Portugal. I’m not sure how it works in other countries, but you need at least nine people. It didn’t really start in March. 
I had already been doing a career coaching course for about two diff --git a/_podcast/from-game-ai-to-modern-ai-agents.md b/_podcast/from-game-ai-to-modern-ai-agents.md index a303ba0e..f3b2fe61 100644 --- a/_podcast/from-game-ai-to-modern-ai-agents.md +++ b/_podcast/from-game-ai-to-modern-ai-agents.md @@ -1,7 +1,6 @@ --- -title: 'From Game AI to LLM Agents: 20-Year Evolution of Multi-Agent Systems, Evolutionary - Algorithms & Modern AI Tooling' -short: Lessons from Two Decades of AI +title: "From Game AI to LLM Agents: 20-Year Evolution of Multi-Agent Systems, Evolutionary Algorithms & Modern AI Tooling" +short: "Lessons from Two Decades of AI" season: 21 episode: 7 guests: @@ -15,24 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/lessons-from-two-decades-of-ai-micheal-lanham/id1541710331?i=1000728604349 spotify: https://open.spotify.com/episode/7uhe5ZysRi07S6mb14nnox youtube: https://www.youtube.com/watch?v=DSxqUlumM3A -description: Discover 20 years of Game AI, Evolutionary Algorithms, and LLM agents—practical - AI tooling, architecture tips, and faster deployment for real projects. -intro: How did techniques born in game AI become the foundation for today's LLM-driven - agents, and what practical lessons does that 20-year evolution offer to engineers - and researchers? In this episode, AI engineer and best-selling author Micheal Lanham - walks through the lineage from game AI and multi-agent systems to modern LLM agents, - evolutionary algorithms, and contemporary AI tooling.

    Micheal brings hands-on - experience across games, graphics, GIS, enterprise software, and machine learning, - and is the author of Evolutionary Deep Learning, Hands-On Reinforcement Learning - for Games, and AI Agents in Action. He discusses how deep reinforcement learning, - evolutionary methods, and generative AI intersect to build intelligent systems, - and how industry practices from oil and gas to fintech shaped tooling and architectures - for multi-agent systems.

    Listeners will come away with a clearer view of - the technical continuity between game AI and current agent design, practical considerations - when applying evolutionary algorithms and reinforcement learning, and what modern - AI tooling enables for deploying LLM agents. This episode is useful for AI practitioners, - game developers, and anyone interested in the evolution of multi-agent systems, - evolutionary algorithms, and agent-based AI. +description: "Discover 20 years of Game AI, Evolutionary Algorithms, and LLM agents—practical AI tooling, architecture tips, and faster deployment for real projects." +topics: +- LLMs +- AI +- machine learning +- MLOps +- software engineering +intro: "How did techniques born in game AI become the foundation for today's LLM-driven agents, and what practical lessons does that 20-year evolution offer to engineers and researchers? In this episode, AI engineer and best-selling author Micheal Lanham walks through the lineage from game AI and multi-agent systems to modern LLM agents, evolutionary algorithms, and contemporary AI tooling.

    Micheal brings hands-on experience across games, graphics, GIS, enterprise software, and machine learning, and is the author of Evolutionary Deep Learning, Hands-On Reinforcement Learning for Games, and AI Agents in Action. He discusses how deep reinforcement learning, evolutionary methods, and generative AI intersect to build intelligent systems, and how industry practices from oil and gas to fintech shaped tooling and architectures for multi-agent systems.

    Listeners will come away with a clearer view of the technical continuity between game AI and current agent design, practical considerations when applying evolutionary algorithms and reinforcement learning, and what modern AI tooling enables for deploying LLM agents. This episode is useful for AI practitioners, game developers, and anyone interested in the evolution of multi-agent systems, evolutionary algorithms, and agent-based AI." dateadded: 2025-10-01 duration: PT01H48S quotableClips: diff --git a/_podcast/from-iot-data-engineering-to-leading-data-architect.md b/_podcast/from-iot-data-engineering-to-leading-data-architect.md index 7cb8dce2..a06f1083 100644 --- a/_podcast/from-iot-data-engineering-to-leading-data-architect.md +++ b/_podcast/from-iot-data-engineering-to-leading-data-architect.md @@ -1,6 +1,6 @@ --- -title: 'From Hands-On IoT Data Engineering to Leading Data Architecture: Pipelines, Cloud Adaptation & Analytics Modeling' -short: From Data Manager to Data Architect +title: "From Hands-On IoT Data Engineering to Leading Data Architecture: Pipelines, Cloud Adaptation & Analytics Modeling" +short: "From Data Manager to Data Architect" season: 15 episode: 8 guests: diff --git a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md index 5002806f..7f396bab 100644 --- a/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md +++ b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md @@ -1,7 +1,6 @@ --- -title: 'From Collider Physics to Data Science: Research Software Engineering, Interview - Prep & Mentorship' -short: Large Hadron Collider and Mentorship +title: "From Collider Physics to Data Science: Research Software Engineering, Interview Prep & Mentorship" +short: "Large Hadron Collider and Mentorship" season: 19 episode: 5 guests: @@ -15,24 +14,15 @@ links: apple: 
https://podcasts.apple.com/us/podcast/large-hadron-collider-and-mentorship-anastasia-karavdina/id1541710331?i=1000677930293 spotify: https://open.spotify.com/episode/6AZ26Q8O4VBkC9YtUNzhab?si=75154323e14d4dca youtube: https://www.youtube.com/watch?v=kV0ZDy2UtJA -description: Discover how collider physics skills power data science careers and research - software engineering; gain interview prep tactics, mentorship tips, and growth. -intro: How do you move from collider physics to industry data science while keeping - rigorous research software engineering practices, succeeding in interviews, and - giving or getting effective mentorship? In this episode Anastasia Karavdina — a - particle physicist turned data scientist who worked on Large Hadron Collider experiments - and later built AI solutions at Blue Yonder and Kaufland e-commerce — walks through - that journey.

    We start with collider basics (particle acceleration, detector - imaging, event volumes, and roles in large collaborations) to show the data scale - and statistical thinking that map to industry. Anastasia explains dual hardware-and-analysis - roles, how multivariate analysis translates to machine learning, and concrete research - software engineering practices like version control and CI/CD. She also covers interview - prep (technical fit, behavioral stories, cultural fit in Germany) and evolving hiring - expectations, plus supply chain AI use cases. Finally, she discusses mentoring — - how she started, structuring mentorship, paid vs. free options, and platforms like - MentorCruise.

    Listen to learn practical steps for translating physics expertise - into data science careers, applying RSE workflows, preparing interview narratives, - and finding mentorship to accelerate your next move. +description: "Discover how collider physics skills power data science careers and research software engineering; gain interview prep tactics, mentorship tips, and career growth." +topics: +- machine learning +- data science +- MLOps +- software engineering +- career transition +- academia +intro: "How do you move from collider physics to industry data science while keeping rigorous research software engineering practices, succeeding in interviews, and giving or getting effective mentorship? In this episode Anastasia Karavdina — a particle physicist turned data scientist who worked on Large Hadron Collider experiments and later built AI solutions at Blue Yonder and Kaufland e-commerce — walks through that journey.

    We start with collider basics (particle acceleration, detector imaging, event volumes, and roles in large collaborations) to show the data scale and statistical thinking that map to industry. Anastasia explains dual hardware-and-analysis roles, how multivariate analysis translates to machine learning, and concrete research software engineering practices like version control and CI/CD. She also covers interview prep (technical fit, behavioral stories, cultural fit in Germany) and evolving hiring expectations, plus supply chain AI use cases. Finally, she discusses mentoring — how she started, structuring mentorship, paid vs. free options, and platforms like MentorCruise.

    Listen to learn practical steps for translating physics expertise into data science careers, applying RSE workflows, preparing interview narratives, and finding mentorship to accelerate your next move." dateadded: 2024-12-17 duration: PT01H01M22S quotableClips: diff --git a/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md index fbf2e5ef..edc5a5d2 100644 --- a/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md +++ b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md @@ -1,6 +1,6 @@ --- -title: 'Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook' -short: From Digital Marketing to Analytics Engineering +title: "Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook" +short: "From Digital Marketing to Analytics Engineering" season: 11 episode: 7 guests: diff --git a/_podcast/from-marketing-to-product-owner-in-search.md b/_podcast/from-marketing-to-product-owner-in-search.md index fbf2e5ef..edc5a5d2 100644 --- a/_podcast/from-marketing-to-product-owner-in-search.md +++ b/_podcast/from-marketing-to-product-owner-in-search.md @@ -1,6 +1,6 @@ --- -title: 'Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook' -short: From Digital Marketing to Analytics Engineering +title: "Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook" +short: "From Digital Marketing to Analytics Engineering" season: 11 episode: 7 guests: diff --git a/_podcast/from-math-graduate-to-data-analytics.md b/_podcast/from-math-graduate-to-data-analytics.md index 0a3f1eee..11d1ee55 100644 --- a/_podcast/from-math-graduate-to-data-analytics.md +++ b/_podcast/from-math-graduate-to-data-analytics.md @@ -1,6 +1,6 @@ --- -title: 'How to Break into Data Analytics: Networking, Portfolio, SQL & Interview Prep' -short: From Math Teacher to Analytics Engineer +title: "How 
to Break into Data Analytics: Networking, Portfolio, SQL & Interview Prep" +short: "From Math Teacher to Analytics Engineer" season: 7 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/153XI6DvtNWHYzSAv2UTqw youtube: https://www.youtube.com/watch?v=qh6-HDhw2xY -description: 'Discover data analytics: build a portfolio, master SQL & networking, interview prep, cold outreach and project READMEs to land job offers faster.' -intro: 'How do you actually break into data analytics — and what combination of networking, portfolio work, SQL skills, and interview prep gets you hired? In this episode, Juan Pablo Murillo, an AI and data professional now at Google with prior roles as an Amazon Business Intelligence Engineer and data scientist at T-Mobile, walks through a practical path from math grad to analytics roles.

    We cover the full playbook: where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present projects for hiring managers. Juan addresses bootcamp trade-offs, networking wins from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, finding contract or pro bono work, and resume/STAR interview prep. He also discusses role realities for BI and analytics engineering and employer branding to build credibility.

    Listen for actionable steps and specific tactics—how to structure three portfolio projects, message templates for outreach, and interview preparation tips—to help you break into data analytics, improve SQL interview readiness, and turn public work into job opportunities.' +description: "Discover data analytics: build a portfolio, master SQL & networking, interview prep, cold outreach and project READMEs to land job offers faster." +intro: "How do you actually break into data analytics — and what combination of networking, portfolio work, SQL skills, and interview prep gets you hired? In this episode, Juan Pablo Murillo, an AI and data professional now at Google with prior roles as an Amazon Business Intelligence Engineer and data scientist at T-Mobile, walks through a practical path from math grad to analytics roles.

    We cover the full playbook: where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present projects for hiring managers. Juan addresses bootcamp trade-offs, networking wins from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, finding contract or pro bono work, and resume/STAR interview prep. He also discusses role realities for BI and analytics engineering and employer branding to build credibility.

    Listen for actionable steps and specific tactics—how to structure three portfolio projects, message templates for outreach, and interview preparation tips—to help you break into data analytics, improve SQL interview readiness, and turn public work into job opportunities." topics: - career transition - data analytics diff --git a/_podcast/from-physics-to-computer-vision-career-transition.md b/_podcast/from-physics-to-computer-vision-career-transition.md index b6cb54ba..03d24358 100644 --- a/_podcast/from-physics-to-computer-vision-career-transition.md +++ b/_podcast/from-physics-to-computer-vision-career-transition.md @@ -1,6 +1,6 @@ --- -title: 'Switch to Computer Vision & Deep Learning: Roadmap, Kaggle Projects, Mentors & Interview Prep' -short: From Physics to Machine Learning +title: "Switch to Computer Vision & Deep Learning: Roadmap, Kaggle Projects, Mentors & Interview Prep" +short: "From Physics to Machine Learning" season: 3 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4Kk7xXfD5t2VHnLDHpdW1y apple: https://podcasts.apple.com/us/podcast/from-physics-to-machine-learning-tatiana-gabruseva/id1541710331?i=1000521740775 -description: 'Master computer vision & deep learning with a clear roadmap: Kaggle projects, mentorship strategies and interview prep to land roles and build deployed models.' -intro: How do you switch into computer vision and deep learning from a non-industry background — and build a portfolio that lands interviews? In this episode, Tatiana Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master now working as a Senior ML Engineer at Cork University Hospital, maps a practical career-change roadmap. 
Drawing on her move from a physics PhD during maternity leave, Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), hands-on projects (Kaggle competitions, internships, Omdena-style collaborations, end-to-end pet projects with data collection, labeling, deployment and Docker), and where to start Kaggle with minimal Python.

    You’ll hear tactical advice on mentorship — finding and nurturing long-term mentors — plus networking, team building for competitions and papers, and overcoming impostor syndrome with mock interviews and LeetCode practice. She also shares prioritization strategies (Pareto, outsourcing), mental rehearsal techniques, boundary setting, and self-care to avoid burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, and connect with the data science community to accelerate a switch into computer vision and deep learning +description: "Master computer vision & deep learning with a clear roadmap: Kaggle projects, mentorship strategies and interview prep to land roles and build deployed models." +intro: "How do you switch into computer vision and deep learning from a non-industry background — and build a portfolio that lands interviews? In this episode, Tatiana Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master now working as a Senior ML Engineer at Cork University Hospital, maps a practical career-change roadmap. Drawing on her move from a physics PhD during maternity leave, Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), hands-on projects (Kaggle competitions, internships, Omdena-style collaborations, end-to-end pet projects with data collection, labeling, deployment and Docker), and where to start Kaggle with minimal Python.

    You’ll hear tactical advice on mentorship — finding and nurturing long-term mentors — plus networking, team building for competitions and papers, and overcoming impostor syndrome with mock interviews and LeetCode practice. She also shares prioritization strategies (Pareto, outsourcing), mental rehearsal techniques, boundary setting, and self-care to avoid burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, and connect with the data science community to accelerate a switch into computer vision and deep learning." topics: - career transition - physics diff --git a/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md index c54e31fb..d8ee6be4 100644 --- a/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md +++ b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md @@ -1,7 +1,6 @@ --- -title: 'From Radio Astronomy to Applied ML: MEERKAT Data Pipelines, Multi-Wavelength - Cross-Matching & Production-Grade ML Systems' -short: From Astronomy to Applied ML +title: "From Radio Astronomy to Applied ML: MEERKAT Data Pipelines, Multi-Wavelength Cross-Matching & Production-Grade ML Systems" +short: "From Astronomy to Applied ML" season: 21 episode: 5 guests: @@ -15,23 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/from-astronomy-to-applied-ml-daniel-egbo/id1541710331?i=1000728601772 spotify: https://open.spotify.com/episode/0hV7d1zSKO7ykGDZxjXyJ8 youtube: https://www.youtube.com/watch?v=b92gwrsVQtg -description: 'Discover MEERKAT radio astronomy pipelines and machine learning: build - production ML, master multi-wavelength cross-match, accelerate discovery.' -intro: How do you transform raw radio astronomy observations into reliable, production-grade - machine learning systems that enable multi-wavelength science? 
In this episode we - talk with Daniel Egbo — an astrophysicist turned machine learning engineer and AI - ambassador (Arize, Tavily) and PhD candidate at the University of Cape Town — about - bridging radio astronomy and applied ML. Daniel explains the challenges of working - with MEERKAT data pipelines, strategies for multi-wavelength cross-matching, and - the engineering practices needed to take models from research to production.

    - You’ll hear about end-to-end ML and LLM applications with an emphasis on reliability, - practical evaluation, and knowledge-retrieval assistants, plus how data science - techniques apply to astronomy workflows. Whether you’re building pipelines for radio - telescopes, tackling cross-matching across optical and radio catalogs, or aiming - to deploy robust production-grade ML systems, this episode offers concrete perspectives - on data handling, evaluation, and operationalizing models in scientific contexts. - Listen to gain actionable insights for integrating astrophysical datasets with modern - ML tooling and improving model reliability in real-world deployments. +description: "Discover MEERKAT radio astronomy pipelines and machine learning: build production ML, master multi-wavelength cross-match, accelerate discovery." +topics: +- astroinformatics +- MLOps +- LLMs +- data engineering +- machine learning +- academia +- career transition +intro: "How do you transform raw radio astronomy observations into reliable, production-grade machine learning systems that enable multi-wavelength science? In this episode we talk with Daniel Egbo — an astrophysicist turned machine learning engineer and AI ambassador (Arize, Tavily) and PhD candidate at the University of Cape Town — about bridging radio astronomy and applied ML. Daniel explains the challenges of working with MEERKAT data pipelines, strategies for multi-wavelength cross-matching, and the engineering practices needed to take models from research to production.

    You’ll hear about end-to-end ML and LLM applications with an emphasis on reliability, practical evaluation, and knowledge-retrieval assistants, plus how data science techniques apply to astronomy workflows. Whether you’re building pipelines for radio telescopes, tackling cross-matching across optical and radio catalogs, or aiming to deploy robust production-grade ML systems, this episode offers concrete perspectives on data handling, evaluation, and operationalizing models in scientific contexts. Listen to gain actionable insights for integrating astrophysical datasets with modern ML tooling and improving model reliability in real-world deployments." dateadded: 2025-09-30 duration: PT01H04M35S quotableClips: diff --git a/_podcast/from-semiconductor-data-to-applied-machine-learning.md b/_podcast/from-semiconductor-data-to-applied-machine-learning.md index 0cd32820..6c8d50fc 100644 --- a/_podcast/from-semiconductor-data-to-applied-machine-learning.md +++ b/_podcast/from-semiconductor-data-to-applied-machine-learning.md @@ -1,7 +1,6 @@ --- -title: 'From Classical Guitar to Production ML: Nonlinear Career Path Through Semiconductors, - Yield Analytics & Community-Driven Learning' -short: 'From Semiconductors to Machine Learning: A Career in Data and Teaching' +title: "From Classical Guitar to Production ML: Nonlinear Career Path Through Semiconductors, Yield Analytics & Community-Driven Learning" +short: "From Semiconductors to Machine Learning: A Career in Data and Teaching" season: 21 episode: 8 guests: @@ -15,25 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/from-semiconductors-to-machine-learning-a-career-in/id1541710331?i=1000731197034 spotify: https://open.spotify.com/episode/1znRtNRf5IUYcBblJYH53r youtube: https://www.youtube.com/watch?v=B2tzuUg5uZs -description: Discover a nonlinear path from classical guitar to production ML, semiconductors - & yield analytics. Learn actionable career tactics and community-driven learning. 
-intro: How do you move from playing classical guitar to applying machine learning - in semiconductor yield analytics? In this episode Dashel Ruiz Perez — a data analyst, - ML engineer, and educator — walks us through a nonlinear career path that spans - nearly a decade at Microchip Technology and now teaching programming and data skills - through ThriveDX. With roles across production, process, yield, and software engineering, - Dashel explains how hands-on production experience informs production analytics - and ML engineering work in semiconductor manufacturing.

    We cover practical - topics including translating manufacturing problems into data science projects, - building models for yield optimization, and the role of software engineering in - production analytics. Dashel also discusses learning pathways — from degrees in - computer science and data analytics at Western Governors University to graduating - from ML Zoomcamp — and how community-driven learning accelerates skill acquisition. -

    Listeners will gain actionable guidance on career transition strategies, - concrete examples of applying machine learning and data analytics in semiconductor - contexts, and resources for growing technical skills through community and formal - training. This episode is useful for engineers, data analysts, and anyone considering - a switch into ML, AI, or semiconductor yield analytics. +description: "Discover a nonlinear path from classical guitar to production ML, semiconductors & yield analytics. Learn actionable career tactics and community-driven learning." +topics: +- machine learning +- MLOps +- data science +- tools +- career transition +intro: "How do you move from playing classical guitar to applying machine learning in semiconductor yield analytics? In this episode Dashel Ruiz Perez — a data analyst, ML engineer, and educator — walks us through a nonlinear career path that spans nearly a decade at Microchip Technology and now teaching programming and data skills through ThriveDX. With roles across production, process, yield, and software engineering, Dashel explains how hands-on production experience informs production analytics and ML engineering work in semiconductor manufacturing.

    We cover practical topics including translating manufacturing problems into data science projects, building models for yield optimization, and the role of software engineering in production analytics. Dashel also discusses learning pathways — from degrees in computer science and data analytics at Western Governors University to graduating from ML Zoomcamp — and how community-driven learning accelerates skill acquisition.

    Listeners will gain actionable guidance on career transition strategies, concrete examples of applying machine learning and data analytics in semiconductor contexts, and resources for growing technical skills through community and formal training. This episode is useful for engineers, data analysts, and anyone considering a switch into ML, AI, or semiconductor yield analytics." dateadded: 2025-10-21 duration: PT01H13M08S quotableClips: diff --git a/_podcast/from-software-engineer-to-machine-learning.md b/_podcast/from-software-engineer-to-machine-learning.md index 1b4c0b93..6dc3109e 100644 --- a/_podcast/from-software-engineer-to-machine-learning.md +++ b/_podcast/from-software-engineer-to-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'From Software Engineering to Machine Learning: 7 Lessons, Tools, MLOps & Project Roadmap' -short: From Software Engineering to Machine Learning +title: "From Software Engineering to Machine Learning: 7 Lessons, Tools, MLOps & Project Roadmap" +short: "From Software Engineering to Machine Learning" season: 4 episode: 1 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/0PHDZPGyXgyDM9HH7QzVdZ apple: https://podcasts.apple.com/us/podcast/from-software-engineering-to-machine-learning-santiago/id1541710331?i=1000526870384 -description: 'Learn practical machine learning for software engineers: 7 lessons, Python tools, MLOps & a project roadmap to build, deploy and monitor real ML systems.' -intro: 'How do you move from software engineering into practical machine learning without getting stuck on theory or math? In this episode, Santiago Valdarrama — Director of Computer Vision and a computer scientist with two decades of software experience — walks through a pragmatic roadmap for software engineers transitioning to machine learning.

    We cover seven practical lessons for getting started (start projects, think long-term, teach and join communities, build real projects, prioritize coding, analyze problems first, and favor pragmatism), core ML tooling (Python, NumPy, Pandas, Matplotlib, scikit-learn), and recommended learning resources (Google ML Crash Course, Kaggle, Deep Learning with Python, Hands-On Machine Learning). Santiago compares problem-based vs top-down learning, outlines a course roadmap for engineers, and explains ML engineering skills: data pipelines, modeling, deployment, monitoring, plus MLOps fundamentals like APIs, Docker, and cloud providers.

    Listen to gain an actionable project roadmap, tools checklist, and concrete strategies to conquer math anxiety and ship ML systems — practical guidance for engineers who want to build, deploy, and maintain real machine learning solutions.' +description: "Learn practical machine learning for software engineers: 7 lessons, Python tools, MLOps & a project roadmap to build, deploy and monitor real ML systems." +topics: +- machine learning +- MLOps +- software engineering +- career transition +- tools +intro: "How do you move from software engineering into practical machine learning without getting stuck on theory or math? In this episode, Santiago Valdarrama — Director of Computer Vision and a computer scientist with two decades of software experience — walks through a pragmatic roadmap for software engineers transitioning to machine learning.

    We cover seven practical lessons for getting started (start projects, think long-term, teach and join communities, build real projects, prioritize coding, analyze problems first, and favor pragmatism), core ML tooling (Python, NumPy, Pandas, Matplotlib, scikit-learn), and recommended learning resources (Google ML Crash Course, Kaggle, Deep Learning with Python, Hands-On Machine Learning). Santiago compares problem-based vs top-down learning, outlines a course roadmap for engineers, and explains ML engineering skills: data pipelines, modeling, deployment, monitoring, plus MLOps fundamentals like APIs, Docker, and cloud providers.

    Listen to gain an actionable project roadmap, tools checklist, and concrete strategies to conquer math anxiety and ship ML systems — practical guidance for engineers who want to build, deploy, and maintain real machine learning solutions." dateadded: 2021-06-25 duration: PT00H59M24S diff --git a/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md index 99466af1..a9f4534a 100644 --- a/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md +++ b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md @@ -1,6 +1,6 @@ --- -title: 'How to Become a Data Engineer: Skills, MLOps, Pipelines, SQL, CI/CD & Cloud' -short: From Data Science to Data Engineering +title: "How to Become a Data Engineer: Skills, MLOps, Pipelines, SQL, CI/CD & Cloud" +short: "From Data Science to Data Engineering" season: 7 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4R9F5B4f8vf5r5yQEmwYiu youtube: https://www.youtube.com/watch?v=3TTu-hYzxeg -description: 'Master data engineering, MLOps and pipelines: learn CI/CD, cloud cost control and SQL/Python skills to switch careers and accelerate growth now.' -intro: In this episode, Ellen König—Head of Engineering at alcemy—shares her journey from software and data science to data engineering leadership. She explains why many professionals make the switch, the skills that matter most (from DevOps and CI/CD to collaboration), and how to prepare through side projects and software fundamentals.

    Ellen also breaks down key tools like Git, Docker, and Airflow, discusses the realities of cloud costs and team structures, and offers practical advice for anyone planning a transition into data engineering +description: "Master data engineering, MLOps and pipelines: learn CI/CD, cloud cost control and SQL/Python skills to switch careers and accelerate growth now." +intro: "In this episode, Ellen König—Head of Engineering at alcemy—shares her journey from software and data science to data engineering leadership. She explains why many professionals make the switch, the skills that matter most (from DevOps and CI/CD to collaboration), and how to prepare through side projects and software fundamentals.

    Ellen also breaks down key tools like Git, Docker, and Airflow, discusses the realities of cloud costs and team structures, and offers practical advice for anyone planning a transition into data engineering." topics: - data science - data engineering diff --git a/_podcast/from-software-engineering-to-leading-data-science-teams.md b/_podcast/from-software-engineering-to-leading-data-science-teams.md index e75550ad..f2967128 100644 --- a/_podcast/from-software-engineering-to-leading-data-science-teams.md +++ b/_podcast/from-software-engineering-to-leading-data-science-teams.md @@ -1,6 +1,6 @@ --- -title: 'Transitioning from Software Engineer to Data Science Manager: Search, ML & Leadership' -short: From Software Engineer to Data Science Manager +title: "Transitioning from Software Engineer to Data Science Manager: Search, ML & Leadership" +short: "From Software Engineer to Data Science Manager" season: 12 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3vOUwe4WtNQFXHRgTcyMtg?si=87o3XW_EQZ-n68nhAeV8Xw youtube: https://www.youtube.com/watch?v=xyTfqIWeKf8 -description: 'Learn to transition into a Data Science Manager: master search engineering, machine learning and leadership to hire, scale teams and measure business impact.' -intro: How do you move from hands-on software engineering into leading data science teams while staying effective on search and machine learning projects? In this episode Sadat Anwar — a people-centric Data Science Manager and former software engineer fluent in Java, Scala and Python — maps his path from an electronics and informatics background to research in computer vision at Fraunhofer and production search work at OLX.

    We cover practical search engineering topics (Solr autoscaling, decoupling search from a monolith, Kotlin services with Python ML satellites), early ML projects and experimentation strategies (master’s thesis on neural nets, 20% time wins, “act before you think”), and engineering safety nets like feature flags, backups and monitoring. Sadat also walks through the promotion/hiring process, documenting leadership evidence, people management challenges (conflict resolution, hiring, motivation loss when stepping away from code), and transitioning into data science management with NLP, trust & safety and fraud detection responsibilities.

    Listen for concrete advice on measuring managerial impact, leveraging EM experience to lead data science teams, and tactical steps for engineers aiming to become data science managers in search and ML domains +description: "Learn to transition into a Data Science Manager: master search engineering, machine learning and leadership to hire, scale teams and measure business impact." +intro: "How do you move from hands-on software engineering into leading data science teams while staying effective on search and machine learning projects? In this episode Sadat Anwar — a people-centric Data Science Manager and former software engineer fluent in Java, Scala and Python — maps his path from an electronics and informatics background to research in computer vision at Fraunhofer and production search work at OLX.

    We cover practical search engineering topics (Solr autoscaling, decoupling search from a monolith, Kotlin services with Python ML satellites), early ML projects and experimentation strategies (master’s thesis on neural nets, 20% time wins, “act before you think”), and engineering safety nets like feature flags, backups and monitoring. Sadat also walks through the promotion/hiring process, documenting leadership evidence, people management challenges (conflict resolution, hiring, motivation loss when stepping away from code), and transitioning into data science management with NLP, trust & safety and fraud detection responsibilities.

    Listen for concrete advice on measuring managerial impact, leveraging EM experience to lead data science teams, and tactical steps for engineers aiming to become data science managers in search and ML domains." topics: - career transition - software engineering @@ -138,7 +138,7 @@ quotableClips: startOffset: 3516 url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3516 endOffset: 3620 -- name: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' +- name: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' startOffset: 3620 url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3620 endOffset: 3672 @@ -1500,7 +1500,7 @@ transcript: sec: 3584 time: '59:44' who: Sadat -- header: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' +- header: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' - line: When is the next Search Meetup meeting? sec: 3620 time: '1:00:20' diff --git a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md index 88c94674..ee534e9e 100644 --- a/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md +++ b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md @@ -1,7 +1,6 @@ --- -title: 'From Software Engineer to VP of Machine Learning: Stakeholder Buy-In, Rapid - POCs and Full-Stack Skills' -short: The Unwritten Rules for Success in Machine Learning +title: "From Software Engineer to VP of Machine Learning: Stakeholder Buy-In, Rapid POCs and Full-Stack Skills" +short: "The Unwritten Rules for Success in Machine Learning" season: 16 episode: 6 guests: @@ -15,22 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/the-unwritten-rules-for-success-in-machine-learning/id1541710331?i=1000635206953 spotify: 
https://open.spotify.com/episode/2c8E0hZ02osih7ljEB6I6f?si=lSPp07r4TgmpGQey0cUjsA youtube: https://www.youtube.com/watch?v=su2M058m3Lw -description: Discover how to win stakeholder buy-in, build rapid POCs and scale machine - learning with full-stack skills—accelerate to VP-level impact and leadership. -intro: How do you move from a hands-on software engineer to a VP of Machine Learning - while getting stakeholders to say “yes,” delivering rapid POCs, and building the - full-stack skills teams need? In this episode Jack Blandin walks through that transition. - Jack began as a Software Engineer in 2015, shifted into Data Science and Machine - Learning in 2017, and has held ML and leadership roles at Fi, Wayfair, Trunk Club, - and GoHealth—managing teams of 2 to 15. He’s now VP of Data Science & Machine Learning - at Fi, finishing a PhD focused on ML, reinforcement learning, and algorithmic fairness, - and launching a hiring marketplace for data and ML professionals.

    We dig - into practical strategies for stakeholder buy-in, how to scope and run rapid proofs - of concept that prove value, and which full-stack skills accelerate career growth - in ML and data science leadership. Listeners will come away with actionable approaches - to design fast, business-focused POCs, communicate technical tradeoffs to non-technical - stakeholders, and level up skill sets that bridge engineering and product — useful - for anyone aiming to scale into ML management or improve ML engineering outcomes. +description: "Discover how to win stakeholder buy-in, build rapid POCs and scale machine learning with full-stack skills—accelerate to VP-level impact and leadership." +topics: +- machine learning +- MLOps +- tools +- career transition +- leadership +- software engineering +intro: "How do you move from a hands-on software engineer to a VP of Machine Learning while getting stakeholders to say “yes,” delivering rapid POCs, and building the full-stack skills teams need? In this episode Jack Blandin walks through that transition. Jack began as a Software Engineer in 2015, shifted into Data Science and Machine Learning in 2017, and has held ML and leadership roles at Fi, Wayfair, Trunk Club, and GoHealth—managing teams of 2 to 15. He’s now VP of Data Science & Machine Learning at Fi, finishing a PhD focused on ML, reinforcement learning, and algorithmic fairness, and launching a hiring marketplace for data and ML professionals.

    We dig into practical strategies for stakeholder buy-in, how to scope and run rapid proofs of concept that prove value, and which full-stack skills accelerate career growth in ML and data science leadership. Listeners will come away with actionable approaches to design fast, business-focused POCs, communicate technical tradeoffs to non-technical stakeholders, and level up skill sets that bridge engineering and product — useful for anyone aiming to scale into ML management or improve ML engineering outcomes." dateadded: 2023-11-20 duration: PT00H53M23S quotableClips: diff --git a/_podcast/from-startup-engineering-to-freelance-data-science.md b/_podcast/from-startup-engineering-to-freelance-data-science.md index 65c5de1b..49ef2f10 100644 --- a/_podcast/from-startup-engineering-to-freelance-data-science.md +++ b/_podcast/from-startup-engineering-to-freelance-data-science.md @@ -1,6 +1,6 @@ --- -title: 'Freelance Data Scientist Playbook: MLOps, Model Monitoring, Upwork & Startup Skills' -short: Lessons Learned from Freelancing and Working in a Start-up +title: "Freelance Data Scientist Playbook: MLOps, Model Monitoring, Upwork & Startup Skills" +short: "Lessons Learned from Freelancing and Working in a Start-up" season: 14 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4ehGduC0p734UtwPr5HANq?si=rEC_XP-4RSKYh0TtSQBtlw youtube: https://www.youtube.com/watch?v=-Gj7SaI-QW4 -description: 'Discover MLOps, model monitoring & Upwork tips to build a freelance data scientist portfolio: pricing, onboarding, tools to land clients including MLflow' -intro: How do you transition from startup engineering to a sustainable freelance data science practice while handling MLOps, model monitoring, and client work on Upwork? In this episode, Antonis Stellas — a freelance data scientist at Nanometrisis with a background in applied mathematics, physics and a professional doctorate working on industry consultancy — lays out a practical playbook.

    Antonis walks through startup-honed skills (cross-functional roles, lean build-measure-learn, communication and business know-how), concrete MLOps tooling and patterns (MLflow, Prefect, Grafana), and model monitoring essentials like data drift, concept drift and using Evidently AI. He shares a hands-on course project (semiconductor prediction), streaming examples (YouTube metrics into BigQuery/Looker), Kafka/Confluent emphasis, and an open-source Evidently how-to from Hacktoberfest.

    For freelancers, Antonis details starting on Upwork — profile building, proposal iteration, pricing strategy, onboarding workflows, invoicing and balancing startup commitments. Listen to get actionable guidance on building a portfolio, selecting projects, monitoring production models, and practical steps to find and retain clients as a freelance data scientist +description: "Discover MLOps, model monitoring & Upwork tips to build a freelance data scientist portfolio: pricing, onboarding, tools to land clients including MLflow" +intro: "How do you transition from startup engineering to a sustainable freelance data science practice while handling MLOps, model monitoring, and client work on Upwork? In this episode, Antonis Stellas — a freelance data scientist at Nanometrisis with a background in applied mathematics, physics and a professional doctorate working on industry consultancy — lays out a practical playbook.

    Antonis walks through startup-honed skills (cross-functional roles, lean build-measure-learn, communication and business know-how), concrete MLOps tooling and patterns (MLflow, Prefect, Grafana), and model monitoring essentials like data drift, concept drift and using Evidently AI. He shares a hands-on course project (semiconductor prediction), streaming examples (YouTube metrics into BigQuery/Looker), Kafka/Confluent emphasis, and an open-source Evidently how-to from Hacktoberfest.

    For freelancers, Antonis details starting on Upwork — profile building, proposal iteration, pricing strategy, onboarding workflows, invoicing and balancing startup commitments. Listen to get actionable guidance on building a portfolio, selecting projects, monitoring production models, and practical steps to find and retain clients as a freelance data scientist." topics: - freelance - startups diff --git a/_podcast/generative-ai-chatbots-in-production-security.md b/_podcast/generative-ai-chatbots-in-production-security.md index 8dfd70c2..d385a196 100644 --- a/_podcast/generative-ai-chatbots-in-production-security.md +++ b/_podcast/generative-ai-chatbots-in-production-security.md @@ -1,7 +1,6 @@ --- -title: 'Hardening Generative AI Chatbots: Prevent Prompt Injection, Data Exfiltration - & Hallucinations' -short: 'AI in Industry: Trust, Return on Investment and Future' +title: "Hardening Generative AI Chatbots: Prevent Prompt Injection, Data Exfiltration & Hallucinations" +short: "AI in Industry: Trust, Return on Investment and Future" season: 19 episode: 6 guests: @@ -15,23 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/ai-in-industry-trust-return-on-investment-and-future/id1541710331?i=1000679505962 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=a99463e34ffb48f1 youtube: https://www.youtube.com/watch?v=bT7-HRNCltk -description: Learn to harden generative AI chatbots against prompt injection and data - exfiltration—defenses, detection, and techniques to reduce hallucinations. -intro: How do you harden generative AI chatbots against prompt injection, data exfiltration, - and dangerous hallucinations? In this episode Maria Sukhareva — a principal key - expert in AI at Siemens with 15+ years working at the intersection of linguistics - and computational AI — walks through real-world risks, attack findings, and practical - defenses for chatbot security.

    We trace Maria’s path from linguist to industry - expert and her role advising on technology and risk, then dive into a large-scale - chatbot hacking exercise and the common failures that lead to legal exposure and - financial incidents. Key topics include prompt injection and knowledge-base exfiltration - techniques, hallucination causes and their impact on trust, and mitigations such - as output validation, query analysis, layered defenses, and the use of non-LLM classifiers. - We also cover human-in-the-loop workflows, AI-as-assistant moderation tools, and - prompt customization for controlled machine translation.

    If you’re building - or deploying generative AI systems, this episode offers practical, production-focused - guidance on chatbot security, AI safety, and improving accuracy and trust in deployed - models. +description: "Learn to harden generative AI chatbots against prompt injection and data exfiltration—defenses, detection, and techniques to reduce hallucinations." +topics: +- AI +- LLMs +- NLP +- MLOps +- production +- AI red teaming +- security +intro: "How do you harden generative AI chatbots against prompt injection, data exfiltration, and dangerous hallucinations? In this episode Maria Sukhareva — a principal key expert in AI at Siemens with 15+ years working at the intersection of linguistics and computational AI — walks through real-world risks, attack findings, and practical defenses for chatbot security.

    We trace Maria’s path from linguist to industry expert and her role advising on technology and risk, then dive into a large-scale chatbot hacking exercise and the common failures that lead to legal exposure and financial incidents. Key topics include prompt injection and knowledge-base exfiltration techniques, hallucination causes and their impact on trust, and mitigations such as output validation, query analysis, layered defenses, and the use of non-LLM classifiers. We also cover human-in-the-loop workflows, AI-as-assistant moderation tools, and prompt customization for controlled machine translation.

    If you’re building or deploying generative AI systems, this episode offers practical, production-focused guidance on chatbot security, AI safety, and improving accuracy and trust in deployed models." dateadded: 2024-12-17 duration: PT00H59M53S quotableClips: @@ -48,7 +40,7 @@ quotableClips: startOffset: 251 url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=251 endOffset: 342 -- name: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' +- name: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' startOffset: 342 url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=342 endOffset: 568 @@ -197,7 +189,7 @@ transcript: sec: 328 time: '5:28' who: Alexey -- header: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' +- header: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' - line: Exactly. AI has become very accessible. Previously, being an AI expert required coding skills. Now, simply knowing how to use a keyboard and craft prompts can position someone as a "prompt engineer" or even an AI expert. 
diff --git a/_podcast/get-data-analytics-and-data-engineering-job.md b/_podcast/get-data-analytics-and-data-engineering-job.md index e0ab0de5..57ed4edf 100644 --- a/_podcast/get-data-analytics-and-data-engineering-job.md +++ b/_podcast/get-data-analytics-and-data-engineering-job.md @@ -1,6 +1,6 @@ --- -title: 'How I Landed a Data Engineering Job: Bootcamp, Docker, Airflow, AWS & Interview Tips' -short: From Academia to Data Analytics and Engineering +title: "How I Landed a Data Engineering Job: Bootcamp, Docker, Airflow, AWS & Interview Tips" +short: "From Academia to Data Analytics and Engineering" season: 8 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1kDpXugcmDdVJ6qUAiNnHQ?si=aa62cc4dce5f41b2 youtube: https://www.youtube.com/watch?v=0wANfIvum4U -description: Learn data engineering with Docker and Airflow—bootcamp ROI, interview tips, portfolio tips and salary tactics to land a data engineering role -intro: How do you go from neuroscience research to a data engineering role — and what practical steps and skills actually get you hired? In this episode, Gloria Quiceno, Senior Analytics Engineer at ICE, walks through her transition from neuroscience labs to rebuilding enterprise data platforms (including a BI rebuild that saved €250K), and the concrete tools and tactics that landed her a data engineering job. We cover her early lab automation and scripting experience, learning MATLAB/R and Python, a first industry role as a business data analyst, and a four-month job-search timeline from bootcamp graduation to offer. Gloria explains building reproducible pipelines with Docker, orchestrating workflows with Airflow and AWS (including Step Functions), designing ETL/ELT and Snowflake integrations, capstone projects like a Twitter data pipeline, and volunteer ML practice with Omdena. 
She also shares application strategy (tracking ~130 applications), handling live coding and take-home interviews, GDPR recruitment rights, salary negotiation, and portfolio advice to stand out. Listen to get actionable interview tips, portfolio project ideas, and hands-on guidance for landing a data engineering job using bootcamps, Docker, Airflow, and AWS +description: "Learn data engineering with Docker and Airflow—bootcamp ROI, interview tips, portfolio tips and salary tactics to land a data engineering role" +intro: "How do you go from neuroscience research to a data engineering role — and what practical steps and skills actually get you hired? In this episode, Gloria Quiceno, Senior Analytics Engineer at ICE, walks through her transition from neuroscience labs to rebuilding enterprise data platforms (including a BI rebuild that saved €250K), and the concrete tools and tactics that landed her a data engineering job. We cover her early lab automation and scripting experience, learning MATLAB/R and Python, a first industry role as a business data analyst, and a four-month job-search timeline from bootcamp graduation to offer. Gloria explains building reproducible pipelines with Docker, orchestrating workflows with Airflow and AWS (including Step Functions), designing ETL/ELT and Snowflake integrations, capstone projects like a Twitter data pipeline, and volunteer ML practice with Omdena. She also shares application strategy (tracking ~130 applications), handling live coding and take-home interviews, GDPR recruitment rights, salary negotiation, and portfolio advice to stand out. 
Listen to get actionable interview tips, portfolio project ideas, and hands-on guidance for landing a data engineering job using bootcamps, Docker, Airflow, and AWS" topics: - career switch - data engineering diff --git a/_podcast/get-data-engineering-job-prep-and-interview.md b/_podcast/get-data-engineering-job-prep-and-interview.md index 6ea2e9be..0acd043f 100644 --- a/_podcast/get-data-engineering-job-prep-and-interview.md +++ b/_podcast/get-data-engineering-job-prep-and-interview.md @@ -1,6 +1,6 @@ --- -title: 'Data Engineering Job Prep & Interview Guide: Python, SQL, Portfolio & Job Search Tips' -short: Getting a Data Engineering Job (Summary and Q&A) +title: "Data Engineering Job Prep & Interview Guide: Python, SQL, Portfolio & Job Search Tips" +short: "Getting a Data Engineering Job (Summary and Q&A)" season: 9 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1SaZ0QXAIhcdH1gfaNoN4Z?si=OvvNFdTpSu2MCCDOWdYgJQ youtube: https://www.youtube.com/watch?v=asnt7xlyZXQ -description: 'Master data engineering job prep: Python, SQL tips, portfolio strategy, interview formats and job search tactics to land offers faster. quick wins' -intro: 'How do you actually get a data engineering job today — and which skills hiring teams care about most? In this episode, Jeff Katz, a Machine Learning Engineer at AppFolio and longtime instructor/founder of Jigsaw Labs and Flatiron School curriculum lead, distills a webinar on hiring demand into practical advice for job seekers. Drawing on applied AI and data engineering experience plus open-source contributions, Jeff walks through the core data engineering skills employers expect: deep Python and SQL, Docker, Airflow, and data warehouse fundamentals.

    You’ll hear concrete guidance on portfolio strategy (personal projects and open source), code quality and OOP patterns, the application funnel (LinkedIn → resume → interviews), behavioral and technical interview formats (SQL LeetCode, Python problems, take-home projects), and essential database concepts (views, OLTP vs OLAP). The episode also covers learning resources, transitioning from BI, certification vs skills trade-offs, remote work realities, and how to leverage non-coding experience. Listen to learn a practical roadmap for interviews, portfolio building, and job search tactics to increase your chances of landing a data engineering role.' +description: "Master data engineering job prep: Python, SQL tips, portfolio strategy, interview formats and job search tactics to land offers faster. quick wins" +intro: "How do you actually get a data engineering job today — and which skills hiring teams care about most? In this episode, Jeff Katz, a Machine Learning Engineer at AppFolio and longtime instructor/founder of Jigsaw Labs and Flatiron School curriculum lead, distills a webinar on hiring demand into practical advice for job seekers. Drawing on applied AI and data engineering experience plus open-source contributions, Jeff walks through the core data engineering skills employers expect: deep Python and SQL, Docker, Airflow, and data warehouse fundamentals.

    You’ll hear concrete guidance on portfolio strategy (personal projects and open source), code quality and OOP patterns, the application funnel (LinkedIn → resume → interviews), behavioral and technical interview formats (SQL LeetCode, Python problems, take-home projects), and essential database concepts (views, OLTP vs OLAP). The episode also covers learning resources, transitioning from BI, certification vs skills trade-offs, remote work realities, and how to leverage non-coding experience. Listen to learn a practical roadmap for interviews, portfolio building, and job search tactics to increase your chances of landing a data engineering role." topics: - data engineering - job search diff --git a/_podcast/get-data-scientist-job.md b/_podcast/get-data-scientist-job.md index 8762cb00..d925d39b 100644 --- a/_podcast/get-data-scientist-job.md +++ b/_podcast/get-data-scientist-job.md @@ -1,6 +1,6 @@ --- -title: 'Land Data Scientist Roles: Resumes, Portfolios, Interviews & Recruiter Workflow' -short: Standing out as a Data Scientist +title: "Land Data Scientist Roles: Resumes, Portfolios, Interviews & Recruiter Workflow" +short: "Standing out as a Data Scientist" season: 1 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2Yxay9HJmd6dvk34MHJ0K2 apple: https://podcasts.apple.com/us/podcast/standing-out-as-a-data-scientist-luke-whipps/id1541710331?i=1000502844994 -description: Master data scientist resumes, portfolios & interviews—insider recruiter workflow, CV tips, portfolio impact, negotiation and outreach to land roles faster -intro: How do you actually land a data scientist role — from a resume that passes screening to a portfolio that wins interviews and an offer that closes? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast with 8+ years recruiting experience, walks through the recruiter workflow and practical steps data scientists can use to improve hiring outcomes.

    We cover Luke’s six-stage recruitment process (role definition to close), how to define data scientist roles across companies, and recruiter expectations for CV design, information hierarchy, and industry/use-case alignment. Learn how to structure portfolios to link tech stack to concrete projects, craft a clear career narrative that demonstrates business impact, and prepare for interviews and negotiations. Junior candidates will get guidance on choosing an industry and showing purpose; academics learn how to productize research for industry. You’ll also hear tactical advice on tailored applications, LinkedIn outreach, candidate funnel sizes, salary signals, job-title alignment, and acceptable tenure patterns.

    Listen to gain actionable tips for resumes, portfolios, interviews, and working effectively with recruiters to increase your chances of landing a data scientist role +description: "Master data scientist resumes, portfolios & interviews—insider recruiter workflow, CV tips, portfolio impact, negotiation and outreach to land roles faster" +intro: "How do you actually land a data scientist role — from a resume that passes screening to a portfolio that wins interviews and an offer that closes? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast with 8+ years recruiting experience, walks through the recruiter workflow and practical steps data scientists can use to improve hiring outcomes.

    We cover Luke’s six-stage recruitment process (role definition to close), how to define data scientist roles across companies, and recruiter expectations for CV design, information hierarchy, and industry/use-case alignment. Learn how to structure portfolios to link tech stack to concrete projects, craft a clear career narrative that demonstrates business impact, and prepare for interviews and negotiations. Junior candidates will get guidance on choosing an industry and showing purpose; academics learn how to productize research for industry. You’ll also hear tactical advice on tailored applications, LinkedIn outreach, candidate funnel sizes, salary signals, job-title alignment, and acceptable tenure patterns.

    Listen to gain actionable tips for resumes, portfolios, interviews, and working effectively with recruiters to increase your chances of landing a data scientist role" topics: - data science - career growth diff --git a/_podcast/get-junior-data-job-and-transferable-skills.md b/_podcast/get-junior-data-job-and-transferable-skills.md index fd2bdf0e..b659c223 100644 --- a/_podcast/get-junior-data-job-and-transferable-skills.md +++ b/_podcast/get-junior-data-job-and-transferable-skills.md @@ -1,6 +1,6 @@ --- -title: 'Land Junior Data Jobs: CVs, Interviews, Transferable Skills & Overcome Imposter Syndrome' -short: Career Coaching +title: "Land Junior Data Jobs: CVs, Interviews, Transferable Skills & Overcome Imposter Syndrome" +short: "Career Coaching" season: 7 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3jMRuqU3ZEcSeoizuOU5q1 youtube: https://www.youtube.com/watch?v=_U8GrYJvmJM -description: 'Master landing junior data jobs: craft achievement-based CVs, highlight transferable skills, ace interviews and beat imposter syndrome with coach tips.' -intro: Struggling to land a junior data job—how do you turn non-linear experience into a recruiter-ready CV, prepare for interviews, and push past imposter syndrome? In this episode, Lindsay McQuade, a transformational coach with 20+ years across management consulting, higher education and tech and former Senior Career & Development Coach at SPICED Academy, guides listeners through practical steps for junior data roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED training rated 94% “very good/excellent”) to explain CV writing for data roles, achievement-based resumes, interview prep and negotiation. Topics include reframing past experience into evidence, identifying transferable skills for data analyst/scientist/engineer roles, tailoring applications by industry, the ikigai framework for career focus, and Berlin’s junior data market trends. 
We also cover impostor syndrome—its triggers, objective feedback strategies, and structured learning and T-shaped skills to build confidence. Tune in for clear job search strategy (balanced volume and targeted applications), how to choose a career coach, and practical LinkedIn networking tips to convert applications into interviews +description: "Master landing junior data jobs: craft achievement-based CVs, highlight transferable skills, ace interviews and beat imposter syndrome with coach tips." +intro: "Struggling to land a junior data job—how do you turn non-linear experience into a recruiter-ready CV, prepare for interviews, and push past imposter syndrome? In this episode, Lindsay McQuade, a transformational coach with 20+ years across management consulting, higher education and tech and former Senior Career & Development Coach at SPICED Academy, guides listeners through practical steps for junior data roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED training rated 94% “very good/excellent”) to explain CV writing for data roles, achievement-based resumes, interview prep and negotiation. Topics include reframing past experience into evidence, identifying transferable skills for data analyst/scientist/engineer roles, tailoring applications by industry, the ikigai framework for career focus, and Berlin’s junior data market trends. We also cover impostor syndrome—its triggers, objective feedback strategies, and structured learning and T-shaped skills to build confidence. 
Tune in for clear job search strategy (balanced volume and targeted applications), how to choose a career coach, and practical LinkedIn networking tips to convert applications into interviews" topics: - career growth dateadded: 2022-02-12 diff --git a/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md index 0b56a430..3652709a 100644 --- a/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md +++ b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0sm5qB1Cj4EJlbQ2giLtHR youtube: https://www.youtube.com/watch?v=i1NHRroQClQ -description: Learn hiring & management tactics for data science in B2B SaaS — hiring, onboarding, mentorship, and career growth strategies to build scalable analytics teams -intro: 'How do you hire, manage, and grow a high-impact data science team inside a B2B SaaS company? In this episode, Katie Bauer — Head of Data at GlossGenius and former data leader at Twitter and Reddit — walks through practical career frameworks and team-building strategies for product analysts, analytics engineers, marketing scientists, and data scientists. Katie traces her own trajectory from linguistics to analytics and explains what “craft” looks like in analytics: maintainability, documentation, and peer review. She breaks down IC versus management paths, how to mentor juniors through project-based learning, and when to buy versus build entry-level talent. You’ll hear concrete hiring and interview approaches for managers, how to assess strategy through case studies and trade-offs, and tips to help new hires succeed in their first month via proactive communication and async support channels. For leaders, Katie covers prioritization, raising data literacy, and fostering a data-driven culture. 
Listen to get actionable guidance on hiring data scientists, onboarding newcomers, developing senior talent, and scaling data teams in B2B SaaS.' +description: "Learn hiring & management tactics for data science in B2B SaaS — hiring, onboarding, mentorship, and career growth strategies to build scalable analytics teams" +intro: "How do you hire, manage, and grow a high-impact data science team inside a B2B SaaS company? In this episode, Katie Bauer — Head of Data at GlossGenius and former data leader at Twitter and Reddit — walks through practical career frameworks and team-building strategies for product analysts, analytics engineers, marketing scientists, and data scientists. Katie traces her own trajectory from linguistics to analytics and explains what “craft” looks like in analytics: maintainability, documentation, and peer review. She breaks down IC versus management paths, how to mentor juniors through project-based learning, and when to buy versus build entry-level talent. You’ll hear concrete hiring and interview approaches for managers, how to assess strategy through case studies and trade-offs, and tips to help new hires succeed in their first month via proactive communication and async support channels. For leaders, Katie covers prioritization, raising data literacy, and fostering a data-driven culture. Listen to get actionable guidance on hiring data scientists, onboarding newcomers, developing senior talent, and scaling data teams in B2B SaaS." 
topics: - data science - career development diff --git a/_podcast/hiring-data-scientists-and-analysts.md b/_podcast/hiring-data-scientists-and-analysts.md index 653c45a4..cc3ebe89 100644 --- a/_podcast/hiring-data-scientists-and-analysts.md +++ b/_podcast/hiring-data-scientists-and-analysts.md @@ -1,6 +1,6 @@ --- -title: 'Hiring Data Scientists & Analysts: Talent Pipelines, Job Specs, CV Screening & Salary Tips' -short: Recruiting Data Professionals +title: "Hiring Data Scientists & Analysts: Talent Pipelines, Job Specs, CV Screening & Salary Tips" +short: "Recruiting Data Professionals" season: 7 episode: 2 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/4LFZX7IfpdYkQ6si4ed0OR apple: https://podcasts.apple.com/us/podcast/recruiting-data-professionals-alicja-notowska/id1541710331?i=1000549307220 -description: 'Master hiring for data scientist & data analyst roles: craft job descriptions, build talent pipelines, screen CVs, negotiate salaries and land top hires.' -intro: How do you consistently find and hire the right data scientists and analysts in a competitive market? In this episode, Alicja Notowska — a talent acquisition specialist with 10+ years recruiting at Google, Zalando and now with embedded agency WeAreKeen — breaks down practical recruiting tactics for hiring data scientists and data analysts.

    We cover the full interview funnel and end-to-end recruiting responsibilities, crafting job specs that emphasise problems over perks, and using inclusive language plus AI tools to attract diverse candidates. Alicja explains sourcing channels (LinkedIn, GitHub, conferences, academia), building talent pipelines and 360° recruitment, and keyword strategies for CV screening. You’ll hear guidance on education signals (BSc/MSc/PhD), CV best practices (clear responsibilities, dates, avoid buzzwords), recruiter screening interviews, and evaluating portfolio projects and online course work.

    The episode also tackles salary conversations — bands, transparency and negotiation — managing hiring manager expectations, pathways for career changers, and offer etiquette. Listen to gain actionable tactics for job specs, CV screening, sourcing and salary negotiation when hiring data talent +description: "Master hiring for data scientist & data analyst roles: craft job descriptions, build talent pipelines, screen CVs, negotiate salaries and land top hires." +topics: +- data science +- data analytics +- job search +- career transition +- hiring +intro: "How do you consistently find and hire the right data scientists and analysts in a competitive market? In this episode, Alicja Notowska — a talent acquisition specialist with 10+ years recruiting at Google, Zalando and now with embedded agency WeAreKeen — breaks down practical recruiting tactics for hiring data scientists and data analysts.

    We cover the full interview funnel and end-to-end recruiting responsibilities, crafting job specs that emphasise problems over perks, and using inclusive language plus AI tools to attract diverse candidates. Alicja explains sourcing channels (LinkedIn, GitHub, conferences, academia), building talent pipelines and 360° recruitment, and keyword strategies for CV screening. You’ll hear guidance on education signals (BSc/MSc/PhD), CV best practices (clear responsibilities, dates, avoid buzzwords), recruiter screening interviews, and evaluating portfolio projects and online course work.

    The episode also tackles salary conversations — bands, transparency and negotiation — managing hiring manager expectations, pathways for career changers, and offer etiquette. Listen to gain actionable tactics for job specs, CV screening, sourcing and salary negotiation when hiring data talent" dateadded: 2022-01-29 duration: PT01H04M55S diff --git a/_podcast/hiring-for-data-engineering-jobs-in-europe.md b/_podcast/hiring-for-data-engineering-jobs-in-europe.md index 49819543..d0729702 100644 --- a/_podcast/hiring-for-data-engineering-jobs-in-europe.md +++ b/_podcast/hiring-for-data-engineering-jobs-in-europe.md @@ -1,6 +1,6 @@ --- -title: 'Hiring Data Engineers in Europe: Nicolas Rassam on Interviews, Skills & Career Switches' -short: Recruiting Data Engineers +title: "Hiring Data Engineers in Europe: Nicolas Rassam on Interviews, Skills & Career Switches" +short: "Recruiting Data Engineers" season: 8 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5ldkzYiHFvJCKoEyfAlvDs?si=WFJzcZ7fRCi1dzwapNGfzA youtube: https://www.youtube.com/watch?v=hylxiu4VGTo -description: 'Learn hiring strategies for data engineering in Europe: interview prep, resume tips (SQL/Python), career-switch paths and cloud fundamentals to win roles.' -intro: How do you hire data engineers in Europe today — and what should candidates and hiring managers actually focus on during interviews? In this episode, Nicolas Rassam, a Senior Talent Acquisition Partner at Helsing with 10+ years scaling AI and engineering teams at Onfido and Criteo, walks through the practical realities of hiring data engineers across Europe's competitive, borderless market.

    We cover why data engineering matters now, differences in European hiring footprints, and the rising demand for modern tooling. Nicolas breaks down common hiring challenges — title ambiguity, experience mismatches, and recruiter technical literacy — and explains how to evaluate transferable experience from software and BI roles. You'll get concrete guidance on level expectations (junior → senior), typical interview processes and assessments, resume essentials (SQL, Python, problem solving, outcomes), cloud fundamentals, when infrastructure/DevOps skills matter, portfolio/GitHub storytelling, and strategies for career switchers (internships, targeted projects). The episode also addresses hiring without degrees, industry fit for regulated data, and how targeted applications beat spray-and-pray. Listen to learn what to prepare for interviews, how to position projects, and what hiring teams really look for when recruiting data engineering talent in Europe +description: "Learn hiring strategies for data engineering in Europe: interview prep, resume tips (SQL/Python), career-switch paths and cloud fundamentals to win roles." +intro: "How do you hire data engineers in Europe today — and what should candidates and hiring managers actually focus on during interviews? In this episode, Nicolas Rassam, a Senior Talent Acquisition Partner at Helsing with 10+ years scaling AI and engineering teams at Onfido and Criteo, walks through the practical realities of hiring data engineers across Europe's competitive, borderless market.

    We cover why data engineering matters now, differences in European hiring footprints, and the rising demand for modern tooling. Nicolas breaks down common hiring challenges — title ambiguity, experience mismatches, and recruiter technical literacy — and explains how to evaluate transferable experience from software and BI roles. You'll get concrete guidance on level expectations (junior → senior), typical interview processes and assessments, resume essentials (SQL, Python, problem solving, outcomes), cloud fundamentals, when infrastructure/DevOps skills matter, portfolio/GitHub storytelling, and strategies for career switchers (internships, targeted projects). The episode also addresses hiring without degrees, industry fit for regulated data, and how targeted applications beat spray-and-pray. Listen to learn what to prepare for interviews, how to position projects, and what hiring teams really look for when recruiting data engineering talent in Europe" topics: - data engineering - career transition diff --git a/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md index 06c6f476..5a6e6598 100644 --- a/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md +++ b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md @@ -1,6 +1,6 @@ --- -title: 'How to Hire Data Scientists: Interview Questions, MLOps, AutoML Limits & Inclusive Hiring' -short: Hiring Data Science Talent +title: "How to Hire Data Scientists: Interview Questions, MLOps, AutoML Limits & Inclusive Hiring" +short: "Hiring Data Science Talent" season: 9 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7ddvA9zNTip5Bt6EYnMNty?si=4fee84a6ad43465d youtube: https://www.youtube.com/watch?v=Af9t9r2b0z0 -description: 'Learn to hire data scientists: interview questions, MLOps insights and inclusive hiring tactics to assess technical depth, AutoML limits 
and build better teams.' -intro: 'How do you hire the right data scientists today—balancing algorithmic depth, MLOps skills, and inclusive hiring practices? In this episode, Olga Ivina, Delivery Data Science Director at Microsoft and former Deloitte consultant with a Ph.D. and 16+ years in AI, walks through practical strategies for recruiting strong data science talent.

    Olga draws on her journey from applied mathematics and air pollution research to leading delivery teams to explain core hiring criteria: technical excellence, growth mindset, communication, and humility. We cover concrete interview questions and diagnostic problems that reveal algorithmic understanding and assumptions, how to structure coding and analytical tasks, and ways to assess role fit between mathematical expertise and engineering skills. The conversation also addresses the rise of MLOps, realistic limits of AutoML and the human-in-the-loop, career path trade-offs, and interviewing candidates with employment gaps.

    If you’re hiring data scientists or building interview processes, this episode delivers actionable frameworks, sample diagnostic questions, and inclusive hiring tips—language to avoid in job posts and strategies to attract diverse candidates—so you can evaluate both technical depth and practical delivery capability.' +description: "Learn to hire data scientists: interview questions, MLOps insights and inclusive hiring tactics to assess technical depth, AutoML limits and build better teams." +intro: "How do you hire the right data scientists today—balancing algorithmic depth, MLOps skills, and inclusive hiring practices? In this episode, Olga Ivina, Delivery Data Science Director at Microsoft and former Deloitte consultant with a Ph.D. and 16+ years in AI, walks through practical strategies for recruiting strong data science talent.

    Olga draws on her journey from applied mathematics and air pollution research to leading delivery teams to explain core hiring criteria: technical excellence, growth mindset, communication, and humility. We cover concrete interview questions and diagnostic problems that reveal algorithmic understanding and assumptions, how to structure coding and analytical tasks, and ways to assess role fit between mathematical expertise and engineering skills. The conversation also addresses the rise of MLOps, realistic limits of AutoML and the human-in-the-loop, career path trade-offs, and interviewing candidates with employment gaps.

    If you’re hiring data scientists or building interview processes, this episode delivers actionable frameworks, sample diagnostic questions, and inclusive hiring tips—language to avoid in job posts and strategies to attract diverse candidates—so you can evaluate both technical depth and practical delivery capability." topics: - data science - career growth diff --git a/_podcast/how-to-break-into-data-science.md b/_podcast/how-to-break-into-data-science.md index aa63439f..f9a36de9 100644 --- a/_podcast/how-to-break-into-data-science.md +++ b/_podcast/how-to-break-into-data-science.md @@ -1,6 +1,6 @@ --- -title: 'Data Science Career Playbook: Job Hunt, Portfolios, DALL·E 2 & Overcoming FOMO' -short: Data Scientists at Work +title: "Data Science Career Playbook: Job Hunt, Portfolios, DALL·E 2 & Overcoming FOMO" +short: "Data Scientists at Work" season: 9 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1RSUsWDOBDD4sNDruEbnEY youtube: https://www.youtube.com/watch?v=oUycqtMoYr8 -description: 'Master data science job hunt and portfolio tactics: actionable projects, recruiter tips, DALL·E 2 basics and FOMO coping strategies to land interviews faster.' -intro: How do you actually break into data science, build a portfolio that gets interviews, and stay sane while every new AI model vies for your attention? In this episode Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI (founder of “So you want to be a data scientist?”) — walks through a practical career playbook for job hunting, portfolio building, and coping with FOMO and imposter syndrome.

    We cover Mısra’s career path from big data engineering to developer advocacy, what a data scientist’s day-to-day looks like, and the typical deliverables hiring managers expect (models, pipelines, reports, presentations). She explains role variants (consultant, in-house, freelance), tradeoffs between generalist and specialist tracks, and when a master’s or PhD matters. You’ll get concrete job-hunt tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why real-world datasets (like NYC Open Data) matter. The episode also includes a clear, high-level overview of DALL·E 2 and diffusion models, plus strategies for staying current (conferences vs social media) and knowing when a new framework is “good enough.”

    Listen to learn actionable steps to refine your portfolio, present data science value to stakeholders, and manage FOMO while advancing your career +description: "Master data science job hunt and portfolio tactics: actionable projects, recruiter tips, DALL·E 2 basics and FOMO coping strategies to land interviews faster." +intro: "How do you actually break into data science, build a portfolio that gets interviews, and stay sane while every new AI model vies for your attention? In this episode Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI (founder of “So you want to be a data scientist?”) — walks through a practical career playbook for job hunting, portfolio building, and coping with FOMO and imposter syndrome.

    We cover Mısra’s career path from big data engineering to developer advocacy, what a data scientist’s day-to-day looks like, and the typical deliverables hiring managers expect (models, pipelines, reports, presentations). She explains role variants (consultant, in-house, freelance), tradeoffs between generalist and specialist tracks, and when a master’s or PhD matters. You’ll get concrete job-hunt tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why real-world datasets (like NYC Open Data) matter. The episode also includes a clear, high-level overview of DALL·E 2 and diffusion models, plus strategies for staying current (conferences vs social media) and knowing when a new framework is “good enough.”

    Listen to learn actionable steps to refine your portfolio, present data science value to stakeholders, and manage FOMO while advancing your career" topics: - data science - career growth @@ -78,7 +78,7 @@ quotableClips: startOffset: 2131 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2131 endOffset: 2412 -- name: 'Learning a New Framework: Knowing When It''s "Good Enough"' +- name: 'Learning a New Framework: Knowing When It''s "Good Enough"' startOffset: 2412 url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2412 endOffset: 2567 @@ -893,7 +893,7 @@ transcript: sec: 2301 time: '38:21' who: Misra -- header: 'Learning a New Framework: Knowing When It''s "Good Enough"' +- header: 'Learning a New Framework: Knowing When It''s "Good Enough"' - line: Let's say there is a new framework. You think this framework is useful so decide to pick it up and learn it a little bit better. We don't want to learn it perfectly – we know that this is not going to be a great way of spending our diff --git a/_podcast/how-to-grow-your-ml-engineering-career.md b/_podcast/how-to-grow-your-ml-engineering-career.md index 04db88fc..ffc43d49 100644 --- a/_podcast/how-to-grow-your-ml-engineering-career.md +++ b/_podcast/how-to-grow-your-ml-engineering-career.md @@ -1,6 +1,6 @@ --- -title: 'How to Grow Your ML Engineering Career: Platform Work, LLM Workflows & Debugging Skills' -short: How to Grow Your ML Engineering Career +title: "How to Grow Your ML Engineering Career: Platform Work, LLM Workflows & Debugging Skills" +short: "How to Grow Your ML Engineering Career" season: 12 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1mDlJi7vfLeJgIZStQ4G90?si=Spd04VwmSh2zZCgZzLIPbA youtube: https://www.youtube.com/watch?v=cUxZBXQgZaU -description: Discover career transitions into ML, prompt engineering and LLMs—practical debugging tips, transferable skills, hiring insights, and real platform lessons -intro: How do you move from web and game development into 
building machine learning platforms and working with LLMs—and what practical skills carry over? In this episode Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, gaming and online retail, and currently an ML Platform engineer and internal consultant at Zalando, answers that question through concrete examples and career lessons.

    We trace Krzysztof’s path from HTML5, Objective-C, Swift and Unity to Python, ML platform work (the zflow library and pipeline architecture), and hands-on experiments with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions between stacks and roles, platform consulting—training, onboarding and user support—prompt engineering tips, debugging strategies (rubber ducking, divide-and-conquer), and a real Postgres optimization troubleshooting case. He also discusses transferable skills like SQL, Git and shell, T-shaped expertise, hiring dynamics, and how to get unstuck with ChatGPT and problem decomposition.

    Listen to gain practical guidance on ML platforms, prompt engineering, debugging techniques, and career strategy for transitioning into ML and LLM work—plus actionable resources and prioritization tactics you can apply immediately +description: "Discover career transitions into ML, prompt engineering and LLMs—practical debugging tips, transferable skills, hiring insights, and real platform lessons" +intro: "How do you move from web and game development into building machine learning platforms and working with LLMs—and what practical skills carry over? In this episode Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, gaming and online retail, and currently an ML Platform engineer and internal consultant at Zalando, answers that question through concrete examples and career lessons.

    We trace Krzysztof’s path from HTML5, Objective-C, Swift and Unity to Python, ML platform work (the zflow library and pipeline architecture), and hands-on experiments with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions between stacks and roles, platform consulting—training, onboarding and user support—prompt engineering tips, debugging strategies (rubber ducking, divide-and-conquer), and a real Postgres optimization troubleshooting case. He also discusses transferable skills like SQL, Git and shell, T-shaped expertise, hiring dynamics, and how to get unstuck with ChatGPT and problem decomposition.

    Listen to gain practical guidance on ML platforms, prompt engineering, debugging techniques, and career strategy for transitioning into ML and LLM work—plus actionable resources and prioritization tactics you can apply immediately" topics: - machine learning - career transitions diff --git a/_podcast/how-to-stand-out-in-data-science.md b/_podcast/how-to-stand-out-in-data-science.md index 62b0a53d..58d56e51 100644 --- a/_podcast/how-to-stand-out-in-data-science.md +++ b/_podcast/how-to-stand-out-in-data-science.md @@ -1,6 +1,6 @@ --- -title: 'Data Science Career Playbook: Build Unique IoT Portfolios, Explainable AI, OSINT & LinkedIn Growth' -short: Hacking Your Data Career +title: "Data Science Career Playbook: Build Unique IoT Portfolios, Explainable AI, OSINT & LinkedIn Growth" +short: "Hacking Your Data Career" season: 8 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6oJsS0vhvAQasLNv3IklQ6 youtube: https://www.youtube.com/watch?v=RhSg8ill1So -description: 'Discover proven strategies to stand out in data science: build unique portfolio projects, master proactive task selection, and grow visibility with expert LinkedIn tactics.' -intro: In this episode, Marijn Markus—AI Lead and Managing Data Scientist at Capgemini—shares how to stand out in data science by combining curiosity, courage, and creativity. From his unconventional background in sociology and criminology, Marijn explains how diverse teams outperform homogeneous ones, why proactive problem-solving matters, and how to challenge hierarchy with data-driven insights.

    You'll learn how to build unique portfolio projects (like time series modeling from a coffee machine), apply OSINT concepts to modern analytics, and grow your visibility through a thoughtful LinkedIn strategy +description: "Discover proven strategies to stand out in data science: build unique portfolio projects, master proactive task selection, and grow visibility with expert LinkedIn tactics." +intro: "In this episode, Marijn Markus—AI Lead and Managing Data Scientist at Capgemini—shares how to stand out in data science by combining curiosity, courage, and creativity. From his unconventional background in sociology and criminology, Marijn explains how diverse teams outperform homogeneous ones, why proactive problem-solving matters, and how to challenge hierarchy with data-driven insights.

    You'll learn how to build unique portfolio projects (like time series modeling from a coffee machine), apply OSINT concepts to modern analytics, and grow your visibility through a thoughtful LinkedIn strategy" topics: - data science - career growth diff --git a/_podcast/how-to-switch-to-ml-tech-without-experience.md b/_podcast/how-to-switch-to-ml-tech-without-experience.md index 52cad159..f8f04b5b 100644 --- a/_podcast/how-to-switch-to-ml-tech-without-experience.md +++ b/_podcast/how-to-switch-to-ml-tech-without-experience.md @@ -1,6 +1,6 @@ --- -title: 'How to Switch to Tech: Community Meetups, Open Source Fellowships & Landing an Ecosia Internship' -short: From Roasting Coffee to Backend Development +title: "How to Switch to Tech: Community Meetups, Open Source Fellowships & Landing an Ecosia Internship" +short: "From Roasting Coffee to Backend Development" season: 8 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3AnUc03nLbIYS6ichWIrRE?si=momJMlwdTpKFkI0FYQilag youtube: https://www.youtube.com/watch?v=BKqmNdxsBko -description: 'Discover practical career switch tips: meetups, open source fellowship & landing an Ecosia internship—networking, study paths, funding, mentorship to get hired.' -intro: How do you switch to tech from a completely different career and actually land an internship at a mission-driven company? In this episode, Jessica Greene — Senior Machine Learning Engineer at Ecosia and co-organizer of PyLadies Berlin — walks through her journey from film and coffee roasting to machine learning, sharing concrete steps for a career change to tech. We cover the learning path Jessica used (Codecademy, Andrew Ng, FreeCodeCamp), funding and study time via Germany’s Bildungsgutschein, and hands-on experience through an open source fellowship (Rails Girls Summer of Code) and pair programming. 
You’ll hear how community meetups, PyLadies, and networking translated into an Ecosia internship, what interviewers notice (inquisitiveness, creating roles), and practical tips for building system skills (terminal, dual-boot Linux), overcoming imposter syndrome, and getting started with public speaking and event organizing. If you’re considering a switch to tech, this episode offers realistic guidance on open source fellowships, meetups, study resources, and interview strategies to help you build skills, confidence, and professional connections +description: "Discover practical career switch tips: meetups, open source fellowship & landing an Ecosia internship—networking, study paths, funding, mentorship to get hired." +intro: "How do you switch to tech from a completely different career and actually land an internship at a mission-driven company? In this episode, Jessica Greene — Senior Machine Learning Engineer at Ecosia and co-organizer of PyLadies Berlin — walks through her journey from film and coffee roasting to machine learning, sharing concrete steps for a career change to tech. We cover the learning path Jessica used (Codecademy, Andrew Ng, FreeCodeCamp), funding and study time via Germany’s Bildungsgutschein, and hands-on experience through an open source fellowship (Rails Girls Summer of Code) and pair programming. You’ll hear how community meetups, PyLadies, and networking translated into an Ecosia internship, what interviewers notice (inquisitiveness, creating roles), and practical tips for building system skills (terminal, dual-boot Linux), overcoming imposter syndrome, and getting started with public speaking and event organizing. 
If you’re considering a switch to tech, this episode offers realistic guidance on open source fellowships, meetups, study resources, and interview strategies to help you build skills, confidence, and professional connections" topics: - career switch - machine learning diff --git a/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md index 22e79f76..a3f7fd7e 100644 --- a/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md +++ b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md @@ -1,6 +1,6 @@ --- -title: 'Transition from QA to Machine Learning & Data Engineering: Projects, Cloud & Interview Prep' -short: From Testing Phones to Managing NLP Projects +title: "Transition from QA to Machine Learning & Data Engineering: Projects, Cloud & Interview Prep" +short: "From Testing Phones to Managing NLP Projects" season: 11 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1LMg70fGthIR2jF4JdmFkb?si=BmEfOtfgSEOpKvp5ENRA2g youtube: https://www.youtube.com/watch?v=-xumbiXOlA8 -description: 'Master the transition to machine learning & data engineering: build cloud-deployed projects, sharpen interview prep, and revamp your CV to land offers.' -intro: 'How do you move from a QA role into machine learning and data engineering—what projects, cloud skills, and interview prep actually make a difference? In this episode Alvaro Navas Peire walks through his journey from testing Android phones and QA checklists to quitting the industry, taking a gap year, and retraining in machine learning and data engineering. With an informatics engineering background and hands-on experience from postgraduate courses, Neuromatch, and DataTalks’ ML & DE Zoomcamps, Alvaro explains the structured learning path he followed and the portfolio projects (EDA, vegetable image classification, NLP) that proved useful for hiring teams.

    We cover practical topics: cloud deployment on Google Cloud, AWS and Azure; using cloud credits and Databricks; how to present projects without underselling them; technical note-taking and GitHub visibility; and role-play for interview soft skills. Alvaro also contrasts research-heavy ML with tooling-focused data engineering and shares CV, portfolio, and negotiation tips. Tune in if you’re planning a transition to machine learning or data engineering and need concrete guidance on projects, cloud experience, and interview preparation.' +description: "Master the transition to machine learning & data engineering: build cloud-deployed projects, sharpen interview prep, and revamp your CV to land offers." +intro: "How do you move from a QA role into machine learning and data engineering—what projects, cloud skills, and interview prep actually make a difference? In this episode Alvaro Navas Peire walks through his journey from testing Android phones and QA checklists to quitting the industry, taking a gap year, and retraining in machine learning and data engineering. With an informatics engineering background and hands-on experience from postgraduate courses, Neuromatch, and DataTalks’ ML & DE Zoomcamps, Alvaro explains the structured learning path he followed and the portfolio projects (EDA, vegetable image classification, NLP) that proved useful for hiring teams.

    We cover practical topics: cloud deployment on Google Cloud, AWS and Azure; using cloud credits and Databricks; how to present projects without underselling them; technical note-taking and GitHub visibility; and role-play for interview soft skills. Alvaro also contrasts research-heavy ML with tooling-focused data engineering and shares CV, portfolio, and negotiation tips. Tune in if you’re planning a transition to machine learning or data engineering and need concrete guidance on projects, cloud experience, and interview preparation." topics: - QA - machine learning diff --git a/_podcast/hugging-face-contributions-and-nlp-portfolio.md b/_podcast/hugging-face-contributions-and-nlp-portfolio.md index 4f33ff67..be08054b 100644 --- a/_podcast/hugging-face-contributions-and-nlp-portfolio.md +++ b/_podcast/hugging-face-contributions-and-nlp-portfolio.md @@ -1,6 +1,6 @@ --- -title: 'Contribute to Hugging Face & Build an NLP Portfolio: Open Source, Datasets, Spaces' -short: Developer Advocacy Engineer for Open-Source +title: "Contribute to Hugging Face & Build an NLP Portfolio: Open Source, Datasets, Spaces" +short: "Developer Advocacy Engineer for Open-Source" season: 9 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5k60LWIwnMpvaIbTaryRv4?si=liHqmXVYT-uB1PO4uB65OQ youtube: https://www.youtube.com/watch?v=SnEYvF-Ztb8 -description: 'Build an NLP portfolio on Hugging Face: contribute to open source, publish datasets, deploy Spaces demos, gain PR skills and boost hiring odds.' -intro: 'How do you go from beginner projects to contributing to Hugging Face and building an visible NLP portfolio? In this episode, Merve Noyan — Google Developer Expert in Machine Learning, grad student in Data Science, and NLP-focused ML engineer — walks through practical steps for contributing to open source, datasets, and Hugging Face Spaces.

    We cover Merve’s transition into NLP, finding open source via contribution sprints and good-first issues, and the nuts-and-bolts of datasets work: canonical datasets, scripts, and CI. Learn how the Hub, TensorFlow & Keras integrations, and model reproducibility features support a reproducible workflow and model registry concepts. Merve explains creating demo apps with Streamlit or Gradio on Spaces, using the Community tab and forums, and how workshops and sprints build confidence.

    You’ll also get concrete advice on starting contributions while working full-time, non-code contributions, structured programs like Google Summer of Code and Hacktoberfest, handling PR feedback, and what hiring managers look for on GitHub. Tune in to walk away with actionable steps to contribute to Hugging Face, publish datasets and demos, and build an NLP portfolio recruiters can evaluate.' +description: "Build an NLP portfolio on Hugging Face: contribute to open source, publish datasets, deploy Spaces demos, gain PR skills and boost hiring odds." +intro: "How do you go from beginner projects to contributing to Hugging Face and building a visible NLP portfolio? In this episode, Merve Noyan — Google Developer Expert in Machine Learning, grad student in Data Science, and NLP-focused ML engineer — walks through practical steps for contributing to open source, datasets, and Hugging Face Spaces.

    We cover Merve’s transition into NLP, finding open source via contribution sprints and good-first issues, and the nuts-and-bolts of datasets work: canonical datasets, scripts, and CI. Learn how the Hub, TensorFlow & Keras integrations, and model reproducibility features support a reproducible workflow and model registry concepts. Merve explains creating demo apps with Streamlit or Gradio on Spaces, using the Community tab and forums, and how workshops and sprints build confidence.

    You’ll also get concrete advice on starting contributions while working full-time, non-code contributions, structured programs like Google Summer of Code and Hacktoberfest, handling PR feedback, and what hiring managers look for on GitHub. Tune in to walk away with actionable steps to contribute to Hugging Face, publish datasets and demos, and build an NLP portfolio recruiters can evaluate." topics: - machine learning - NLP diff --git a/_podcast/human-centered-ai-automatic-speech-recognition.md b/_podcast/human-centered-ai-automatic-speech-recognition.md index 2abf0ff6..2e2315c1 100644 --- a/_podcast/human-centered-ai-automatic-speech-recognition.md +++ b/_podcast/human-centered-ai-automatic-speech-recognition.md @@ -1,6 +1,6 @@ --- -title: 'Human-Centered Speech Recognition: ASR for Disordered Speech and Accents' -short: Human-Centered AI for Disordered Speech Recognition +title: "Human-Centered Speech Recognition: ASR for Disordered Speech and Accents" +short: "Human-Centered AI for Disordered Speech Recognition" season: 19 episode: 2 guests: @@ -14,25 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/human-centered-ai-for-disordered-speech-recognition/id1541710331?i=1000671805368 spotify: https://open.spotify.com/show/0pck8zuiXdI0OrCg86DAPy?si=ac857db69d484277 youtube: https://www.youtube.com/watch?v=yTZ4cddD7DU -description: Discover ASR solutions for disordered speech and accents—boost recognition - accuracy, reduce bias, and design accessible human-centered models now. -intro: How can automatic speech recognition (ASR) better serve people with disordered - speech and diverse accents? In this episode Katarzyna Foremniak, a computational - linguist with over 10 years in NLP who has built language models for Audi and Porsche - and teaches at the University of Warsaw, examines human-centered ASR for atypical - and accented speech. 
We trace her move from linguistics to computational approaches - and cover core phonetics and morpho-syntax concepts that matter for speech recognition. -

    Key topics include distinctions between accents and speech disorders, limitations - of standard ASR datasets, strategies for disordered speech recognition such as specialized - datasets, data augmentation and synthetic variations, multimodal ASR with lip-reading, - and transfer learning for fine-tuning with limited data. We also discuss data collection - challenges (GDPR, clinical data), bilingualism effects, stammering and fluency, - pronunciation issues like Polish consonant clusters, and practical workflows including - Amazon Transcribe plus LLM post-processing. Deployment tradeoffs—model size, on-device - setups, automotive voice use cases—and assistive applications round out the conversation. -

    Listeners interested in speech recognition, disordered speech, accents, - and ethical data practices will gain practical technical strategies and a clearer - view of research and deployment priorities. +description: "Discover ASR solutions for disordered speech and accents—boost recognition accuracy, reduce bias, and design accessible human-centered models now." +topics: +- AI +- NLP +- LLMs +- machine learning +- data governance +intro: "How can automatic speech recognition (ASR) better serve people with disordered speech and diverse accents? In this episode Katarzyna Foremniak, a computational linguist with over 10 years in NLP who has built language models for Audi and Porsche and teaches at the University of Warsaw, examines human-centered ASR for atypical and accented speech. We trace her move from linguistics to computational approaches and cover core phonetics and morpho-syntax concepts that matter for speech recognition.

    Key topics include distinctions between accents and speech disorders, limitations of standard ASR datasets, strategies for disordered speech recognition such as specialized datasets, data augmentation and synthetic variations, multimodal ASR with lip-reading, and transfer learning for fine-tuning with limited data. We also discuss data collection challenges (GDPR, clinical data), bilingualism effects, stammering and fluency, pronunciation issues like Polish consonant clusters, and practical workflows including Amazon Transcribe plus LLM post-processing. Deployment tradeoffs—model size, on-device setups, automotive voice use cases—and assistive applications round out the conversation.

    Listeners interested in speech recognition, disordered speech, accents, and ethical data practices will gain practical technical strategies and a clearer view of research and deployment priorities." dateadded: 2024-10-10 duration: PT00H57M19S quotableClips: diff --git a/_podcast/human-centered-mlops-and-model-monitoring.md b/_podcast/human-centered-mlops-and-model-monitoring.md index 0bd24562..2e5ee422 100644 --- a/_podcast/human-centered-mlops-and-model-monitoring.md +++ b/_podcast/human-centered-mlops-and-model-monitoring.md @@ -1,6 +1,6 @@ --- -title: 'Master Human-Centered MLOps: Stakeholder Buy-In, Monitoring, Debugging & Incident Response' -short: Humans in the Loop +title: "Master Human-Centered MLOps: Stakeholder Buy-In, Monitoring, Debugging & Incident Response" +short: "Humans in the Loop" season: 4 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/23VxmAEkKUs1kjaludRQAR apple: https://podcasts.apple.com/us/podcast/humans-in-the-loop-lina-weichbrodt/id1541710331?i=1000530535704 -description: 'Master human-centered MLOps: actionable stakeholder buy-in tactics, model monitoring and incident response playbooks to debug and ship reliable ML.' -intro: 'How do you make MLOps human-centered so stakeholders actually trust models and teams can monitor, debug, and respond to incidents? In this episode, Lina Weichbrodt — a generalist machine learning developer who prototypes data-driven products end-to-end (design, implementation, A/B tests, operations) — walks through practical MLOps strategies that prioritize people as much as pipelines.

    We cover a project intake checklist (business case, KPIs, alternatives), how to evaluate whether AI is needed, and scoping problems so outcomes are visible in the UI. Lina explains stakeholder engagement techniques (pairing, availability, converting fears into mitigations), demos vs reporting for buy-in, and building trust through domain understanding and data issue support. You’ll get concrete guidance on incident preparedness and ML incident response: service levels, impact assessment, post-mortems, Five Whys root-cause debugging, and turning findings into tickets. We also dive into model monitoring and detection (live test sets, small A/B tests, feature drift, data monitoring), observability practices, explainability vs debugging, and a credit-scoring case study to illustrate prioritization. Listen to learn repeatable, human-centered tactics for stakeholder buy-in, model monitoring, ML debugging, and incident response.' +description: "Master human-centered MLOps: actionable stakeholder buy-in tactics, model monitoring and incident response playbooks to debug and ship reliable ML." +intro: "How do you make MLOps human-centered so stakeholders actually trust models and teams can monitor, debug, and respond to incidents? In this episode, Lina Weichbrodt — a generalist machine learning developer who prototypes data-driven products end-to-end (design, implementation, A/B tests, operations) — walks through practical MLOps strategies that prioritize people as much as pipelines.

    We cover a project intake checklist (business case, KPIs, alternatives), how to evaluate whether AI is needed, and scoping problems so outcomes are visible in the UI. Lina explains stakeholder engagement techniques (pairing, availability, converting fears into mitigations), demos vs reporting for buy-in, and building trust through domain understanding and data issue support. You’ll get concrete guidance on incident preparedness and ML incident response: service levels, impact assessment, post-mortems, Five Whys root-cause debugging, and turning findings into tickets. We also dive into model monitoring and detection (live test sets, small A/B tests, feature drift, data monitoring), observability practices, explainability vs debugging, and a credit-scoring case study to illustrate prioritization. Listen to learn repeatable, human-centered tactics for stakeholder buy-in, model monitoring, ML debugging, and incident response." topics: - MLOps - machine learning diff --git a/_podcast/industrial-data-small-data-production-machine-learning.md b/_podcast/industrial-data-small-data-production-machine-learning.md index 4d754421..88515ff5 100644 --- a/_podcast/industrial-data-small-data-production-machine-learning.md +++ b/_podcast/industrial-data-small-data-production-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'Master Industrial Data: Synthetic Tabular Data, Small-Data Modeling, Sensors & MLOps' -short: Navigating Industrial Data Challenges +title: "Master Industrial Data: Synthetic Tabular Data, Small-Data Modeling, Sensors & MLOps" +short: "Navigating Industrial Data Challenges" season: 13 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1o6rtfFydBVoc0ER5ZUiRQ?si=rkgzEFquSfql4Za6cyjX2g youtube: https://www.youtube.com/watch?v=rwuud5wr3J4 -description: 'Master industrial data: learn synthetic tabular data and small-data modeling for sensors & MLOps—optimize QC, predictive maintenance and deploy models faster.' 
-intro: How do you build reliable machine learning when your datasets are generated by production lines, tiny R&D campaigns, or long-running quality tests instead of millions of web events? In this episode, Rosona Eldred — a mathematician-turned-machine learning engineer leading synthetic tabular data work in an AI Innovation team — walks us through mastering industrial data, from sensors and traceability to small-data modeling and MLOps trade-offs.

    We explore what makes industrial data unique (R&D experiments, pilot plants, full production), concrete process examples like blue-paint scale-up and packing-peanuts manufacturing, and long-term quality tests such as the Florida weathering trial. Rosona breaks down sensor choices, batching and granularity challenges, inline versus destructive quality measurements, and how anomaly detection should feed human decisioning. She also covers regulatory and sustainability tracking, reusing historical experiments for reformulation, proxy metrics, optimization trade-offs, and practical methods for tiny-data problems — statistical techniques, transfer learning, and leveraging domain experts. Finally, she contrasts sparse R&D models with streaming, production-scale MLOps.

    Listen to gain concrete strategies for synthetic tabular data, small-data modeling, sensor-driven monitoring, and when to adopt production MLOps versus lightweight R&D workflows +description: "Master industrial data: learn synthetic tabular data and small-data modeling for sensors & MLOps—optimize QC, predictive maintenance and deploy models faster." +intro: "How do you build reliable machine learning when your datasets are generated by production lines, tiny R&D campaigns, or long-running quality tests instead of millions of web events? In this episode, Rosona Eldred — a mathematician-turned-machine learning engineer leading synthetic tabular data work in an AI Innovation team — walks us through mastering industrial data, from sensors and traceability to small-data modeling and MLOps trade-offs.

    We explore what makes industrial data unique (R&D experiments, pilot plants, full production), concrete process examples like blue-paint scale-up and packing-peanuts manufacturing, and long-term quality tests such as the Florida weathering trial. Rosona breaks down sensor choices, batching and granularity challenges, inline versus destructive quality measurements, and how anomaly detection should feed human decisioning. She also covers regulatory and sustainability tracking, reusing historical experiments for reformulation, proxy metrics, optimization trade-offs, and practical methods for tiny-data problems — statistical techniques, transfer learning, and leveraging domain experts. Finally, she contrasts sparse R&D models with streaming, production-scale MLOps.

    Listen to gain concrete strategies for synthetic tabular data, small-data modeling, sensor-driven monitoring, and when to adopt production MLOps versus lightweight R&D workflows" topics: - industrial data - synthetic tabular data diff --git a/_podcast/interpretable-machine-learning.md b/_podcast/interpretable-machine-learning.md index 5b3f3442..4ba2fea3 100644 --- a/_podcast/interpretable-machine-learning.md +++ b/_podcast/interpretable-machine-learning.md @@ -1,5 +1,5 @@ --- -title: 'Interpretable Machine Learning: SHAP, Conformal Prediction and Model Trust' +title: "Interpretable Machine Learning: SHAP, Conformal Prediction and Model Trust" season: 16 episode: 7 guests: @@ -13,24 +13,16 @@ links: apple: https://podcasts.apple.com/us/podcast/cracking-the-code-machine-learning-made/id1541710331?i=1000636448000 spotify: https://open.spotify.com/episode/3SjDB0E2of9IS9TXn2Fof3?si=FwWH99FGTgmL1OGI3-sLAg youtube: https://www.youtube.com/watch?v=LBuGzyOkx7c -description: 'Discover interpretable machine learning: learn SHAP, Conformal Prediction, - calibrated uncertainty and model trust to debug models and boost reliability.' -intro: How can you reliably trust a machine learning model’s predictions in real-world - settings? In this episode Christoph Molnar — statistician, machine learner, and - author of Interpretable Machine Learning — walks through practical approaches for - building model trust. Drawing on his experience from Kaggle competitions to authoring - a technical book, Christoph explains the trade-offs between interpretability and - accuracy and shows how interpretability techniques help debug models.

    Key - topics include a SHAP deep dive with practical Python examples for attributing predictions, - conformal prediction for calibrated uncertainty and creating prediction sets, and - the difference between explainable AI and interpretable machine learning. He also - discusses using interpretability to debug models, maintain hands-on skills through - competitions, and document experiments for reproducible insights.

    If you - want concrete tools to evaluate model trust—how to quantify uncertainty, interpret - feature effects with SHAP, and produce reliable prediction sets with conformal methods—this - episode offers clear, actionable guidance and directions for further reading. Ideal - for data scientists and ML practitioners focused on interpretable machine learning, - model debugging, and trustworthy AI. +description: "Discover interpretable machine learning: learn SHAP, Conformal Prediction, calibrated uncertainty and model trust to debug models and boost reliability." + +topics: +- machine learning +- data science +- practices +- tools +- career transition +- interpretability +intro: "How can you reliably trust a machine learning model’s predictions in real-world settings? In this episode Christoph Molnar — statistician, machine learner, and author of Interpretable Machine Learning — walks through practical approaches for building model trust. Drawing on his experience from Kaggle competitions to authoring a technical book, Christoph explains the trade-offs between interpretability and accuracy and shows how interpretability techniques help debug models.

    Key topics include a SHAP deep dive with practical Python examples for attributing predictions, conformal prediction for calibrated uncertainty and creating prediction sets, and the difference between explainable AI and interpretable machine learning. He also discusses using interpretability to debug models, maintain hands-on skills through competitions, and document experiments for reproducible insights.

    If you want concrete tools to evaluate model trust—how to quantify uncertainty, interpret feature effects with SHAP, and produce reliable prediction sets with conformal methods—this episode offers clear, actionable guidance and directions for further reading. Ideal for data scientists and ML practitioners focused on interpretable machine learning, model debugging, and trustworthy AI." dateadded: 2023-11-27 duration: PT00H56M20S quotableClips: diff --git a/_podcast/investing-in-open-source-developer-tools.md b/_podcast/investing-in-open-source-developer-tools.md index 662122aa..33e3d400 100644 --- a/_podcast/investing-in-open-source-developer-tools.md +++ b/_podcast/investing-in-open-source-developer-tools.md @@ -1,6 +1,6 @@ --- -title: 'Early-Stage Investing in Open Source Developer Tools: Deal Sourcing, Due Diligence & Commercialization Models' -short: Investing in Open-Source Data Tools +title: "Early-Stage Investing in Open Source Developer Tools: Deal Sourcing, Due Diligence & Commercialization Models" +short: "Investing in Open-Source Data Tools" season: 15 episode: 2 guests: diff --git a/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md index 0af70e1e..84f6e67e 100644 --- a/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md +++ b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md @@ -1,6 +1,6 @@ --- -title: 'Tech Job Search Strategy: Portfolio Projects, Resume Tips and Networking' -short: Accelerating The Job Hunt for The Perfect Job in Tech +title: "Tech Job Search Strategy: Portfolio Projects, Resume Tips and Networking" +short: "Accelerating The Job Hunt for The Perfect Job in Tech" season: 17 episode: 6 guests: @@ -14,25 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/accelerating-the-job-hunt-for-the-perfect-job-in/id1541710331?i=1000643971899 spotify: 
https://open.spotify.com/episode/7giHGC86pjtIYrLOvwP7g4?si=NB9w6S6QTfCBHB_n93LkBQ youtube: https://www.youtube.com/watch?v=PchwbIs0tOg -description: 'Learn a four-pillar tech job search: build portfolio projects, sharpen - your resume and network strategically to land ML/data roles faster with outreach - tactics.' -intro: 'How do you turn portfolio projects, a sharper resume, and targeted networking - into a successful tech job search? In this episode Sarah Mestiri — Data Scientist - and Certified Career & Interview Coach with 6+ years in tech across startups, international - firms and financial services (FIS) — walks through a practical job search strategy - for career changers and return-to-work professionals. Sarah outlines a four-pillar - framework (goals, networking, CV, strategy) and shows how to define your ideal role, - choose a specialization (ML engineering, data engineering, MLOps), and validate - skills through projects versus courses. You’ll hear step-by-step advice on building - a top-5 target company list, crafting personalized outreach and informational interview - questions, and creating a weekly networking action plan that leverages weak ties - and referrals. The episode also covers resume tactics — prioritizing projects, skills, - and storytelling — self-research methods, assessments, part-time strategies, and - age or career-change considerations. Listen for actionable takeaways: how to build - portfolio projects that prove impact, write concise outreach messages, and structure - a job search you can maintain — plus recommended resources and follow-up support - (links and Slack) to help you execute.' +description: "Learn a four-pillar tech job search: build portfolio projects, sharpen your resume and network strategically to land ML/data roles faster with outreach tactics." 
+topics: +- MLOps +- data engineering +- machine learning +- career transition +- job search +intro: "How do you turn portfolio projects, a sharper resume, and targeted networking into a successful tech job search? In this episode Sarah Mestiri — Data Scientist and Certified Career & Interview Coach with 6+ years in tech across startups, international firms and financial services (FIS) — walks through a practical job search strategy for career changers and return-to-work professionals. Sarah outlines a four-pillar framework (goals, networking, CV, strategy) and shows how to define your ideal role, choose a specialization (ML engineering, data engineering, MLOps), and validate skills through projects versus courses. You’ll hear step-by-step advice on building a top-5 target company list, crafting personalized outreach and informational interview questions, and creating a weekly networking action plan that leverages weak ties and referrals. The episode also covers resume tactics — prioritizing projects, skills, and storytelling — self-research methods, assessments, part-time strategies, and age or career-change considerations. Listen for actionable takeaways: how to build portfolio projects that prove impact, write concise outreach messages, and structure a job search you can maintain — plus recommended resources and follow-up support (links and Slack) to help you execute." 
dateadded: 2024-02-03 duration: PT01H26S quotableClips: diff --git a/_podcast/kaggle-grandmaster-to-production-ml-and-education.md b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md index d0654822..810ca2c8 100644 --- a/_podcast/kaggle-grandmaster-to-production-ml-and-education.md +++ b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md @@ -1,7 +1,6 @@ --- -title: 'From Kaggle Grandmaster to Production ML: Competition Rigor, System Design - & Large-Scale Education' -short: Competitive Machine Learning and Teaching +title: "From Kaggle Grandmaster to Production ML: Competition Rigor, System Design & Large-Scale Education" +short: "Competitive Machine Learning and Teaching" season: 20 episode: 2 guests: @@ -15,24 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/competitive-machine-leaning-and-teaching-alexander/id1541710331?i=1000692309866 spotify: https://open.spotify.com/episode/6xsov9a1US8D8w5xKcjkNm youtube: https://www.youtube.com/watch?v=NfAJAr7FvyY&t -description: Discover Production ML, system design, and competition rigor from a Kaggle - Grandmaster—practical deployment tactics, model scaling tips, and education strategies -intro: How do you take the rigor and creativity that wins Kaggle competitions and - turn it into reliable, maintainable production ML? In this episode we explore that - question with Alexander Guschin — a Machine Learning Engineer with 10+ years of - experience, a Kaggle Grandmaster ranked 5th globally, a leader of DS and SE teams, - contributor to open-source ML tools, and instructor to 100K+ students.

    - Alexander breaks down the differences between competition modeling and production - constraints, practical approaches to system design for machine learning, and lessons - for scaling education and teams around technical content. Key topics include competition - rigor versus maintainability, production ML and model deployment considerations, - designing ML systems at scale, leveraging open-source tooling, and approaches to - teaching complex ML concepts to large audiences.

    Listeners will come away - with actionable perspective on translating research and contest solutions into production-ready - pipelines, questions to ask when designing ML systems, and guidance on building - reproducible workflows and scalable learning programs. Ideal for machine learning - engineers, technical leaders, and educators focused on production ML, MLOps, and - large-scale education. +description: "Discover Production ML, system design, and competition rigor from a Kaggle Grandmaster—practical deployment tactics, model scaling tips, and education strategies." +topics: +- machine learning +- MLOps +- data science +- open-source +- tools +- teaching +- career transition +intro: "How do you take the rigor and creativity that wins Kaggle competitions and turn it into reliable, maintainable production ML? In this episode we explore that question with Alexander Guschin — a Machine Learning Engineer with 10+ years of experience, a Kaggle Grandmaster ranked 5th globally, a leader of DS and SE teams, contributor to open-source ML tools, and instructor to 100K+ students.

    Alexander breaks down the differences between competition modeling and production constraints, practical approaches to system design for machine learning, and lessons for scaling education and teams around technical content. Key topics include competition rigor versus maintainability, production ML and model deployment considerations, designing ML systems at scale, leveraging open-source tooling, and approaches to teaching complex ML concepts to large audiences.

    Listeners will come away with actionable perspective on translating research and contest solutions into production-ready pipelines, questions to ask when designing ML systems, and guidance on building reproducible workflows and scalable learning programs. Ideal for machine learning engineers, technical leaders, and educators focused on production ML, MLOps, and large-scale education." dateadded: 2025-02-26 duration: PT01H05M09S quotableClips: diff --git a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md index 36d24146..1f5aed8f 100644 --- a/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md +++ b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md @@ -1,6 +1,6 @@ --- -title: 'Using Knowledge Graphs & LLMs for Automotive R&D: RAG, Graph ML & Crash Simulation' -short: Knowledge Graphs and LLMs Across Academia and Industry +title: "Using Knowledge Graphs & LLMs for Automotive R&D: RAG, Graph ML & Crash Simulation" +short: "Knowledge Graphs and LLMs Across Academia and Industry" season: 18 episode: 2 guests: @@ -14,25 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/knowledge-graphs-and-llms-across-academia-and/id1541710331?i=1000651561079 spotify: https://open.spotify.com/episode/1yDgx6uNaSQxKTjGU1qtIj?si=g0xQjWmDTRinzxhoYV3sdA youtube: https://www.youtube.com/watch?v=YncdlUscUOo -description: 'Learn Knowledge Graphs, LLMs & RAG for automotive R&D: optimize crash - simulation, apply Graph ML to FEA, reduce hallucination and speed prototyping' -intro: How can knowledge graphs and large language models (LLMs) be combined to accelerate - automotive R&D — from crash simulation insights to reproducible reports? 
In this - episode Anahita Pakiman, a data scientist-engineer who moved from mechanical engineering - and finite element analysis (FEA) into applied AI and now works as Senior Knowledge - Graph-Data Scientist Consultant at brox IT-Solutions, walks through practical strategies - and tradeoffs.

    We cover FEA vs machine learning, optimization and topology - in crash simulations, and why teams adopt Neo4j for semantic reporting and load-path - detection. Anahita explains graph vs tabular representations, moving from knowledge - graphs to computational graphs with NetworkX, and applying Graph Data Science and - Graph ML techniques like SimRank. She demonstrates grounding LLMs with retrieval-augmented - generation (RAG), contrasts embeddings and vector databases with KG semantics, and - shows Cypher-driven prompt templates. The episode also addresses trust, hallucination - and verification limits of LLM-extracted knowledge, plus the ADPT-LRN-PHYS project - for adaptive paper reading and graph visualization.

    Listen to learn concrete - approaches for combining knowledge graphs, RAG, graph ML and LLMs to improve crash - simulation analysis, semantic reporting, and deployable pipelines for automotive - R&D. +description: "Learn Knowledge Graphs, LLMs & RAG for automotive R&D: optimize crash simulation, apply Graph ML to FEA, reduce hallucination and speed prototyping" +topics: +- LLMs +- knowledge graphs +- graph ML +- retrieval-augmented generation +- embeddings +- vector databases +intro: "How can knowledge graphs and large language models (LLMs) be combined to accelerate automotive R&D — from crash simulation insights to reproducible reports? In this episode Anahita Pakiman, a data scientist-engineer who moved from mechanical engineering and finite element analysis (FEA) into applied AI and now works as Senior Knowledge Graph-Data Scientist Consultant at brox IT-Solutions, walks through practical strategies and tradeoffs.

    We cover FEA vs machine learning, optimization and topology in crash simulations, and why teams adopt Neo4j for semantic reporting and load-path detection. Anahita explains graph vs tabular representations, moving from knowledge graphs to computational graphs with NetworkX, and applying Graph Data Science and Graph ML techniques like SimRank. She demonstrates grounding LLMs with retrieval-augmented generation (RAG), contrasts embeddings and vector databases with KG semantics, and shows Cypher-driven prompt templates. The episode also addresses trust, hallucination and verification limits of LLM-extracted knowledge, plus the ADPT-LRN-PHYS project for adaptive paper reading and graph visualization.

    Listen to learn concrete approaches for combining knowledge graphs, RAG, graph ML and LLMs to improve crash simulation analysis, semantic reporting, and deployable pipelines for automotive R&D." dateadded: 2024-04-07 duration: PT00H59M24S quotableClips: diff --git a/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md index 294d905f..9bb636ff 100644 --- a/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md +++ b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md @@ -1,6 +1,6 @@ --- -title: 'Last-Mile Data Delivery for the Modern Data Stack: Build Data Products to Boost Adoption' -short: Conquering the Last Mile in Data +title: "Last-Mile Data Delivery for the Modern Data Stack: Build Data Products to Boost Adoption" +short: "Conquering the Last Mile in Data" season: 5 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6SGjBev8koFDRpDvLV76ZQ apple: https://podcasts.apple.com/us/podcast/conquering-the-last-mile-in-data-caitlin-moorman/id1541710331?i=1000539421886 -description: Learn last-mile data delivery, build data products for the modern data stack, boost adoption, embed analytics in decisions, and prove measurable ROI -intro: 'How do you turn a powerful modern data stack into analytics people actually use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove Recommerce and former data lead in crowdfunding and self-publishing, walks through the last-mile data delivery challenges that block adoption and offers practical approaches to build data products that drive decisions.

    We define the “last mile” in data delivery and contrast modern data stack capabilities with last-mile execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), treating data as a product, user research to diagnose poor adoption, and simplifying A/B testing reporting for decision-makers. Caitlin outlines a product-design mindset—outcome-first projects, persona-driven abstractions, low-fidelity prototyping, and embedding metrics in meetings—to prove impact and build advocacy. She also covers cultural barriers, measuring hard-to-track work with proxies, scoping narrow slices, recruiting advocates, and using growth marketing as an early use case.

    Listen to learn actionable frameworks and experiments you can use to improve data adoption, design usable data products, and measure tangible wins that create momentum in your organization.' +description: "Learn last-mile data delivery, build data products for the modern data stack, boost adoption, embed analytics in decisions, and prove measurable ROI" +intro: "How do you turn a powerful modern data stack into analytics people actually use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove Recommerce and former data lead in crowdfunding and self-publishing, walks through the last-mile data delivery challenges that block adoption and offers practical approaches to build data products that drive decisions.

    We define the “last mile” in data delivery and contrast modern data stack capabilities with last-mile execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), treating data as a product, user research to diagnose poor adoption, and simplifying A/B testing reporting for decision-makers. Caitlin outlines a product-design mindset—outcome-first projects, persona-driven abstractions, low-fidelity prototyping, and embedding metrics in meetings—to prove impact and build advocacy. She also covers cultural barriers, measuring hard-to-track work with proxies, scoping narrow slices, recruiting advocates, and using growth marketing as an early use case.

    Listen to learn actionable frameworks and experiments you can use to improve data adoption, design usable data products, and measure tangible wins that create momentum in your organization." topics: - data analytics - tools diff --git a/_podcast/launch-and-build-retail-startup.md b/_podcast/launch-and-build-retail-startup.md index 41594fb3..79debfff 100644 --- a/_podcast/launch-and-build-retail-startup.md +++ b/_podcast/launch-and-build-retail-startup.md @@ -1,6 +1,6 @@ --- -title: Build a Grocery Retail OS to Cut Supermarket Food Waste & Scale Your Startup -short: 'Launching a Startup: From Idea to First Hire' +title: "Build a Grocery Retail OS to Cut Supermarket Food Waste & Scale Your Startup" +short: "Launching a Startup: From Idea to First Hire" season: 4 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2zlqwEOamFD8YVGkf4VsFW apple: https://podcasts.apple.com/us/podcast/launching-a-startup-from-idea-to-first-hire-carmine-paolino/id1541710331?i=1000531945076 -description: Build a Grocery Retail OS to cut supermarket food waste, master JIT supply-chain forecasting, land pilots & investors, and scale your startup faster -intro: How do you build a grocery retail OS that actually cuts supermarket food waste while scaling a startup? In this episode, Carmine Paolino — CTO and co-founder of FreshFlow and former programmer/researcher in academia and data science — walks through translating technical expertise into a product that solves fresh-product challenges for retailers.

    We cover FreshFlow’s mission and early problem discovery (including Edeka and Volg pilots), customer discovery techniques like shadowing store teams and The Mom Test, and how their idea evolved from computer vision to an ordering and inventory forecasting platform. Carmine explains Entrepreneur First’s role in co-founder matching and fundraising, pilot timelines and sales cycle realities, and risks around investor selection and board dynamics. He also shares technical lessons (moving off Kubeflow to managed GCP services), hiring priorities, building a product roadmap toward a grocery retail OS, and leveraging just-in-time supply chain and forecasting to reduce food waste.

    Listen to learn practical guidance on pilot programs, prototype-before-pitch validation (banana ripeness demo), co-founder formation, and the operational and technical trade-offs when scaling a startup focused on supermarket food waste reduction +description: "Build a Grocery Retail OS to cut supermarket food waste, master JIT supply-chain forecasting, land pilots & investors, and scale your startup faster" +intro: "How do you build a grocery retail OS that actually cuts supermarket food waste while scaling a startup? In this episode, Carmine Paolino — CTO and co-founder of FreshFlow and former programmer/researcher in academia and data science — walks through translating technical expertise into a product that solves fresh-product challenges for retailers.

    We cover FreshFlow’s mission and early problem discovery (including Edeka and Volg pilots), customer discovery techniques like shadowing store teams and The Mom Test, and how their idea evolved from computer vision to an ordering and inventory forecasting platform. Carmine explains Entrepreneur First’s role in co-founder matching and fundraising, pilot timelines and sales cycle realities, and risks around investor selection and board dynamics. He also shares technical lessons (moving off Kubeflow to managed GCP services), hiring priorities, building a product roadmap toward a grocery retail OS, and leveraging just-in-time supply chain and forecasting to reduce food waste.

    Listen to learn practical guidance on pilot programs, prototype-before-pitch validation (banana ripeness demo), co-founder formation, and the operational and technical trade-offs when scaling a startup focused on supermarket food waste reduction." topics: - startup - founder diff --git a/_podcast/lean-mlops-for-startups.md b/_podcast/lean-mlops-for-startups.md index a89a8049..152960c3 100644 --- a/_podcast/lean-mlops-for-startups.md +++ b/_podcast/lean-mlops-for-startups.md @@ -1,7 +1,6 @@ --- -title: 'Lean MLOps for Startups: SaaS-First MVP Stack, Avoid Vendor Lock-In & Manage - Tech Debt' -short: MLOps in Corporations and Startups +title: "Lean MLOps for Startups: SaaS-First MVP Stack, Avoid Vendor Lock-In & Manage Tech Debt" +short: "MLOps in Corporations and Startups" season: 20 episode: 4 guests: @@ -15,22 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/mlops-in-corporations-and-startups-nemanja-radojkovic/id1541710331?i=1000699195928 spotify: https://open.spotify.com/episode/6V8gkTSz7LuPjQYC4rO019 youtube: https://www.youtube.com/watch?v=DX9c__a4jzg -description: 'Learn lean MLOps for startups: build a SaaS-first MVP stack, avoid vendor - lock-in, and manage tech debt to ship faster, cut costs, and scale safely.' -intro: How can a startup implement Lean MLOps that gets models into production quickly - without incurring vendor lock-in or crushing tech debt? In this episode Nemanja - Radojkovic — an Electrical Engineer turned Data Scientist and MLOps Engineer, former - consultant at Big4 and boutique firms, DataCamp course author, and teacher of Python - and machine learning — walks through practical strategies for building a SaaS-first - MVP stack while preserving future flexibility.

    We dig into the core trade-offs - of a SaaS-first approach for an MVP, patterns to avoid vendor lock-in, and pragmatic - ways to manage accumulating tech debt in machine learning systems. Nemanja draws - on hands-on experience across data science, MLOps, and product environments to explain - how startups can choose tooling, limit integration risk, and plan safe migration - paths as needs change.

    Listeners will come away with concrete considerations - for designing a lean MLOps stack, assessing SaaS versus self-hosted options, and - thinking ahead about maintainability and portability — essential guidance for founders, - ML engineers, and product teams building production-ready ML on a startup timeline. +description: "Learn lean MLOps for startups: build a SaaS-first MVP stack, avoid vendor lock-in, and manage tech debt to ship faster, cut costs, and scale safely." +topics: +- MLOps +- data engineering +- tools +- production +- career transition +- startups + +intro: "How can a startup implement Lean MLOps that gets models into production quickly without incurring vendor lock-in or crushing tech debt? In this episode Nemanja Radojkovic — an Electrical Engineer turned Data Scientist and MLOps Engineer, former consultant at Big4 and boutique firms, DataCamp course author, and teacher of Python and machine learning — walks through practical strategies for building a SaaS-first MVP stack while preserving future flexibility.

    We dig into the core trade-offs of a SaaS-first approach for an MVP, patterns to avoid vendor lock-in, and pragmatic ways to manage accumulating tech debt in machine learning systems. Nemanja draws on hands-on experience across data science, MLOps, and product environments to explain how startups can choose tooling, limit integration risk, and plan safe migration paths as needs change.

    Listeners will come away with concrete considerations for designing a lean MLOps stack, assessing SaaS versus self-hosted options, and thinking ahead about maintainability and portability — essential guidance for founders, ML engineers, and product teams building production-ready ML on a startup timeline." dateadded: 2025-03-15 duration: PT01H01M06S quotableClips: diff --git a/_podcast/learning-machine-learning-self-taught-bioinformatics.md b/_podcast/learning-machine-learning-self-taught-bioinformatics.md index 30fe5135..109fef0e 100644 --- a/_podcast/learning-machine-learning-self-taught-bioinformatics.md +++ b/_podcast/learning-machine-learning-self-taught-bioinformatics.md @@ -1,6 +1,6 @@ --- -title: 'How to Teach Yourself Bioinformatics & ML: Project-First Learning, Resources, and MLOps' -short: Mastering Self-Learning in Machine Learning +title: "How to Teach Yourself Bioinformatics & ML: Project-First Learning, Resources, and MLOps" +short: "Mastering Self-Learning in Machine Learning" season: 13 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2XdKHrmVuytXd5kzLVSbFn?si=ETbkUdT2Q1yJlKCI-d9Rcg youtube: https://www.youtube.com/watch?v=Kc3Puh3UCRQ -description: Learn bioinformatics & machine learning via project-first workflows, dataset-first ideation, study hacks and MLOps deployment tips to gain practical skills -intro: How do you teach yourself bioinformatics and machine learning in a way that leads to real projects and deployable models? In this episode, Aaisha Muhammad — a self-taught bioinformatician, machine learning engineer and scientific illustrator from Johannesburg and a Datatalks.Club ML Zoomcamp graduate — walks through a project-first path for learning bioinformatics and ML. We cover prioritization and avoiding FOMO, open curricula like OSSU, skill mapping with ML Zoomcamp, and practical resource evaluation (free vs paid, syllabus skimming, instructor credibility). 
Aaisha explains dataset-first project ideation, finding datasets and papers via Google Scholar and PubMed, and building capstone projects such as frog toxicity and landscape classifiers. You’ll hear pragmatic study tactics — self-imposed deadlines, note-taking, time tracking, community study groups, and using ChatGPT as a study companion — plus strategies to approach PhD-level papers while avoiding burnout. For engineers interested in production, the conversation addresses deployment and MLOps basics including Docker and Kubernetes. Tune in to gain concrete guidance on projects, vetted resources, and the study habits that make self-directed bioinformatics and ML learning sustainable +description: "Learn bioinformatics & machine learning via project-first workflows, dataset-first ideation, study hacks and MLOps deployment tips to gain practical skills" +intro: "How do you teach yourself bioinformatics and machine learning in a way that leads to real projects and deployable models? In this episode, Aaisha Muhammad — a self-taught bioinformatician, machine learning engineer and scientific illustrator from Johannesburg and a Datatalks.Club ML Zoomcamp graduate — walks through a project-first path for learning bioinformatics and ML. We cover prioritization and avoiding FOMO, open curricula like OSSU, skill mapping with ML Zoomcamp, and practical resource evaluation (free vs paid, syllabus skimming, instructor credibility). Aaisha explains dataset-first project ideation, finding datasets and papers via Google Scholar and PubMed, and building capstone projects such as frog toxicity and landscape classifiers. You’ll hear pragmatic study tactics — self-imposed deadlines, note-taking, time tracking, community study groups, and using ChatGPT as a study companion — plus strategies to approach PhD-level papers while avoiding burnout. For engineers interested in production, the conversation addresses deployment and MLOps basics including Docker and Kubernetes. 
Tune in to gain concrete guidance on projects, vetted resources, and the study habits that make self-directed bioinformatics and ML learning sustainable." topics: - bioinformatics - machine learning diff --git a/_podcast/machine-learning-data-science-interview-prep.md b/_podcast/machine-learning-data-science-interview-prep.md index 63ae191a..58f683bf 100644 --- a/_podcast/machine-learning-data-science-interview-prep.md +++ b/_podcast/machine-learning-data-science-interview-prep.md @@ -1,6 +1,6 @@ --- -title: 'Master Machine Learning & Data Science Interviews: Recruiter-Proven Stages, Prep & Resources' -short: Master Machine Learning & Data Science Interviews +title: "Master Machine Learning & Data Science Interviews: Recruiter-Proven Stages, Prep & Resources" +short: "Master Machine Learning & Data Science Interviews" season: 12 episode: 6 guests: @@ -15,7 +15,7 @@ links: spotify: https://open.spotify.com/episode/3JAmnWie8pS58Kok9Sjr2V?si=FDpX4O74Qi2kqzMGumqMpw youtube: https://www.youtube.com/watch?v=NnZjlMowkWA -intro: How do you reliably prepare for ML and data science technical interviews — from the initial recruiter screen to coding and scenario-based rounds? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast, draws on 8+ years recruiting data scientists and AI professionals to lay out recruiter-proven interview stages and practical prep tactics.

    Luke walks through the full interview lifecycle — Stage Zero recruiter screening and role-fit filtering, the intro interview for relationship building, and the technical rounds that include binary, scenario, example, and coding components. He explains how to research interviewers, craft elevator pitches and STAR stories, and align expectations with recruiters so you prepare to the right depth. You’ll learn how to prioritize fundamentals before secondary skills, use question-flow strategies to probe deeper, and balance theory versus practical math in machine learning interviews.

    The episode also covers recovering from failed interviews, targeted internal applications and outreach, and concrete practice resources like LeetCode, HackerRank, Codeforces, and Educative. If you’re preparing for ML technical interviews or data science interviews, this episode gives recruiter-led structure, concrete prep priorities, and resource recommendations to maximize your chances in each interview stage +intro: "How do you reliably prepare for ML and data science technical interviews — from the initial recruiter screen to coding and scenario-based rounds? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast, draws on 8+ years recruiting data scientists and AI professionals to lay out recruiter-proven interview stages and practical prep tactics.

    Luke walks through the full interview lifecycle — Stage Zero recruiter screening and role-fit filtering, the intro interview for relationship building, and the technical rounds that include binary, scenario, example, and coding components. He explains how to research interviewers, craft elevator pitches and STAR stories, and align expectations with recruiters so you prepare to the right depth. You’ll learn how to prioritize fundamentals before secondary skills, use question-flow strategies to probe deeper, and balance theory versus practical math in machine learning interviews.

    The episode also covers recovering from failed interviews, targeted internal applications and outreach, and concrete practice resources like LeetCode, HackerRank, Codeforces, and Educative. If you’re preparing for ML technical interviews or data science interviews, this episode gives recruiter-led structure, concrete prep priorities, and resource recommendations to maximize your chances in each interview stage." topics: - job search - career growth diff --git a/_podcast/machine-learning-decision-optimization.md b/_podcast/machine-learning-decision-optimization.md index 53af8623..1c0ae1e8 100644 --- a/_podcast/machine-learning-decision-optimization.md +++ b/_podcast/machine-learning-decision-optimization.md @@ -1,6 +1,6 @@ --- -title: 'Optimize Decisions with ML: Prescriptive & Robust Optimization for Supply Chain and Pricing' -short: Decision Optimization +title: "Optimize Decisions with ML: Prescriptive & Robust Optimization for Supply Chain and Pricing" +short: "Decision Optimization" season: 2 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/42eAhI6F31DZ96Mnq2I4bJ apple: https://podcasts.apple.com/us/podcast/translating-ml-predictions-into-better-real-world-results/id1541710331?i=1000509855317 -description: 'Learn prescriptive analytics & robust optimization for supply chain pricing: align ML predictions to decisions, scale models, pick solvers, and boost revenue.' -intro: 'How do you turn machine learning predictions into better real-world decisions—especially under uncertainty in supply chains and pricing? In this episode, Dan Becker, Founder & CEO of Decision AI and former Google data scientist and Product Director at DataRobot, walks through prescriptive analytics and decision optimization for practical business impact.
With a background that includes top Kaggle performance and contributions to TensorFlow and Keras, Dan explains how to formulate optimization problems, choose objectives and constraints, and integrate ML forecasts into prescriptive and robust optimization models.

    We cover robust vs. stochastic optimization, aligning loss functions with business objectives, and the solvers and tools that make this work—OR-Tools, Gurobi, Pyomo and open-source options. Dan also digs into scalability, approximation techniques, and deployment: pipelines, monitoring, and feedback loops. Use cases include supply chain optimization, resource allocation, and pricing/bidding strategies, plus operational, legal, and ethical constraints. Listeners will get practical guidance on evaluation metrics, common pitfalls like mis-specified objectives and overfitting decisions, and the cross-functional skills needed—data science, operations research, and software engineering—to get started with prescriptive optimization projects.' +description: "Learn prescriptive analytics & robust optimization for supply chain pricing: align ML predictions to decisions, scale models, pick solvers, and boost revenue." +intro: "How do you turn machine learning predictions into better real-world decisions—especially under uncertainty in supply chains and pricing? In this episode, Dan Becker, Founder & CEO of Decision AI and former Google data scientist and Product Director at DataRobot, walks through prescriptive analytics and decision optimization for practical business impact. With a background that includes top Kaggle performance and contributions to TensorFlow and Keras, Dan explains how to formulate optimization problems, choose objectives and constraints, and integrate ML forecasts into prescriptive and robust optimization models.

    We cover robust vs. stochastic optimization, aligning loss functions with business objectives, and the solvers and tools that make this work—OR-Tools, Gurobi, Pyomo and open-source options. Dan also digs into scalability, approximation techniques, and deployment: pipelines, monitoring, and feedback loops. Use cases include supply chain optimization, resource allocation, and pricing/bidding strategies, plus operational, legal, and ethical constraints. Listeners will get practical guidance on evaluation metrics, common pitfalls like mis-specified objectives and overfitting decisions, and the cross-functional skills needed—data science, operations research, and software engineering—to get started with prescriptive optimization projects." topics: - machine learning - decision optimization diff --git a/_podcast/machine-learning-engineering-production-best-practices.md b/_podcast/machine-learning-engineering-production-best-practices.md index 0d8be281..12326afe 100644 --- a/_podcast/machine-learning-engineering-production-best-practices.md +++ b/_podcast/machine-learning-engineering-production-best-practices.md @@ -1,6 +1,6 @@ --- -title: 'Practical Machine Learning Engineering for Production: Ship Maintainable Models, Avoid Complexity' -short: Running from Complexity +title: "Practical Machine Learning Engineering for Production: Ship Maintainable Models, Avoid Complexity" +short: "Running from Complexity" season: 4 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/2TxcU3eF7hjkAEzAJcYMAg apple: https://podcasts.apple.com/us/podcast/running-from-complexity-ben-wilson/id1541710331?i=1000529834651 -description: 'Learn practical ML engineering to ship maintainable machine learning models to production: avoid complexity, use prototypes, explainability, testing.' -intro: 'Are your ML projects collapsing under their own complexity—or never making it to production at all? 
In this episode, Ben Wilson, Practice Lead Resident Solutions Architect at Databricks and author of an upcoming Manning book, walks through practical machine learning engineering strategies for shipping maintainable models and avoiding needless complexity. Drawing on 12 years across industries, Ben emphasizes prioritizing maintainability over novelty: refactoring monolithic code into modular, testable components, running timeboxed experiments and bake-offs, and choosing SQL or statistical solutions before jumping to deep learning.

    We cover why production failures often stem from lack of business buy-in and “search-driven” complexity, how to involve subject-matter experts and executives to simplify designs, and techniques for explainability that translate model behavior into business terms. Ben also discusses team composition (statistics plus ML engineering skills), agile sprints for feature engineering and testing, the IKEA effect of emotional attachment to complex systems, and pitfalls in reproducing academic papers in production. Listen to learn concrete practices—from experimentation limits to mentoring and deployment tradeoffs—that help you move ideas into production and keep models reliable and maintainable.' +description: "Learn practical ML engineering to ship maintainable machine learning models to production: avoid complexity, use prototypes, explainability, testing." +intro: "Are your ML projects collapsing under their own complexity—or never making it to production at all? In this episode, Ben Wilson, Practice Lead Resident Solutions Architect at Databricks and author of an upcoming Manning book, walks through practical machine learning engineering strategies for shipping maintainable models and avoiding needless complexity. Drawing on 12 years across industries, Ben emphasizes prioritizing maintainability over novelty: refactoring monolithic code into modular, testable components, running timeboxed experiments and bake-offs, and choosing SQL or statistical solutions before jumping to deep learning.

    We cover why production failures often stem from lack of business buy-in and “search-driven” complexity, how to involve subject-matter experts and executives to simplify designs, and techniques for explainability that translate model behavior into business terms. Ben also discusses team composition (statistics plus ML engineering skills), agile sprints for feature engineering and testing, the IKEA effect of emotional attachment to complex systems, and pitfalls in reproducing academic papers in production. Listen to learn concrete practices—from experimentation limits to mentoring and deployment tradeoffs—that help you move ideas into production and keep models reliable and maintainable." topics: - machine learning - career growth diff --git a/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md index e9cdc16d..02c968d7 100644 --- a/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md +++ b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md @@ -1,6 +1,6 @@ --- -title: 'Asteroid Mining: Using ML & Hyperspectral Spectroscopy to Detect Water for ISRU' -short: Using Data for Asteroid Mining +title: "Asteroid Mining: Using ML & Hyperspectral Spectroscopy to Detect Water for ISRU" +short: "Using Data for Asteroid Mining" season: 9 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7wjKCbCsD4ytuNrE8JrH2B?si=1WPAtw6PSZGVib0qSsoLvA youtube: https://www.youtube.com/watch?v=YxijEUoDCfw -description: 'Discover asteroid mining: machine learning & hyperspectral spectroscopy to detect water for ISRU—learn detection methods, datasets, mission design & tools.' -intro: How can we reliably detect water on near-Earth asteroids using machine learning and hyperspectral spectroscopy to enable in-situ resource utilization (ISRU)? 
In this episode Daynan Crull—co-founder of Karman+ and lead of its science and technology effort—walks through the science and engineering needed to find and characterize asteroid water for space missions. Drawing on his background in remote sensing and ML, Daynan explains hyperspectral infrared signatures for water detection, spectral classification approaches, and the limits of ground truth from returned samples and meteorites. Along the way we cover relevant astronomical data types (images, hyperspectral bands, time series), asteroid features like photometry and rotation, observability challenges, and ML tasks from signal processing to orbit linking and synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), sampling and extraction concepts, economic use cases for water-as-fuel, and the cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, machine learning for water detection, and the datasets and infrastructure to get involved in ISRU research and missions +description: "Discover asteroid mining: machine learning & hyperspectral spectroscopy to detect water for ISRU—learn detection methods, datasets, mission design & tools." +intro: "How can we reliably detect water on near-Earth asteroids using machine learning and hyperspectral spectroscopy to enable in-situ resource utilization (ISRU)? In this episode Daynan Crull—co-founder of Karman+ and lead of its science and technology effort—walks through the science and engineering needed to find and characterize asteroid water for space missions. Drawing on his background in remote sensing and ML, Daynan explains hyperspectral infrared signatures for water detection, spectral classification approaches, and the limits of ground truth from returned samples and meteorites. 
Along the way we cover relevant astronomical data types (images, hyperspectral bands, time series), asteroid features like photometry and rotation, observability challenges, and ML tasks from signal processing to orbit linking and synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), sampling and extraction concepts, economic use cases for water-as-fuel, and the cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, machine learning for water detection, and the datasets and infrastructure to get involved in ISRU research and missions" topics: - machine learning - astronomy diff --git a/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md index e9567b5b..875c2b1f 100644 --- a/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md +++ b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md @@ -1,6 +1,6 @@ --- -title: 'Marketing Data Science: Attribution, Media Mix Modeling, Uplift & Cookieless Tracking' -short: Machine Learning in Marketing +title: "Marketing Data Science: Attribution, Media Mix Modeling, Uplift & Cookieless Tracking" +short: "Machine Learning in Marketing" season: 9 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0rc8zZjdxr5ncxqH9RDqBV?si=49feb89374554f65 youtube: https://www.youtube.com/watch?v=jsAxUd_bZpw -description: Learn attribution, media mix modeling & cookieless tracking to measure uplift, TV/offline impact and automate MMM for faster acquisition & retention -intro: How can marketing teams reliably measure ad impact, allocate budget across channels, and adapt to a cookieless world? 
In this episode, Juan Orduz — a Berlin-based mathematician and data scientist specializing in statistical learning, time series, Bayesian and geometric methods — walks through practical marketing data science approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless tracking.

    We cover attribution basics and multi-channel ambiguity, MMM techniques including regression, ad-stock and saturation, and campaign uplift estimation using time-series counterfactuals. Juan explains measuring TV and offline channels, the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention and purchase-frequency modeling. You’ll also hear about uplift A/B testing design, modeling benchmarks (start simple), MMM retraining cadence, learning decay rates with Bayesian regression, and building a marketing data function with the right data integrations and cross-functional collaboration.

    If you want actionable guidance on attribution models, media mix optimization, privacy-aware tracking, and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, common pitfalls, and learning resources to help practitioners improve measurement and decision-making +description: "Learn attribution, media mix modeling & cookieless tracking to measure uplift, TV/offline impact and automate MMM for faster acquisition & retention" +intro: "How can marketing teams reliably measure ad impact, allocate budget across channels, and adapt to a cookieless world? In this episode, Juan Orduz — a Berlin-based mathematician and data scientist specializing in statistical learning, time series, Bayesian and geometric methods — walks through practical marketing data science approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless tracking.

    We cover attribution basics and multi-channel ambiguity, MMM techniques including regression, ad-stock and saturation, and campaign uplift estimation using time-series counterfactuals. Juan explains measuring TV and offline channels, the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention and purchase-frequency modeling. You’ll also hear about uplift A/B testing design, modeling benchmarks (start simple), MMM retraining cadence, learning decay rates with Bayesian regression, and building a marketing data function with the right data integrations and cross-functional collaboration.

    If you want actionable guidance on attribution models, media mix optimization, privacy-aware tracking, and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, common pitfalls, and learning resources to help practitioners improve measurement and decision-making" topics: - marketing - machine learning diff --git a/_podcast/machine-learning-system-design-interview.md b/_podcast/machine-learning-system-design-interview.md index 6fd1d4fd..5177ebcf 100644 --- a/_podcast/machine-learning-system-design-interview.md +++ b/_podcast/machine-learning-system-design-interview.md @@ -1,6 +1,6 @@ --- -title: 'ML System Design Interviews: Production ML, Fraud Detection, Features, A/B Testing & MLOps' -short: Machine Learning System Design Interview +title: "ML System Design Interviews: Production ML, Fraud Detection, Features, A/B Testing & MLOps" +short: "Machine Learning System Design Interview" season: 7 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5tSLFOh8PGe1NFFz1of9Xe youtube: https://www.youtube.com/watch?v=0RsmRjar66E -description: 'Master ML system design: fraud detection, feature engineering & A/B testing to ace interviews, build robust production models, monitoring and MLOps.' -intro: 'How do you approach ML system design interviews that probe production constraints, fraud detection trade-offs, and MLOps realities? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through what interviewers look for and how candidates should structure answers for real-world ML problems.

    We cover concrete topics you can use in interviews and on the job: distinguishing software vs. ML system design; a fraud detection case study (probabilities, loss functions, real-time requirements); label noise, class imbalance, and feature engineering trade-offs; end-to-end pipeline items like metrics, baselines, A/B testing, and validating in production; monitoring, distribution shift, fallbacks, and production robustness; serving models, embeddings, and MLOps roles; plus when to avoid ML and practical checklist items for core projects. Valerii also shares interview tactics — signposting depth, stating assumptions, iterative baselines — and guidance for new grads and career progression toward system design roles.

    Listen to learn actionable frameworks, example trade-offs, and preparation strategies to improve your ML system design interviews and production ML decisions.' +description: "Master ML system design: fraud detection, feature engineering & A/B testing to ace interviews, build robust production models, monitoring and MLOps." +intro: "How do you approach ML system design interviews that probe production constraints, fraud detection trade-offs, and MLOps realities? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through what interviewers look for and how candidates should structure answers for real-world ML problems.

    We cover concrete topics you can use in interviews and on the job: distinguishing software vs. ML system design; a fraud detection case study (probabilities, loss functions, real-time requirements); label noise, class imbalance, and feature engineering trade-offs; end-to-end pipeline items like metrics, baselines, A/B testing, and validating in production; monitoring, distribution shift, fallbacks, and production robustness; serving models, embeddings, and MLOps roles; plus when to avoid ML and practical checklist items for core projects. Valerii also shares interview tactics — signposting depth, stating assumptions, iterative baselines — and guidance for new grads and career progression toward system design roles.

    Listen to learn actionable frameworks, example trade-offs, and preparation strategies to improve your ML system design interviews and production ML decisions." topics: - machine learning - career growth diff --git a/_podcast/make-money-with-machine-learning-roles-skills.md b/_podcast/make-money-with-machine-learning-roles-skills.md index 8bae891f..05143183 100644 --- a/_podcast/make-money-with-machine-learning-roles-skills.md +++ b/_podcast/make-money-with-machine-learning-roles-skills.md @@ -1,6 +1,6 @@ --- -title: 'Monetize Machine Learning: Convert Models to ARR/MRR with ML Product & MLOps Strategy' -short: New Roles and Key Skills to Monetize Machine Learning +title: "Monetize Machine Learning: Convert Models to ARR/MRR with ML Product & MLOps Strategy" +short: "New Roles and Key Skills to Monetize Machine Learning" season: 2 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5u2WuUB8GBNE9qDsNR6mby apple: https://podcasts.apple.com/us/podcast/new-roles-key-skills-to-monetize-machine-learning-vin/id1541710331?i=1000512720281 -description: 'Master monetize machine learning: convert ML models into ARR/MRR using MLOps and team roles to drive revenue, adoption and measurable business impact.' -intro: How do you turn machine learning models into recurring revenue—ARR and MRR—rather than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner and engineer strategist who has brought products to market with ARR in the $100’s of millions, breaks down practical steps to monetize machine learning.

    We explore why a revenue-first mindset changes ML strategy, how to translate models into C-suite metrics like ARR/MRR, and when to prioritize revenue versus cost-savings. Vin outlines the three core team roles for monetization, the research artifacts and experimental process that make models production-ready, and real category-creation examples from companies such as Amazon, Google, and Stitch Fix. For startups he explains the “angry users + data scientists” product recipe.

    You’ll also get frameworks for ML product management—turning strategy into researchable use cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, and product metrics for adoption (usage, task time, decision quality, pricing impact). This episode delivers actionable guidance for leaders, product managers, and engineers seeking to convert ML into sustainable ARR and MRR +description: "Monetize machine learning: convert ML models into ARR/MRR using MLOps and team roles to drive revenue, adoption and measurable business impact." +intro: "How do you turn machine learning models into recurring revenue—ARR and MRR—rather than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner and engineer strategist who has brought products to market with ARR in the hundreds of millions of dollars, breaks down practical steps to monetize machine learning.

    We explore why a revenue-first mindset changes ML strategy, how to translate models into C-suite metrics like ARR/MRR, and when to prioritize revenue versus cost-savings. Vin outlines the three core team roles for monetization, the research artifacts and experimental process that make models production-ready, and real category-creation examples from companies such as Amazon, Google, and Stitch Fix. For startups he explains the “angry users + data scientists” product recipe.

    You’ll also get frameworks for ML product management—turning strategy into researchable use cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, and product metrics for adoption (usage, task time, decision quality, pricing impact). This episode delivers actionable guidance for leaders, product managers, and engineers seeking to convert ML into sustainable ARR and MRR" topics: - machine learning - monetization diff --git a/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md index 6b4e7d50..2a1aff25 100644 --- a/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md +++ b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md @@ -1,6 +1,6 @@ --- -title: 'How to Find a Mentor and Become One: Mentoring Strategies for Tech Careers' -short: Mentoring +title: "How to Find a Mentor and Become One: Mentoring Strategies for Tech Careers" +short: "Mentoring" season: 1 episode: 5 guests: @@ -15,8 +15,14 @@ links: spotify: TODO apple: TODO -description: 'Discover practical mentoring strategies for tech careers: find mentors, master cold outreach, run effective sessions, start paid mentorship & boost leadership.' -intro: 'Struggling to find a mentor — or wondering how to become one — in a fast-moving tech career? In this episode, Rahul Jain, a senior solutions engineer and data/AI leader with 15+ years driving enterprise data transformations and a career arc from mining engineering to data engineering and leadership, walks through practical mentoring strategies for tech professionals. We define mentoring (purpose, scope, types), explore early models like Thoughtworks’ sponsorship, and show how to find mentors through networks, platforms, and cold outreach — with concrete outreach best practices: specificity, background, and follow-up. 
Rahul covers preparing mentoring sessions (goals, agendas), mentoring formats (one-off advice vs long-term relationships), and how to start as a mentor using simple first steps and platforms. Topics include benefits of mentoring, transferable workplace guidance, developing people skills (empathy, listening), balancing technical work and leadership, tackling imposter syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining development plans. Listen to gain actionable steps, templates, and mindset shifts to both secure meaningful mentorship and build a sustainable mentoring practice in your tech career.' +description: "Discover practical mentoring strategies for tech careers: find mentors, master cold outreach, run effective sessions, start paid mentorship & boost leadership." +topics: +- mentoring +- career development +- career transition +- leadership +- data engineering +intro: "Struggling to find a mentor — or wondering how to become one — in a fast-moving tech career? In this episode, Rahul Jain, a senior solutions engineer and data/AI leader with 15+ years driving enterprise data transformations and a career arc from mining engineering to data engineering and leadership, walks through practical mentoring strategies for tech professionals. We define mentoring (purpose, scope, types), explore early models like Thoughtworks’ sponsorship, and show how to find mentors through networks, platforms, and cold outreach — with concrete outreach best practices: specificity, background, and follow-up. Rahul covers preparing mentoring sessions (goals, agendas), mentoring formats (one-off advice vs long-term relationships), and how to start as a mentor using simple first steps and platforms. 
Topics include benefits of mentoring, transferable workplace guidance, developing people skills (empathy, listening), balancing technical work and leadership, tackling imposter syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining development plans. Listen to gain actionable steps, templates, and mindset shifts to both secure meaningful mentorship and build a sustainable mentoring practice in your tech career." dateadded: 2021-02-23 @@ -61,7 +67,7 @@ quotableClips: startOffset: 1680 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1680 endOffset: 1840 -- name: 'Developing People Skills: Empathy, Listening, and Avoiding the "Advice Monster"' +- name: 'Developing People Skills: Empathy, Listening, and Avoiding the "Advice Monster"' startOffset: 1840 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1840 endOffset: 2010 diff --git a/_podcast/mindful-data-strategy-for-business-impact.md b/_podcast/mindful-data-strategy-for-business-impact.md index 6e9e72bb..49beb8ec 100644 --- a/_podcast/mindful-data-strategy-for-business-impact.md +++ b/_podcast/mindful-data-strategy-for-business-impact.md @@ -1,7 +1,6 @@ --- -title: 'Mindful Data Strategy for Business Impact: Wabi-Sabi Approach, Data Trust - & Maintenance-Innovation Balance' -short: 'Mindful Data Strategy: From Pipelines to Business Impact' +title: "Mindful Data Strategy for Business Impact: Wabi-Sabi Approach, Data Trust & Maintenance-Innovation Balance" +short: "Mindful Data Strategy: From Pipelines to Business Impact" season: 21 episode: 2 guests: @@ -15,23 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: https://open.spotify.com/episode/54B0xvUI1eQjXW0s1eqgbI youtube: https://www.youtube.com/watch?v=B76J4QkZPWs -description: 'Discover a mindful data strategy to build data trust and balance maintenance-innovation - with a Wabi-Sabi approach: practical tactics to boost business
impact.' -intro: How do you build a data strategy that drives business impact without chasing - perfection? In this episode Lior Barak — author of Data Is Like a Plate of Hummus, - co-host of the WHAT the Data?! podcast, and founder of Tale About Data — explores - a mindful data strategy that accepts imperfection, prioritizes data trust, and balances - maintenance with innovation.

    Lior draws on 12+ years building data teams - and helping organizations use data for growth, with a particular focus on practical - strategies for non-business functions. Key topics include the Wabi-Sabi approach - to data (valuing usable, imperfect datasets), establishing data trust and governance, - and how to allocate resources between ongoing data maintenance and forward-looking - innovation. The conversation also touches on setting realistic expectations, reducing - technical debt, and aligning data work to measurable business outcomes.

    - If you’re responsible for data strategy, analytics, or data product decisions, this - episode provides concrete perspectives on building resilient, impact-driven data - practices—helping you prioritize work that increases trust, lowers risk, and creates - sustained business value. +description: "Discover a mindful data strategy to build data trust and balance maintenance-innovation with a Wabi-Sabi approach: practical tactics to boost business impact." +topics: +- data strategy +- data governance +- data engineering +- product management +- career transition +intro: "How do you build a data strategy that drives business impact without chasing perfection? In this episode Lior Barak — author of Data Is Like a Plate of Hummus, co-host of the WHAT the Data?! podcast, and founder of Tale About Data — explores a mindful data strategy that accepts imperfection, prioritizes data trust, and balances maintenance with innovation.

    Lior draws on 12+ years building data teams and helping organizations use data for growth, with a particular focus on practical strategies for non-business functions. Key topics include the Wabi-Sabi approach to data (valuing usable, imperfect datasets), establishing data trust and governance, and how to allocate resources between ongoing data maintenance and forward-looking innovation. The conversation also touches on setting realistic expectations, reducing technical debt, and aligning data work to measurable business outcomes.

    If you’re responsible for data strategy, analytics, or data product decisions, this episode provides concrete perspectives on building resilient, impact-driven data practices—helping you prioritize work that increases trust, lowers risk, and creates sustained business value." dateadded: 2025-08-18 duration: PT01H06M05S quotableClips: diff --git a/_podcast/ml-engineering-kpis-and-metrics-strategy.md b/_podcast/ml-engineering-kpis-and-metrics-strategy.md index 348f4e22..b3396021 100644 --- a/_podcast/ml-engineering-kpis-and-metrics-strategy.md +++ b/_podcast/ml-engineering-kpis-and-metrics-strategy.md @@ -1,6 +1,6 @@ --- -title: 'KPI Design & Metrics Strategy: Prioritize Impact, Avoid Vanity Metrics, and Prove ROI' -short: 'Defining Success: Metrics and KPIs' +title: "KPI Design & Metrics Strategy: Prioritize Impact, Avoid Vanity Metrics, and Prove ROI" +short: "Defining Success: Metrics and KPIs" season: 5 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5kTD7LjoXos1fm2LPD7nJc apple: https://podcasts.apple.com/us/podcast/defining-success-metrics-and-kpis-adam-sroka/id1541710331?i=1000535667935 -description: Discover KPI design, metrics strategy & ROI proof - avoid vanity metrics, build dashboards, prioritize impact, and measure experiments to prove value -intro: How do you design KPIs that prioritize real impact, avoid vanity metrics, and actually prove ROI? In this episode, Dr. Adam Sroka — Head of Machine Learning Engineering at Origami Energy, with a background from a Physics PhD to data science, reinforcement learning, and consultancy — walks through a practical metrics strategy for data and product teams.

    We cover why metrics matter (Drucker, merit functions), how to make metrics comparable (units), and concrete examples like weighted revenue for sales pipelines and burn-down/maintainability metrics for professional services. Adam explains top-down KPI alignment, avoiding vanity metrics and KPI gaming, and using derived/composite KPIs to capture margin trade-offs. You’ll hear a workshop case for grocery retail, guidance on KPI prioritization and review cadence, and tips for operationalizing metrics through dashboards, executive communication, and a North Star metric. We also dig into threshold, health & hygiene metrics, translating model performance into £/time saved, and robust experiment and model validation (A/B, randomization, backtesting, uplift).

    If you’re responsible for KPI design, metrics strategy, or proving ROI from data work, this episode gives actionable frameworks to measure impact and reduce measurement risk +description: "Discover KPI design, metrics strategy & ROI proof - avoid vanity metrics, build dashboards, prioritize impact, and measure experiments to prove value" +intro: "How do you design KPIs that prioritize real impact, avoid vanity metrics, and actually prove ROI? In this episode, Dr. Adam Sroka — Head of Machine Learning Engineering at Origami Energy, with a background from a Physics PhD to data science, reinforcement learning, and consultancy — walks through a practical metrics strategy for data and product teams.

    We cover why metrics matter (Drucker, merit functions), how to make metrics comparable (units), and concrete examples like weighted revenue for sales pipelines and burn-down/maintainability metrics for professional services. Adam explains top-down KPI alignment, avoiding vanity metrics and KPI gaming, and using derived/composite KPIs to capture margin trade-offs. You’ll hear a workshop case for grocery retail, guidance on KPI prioritization and review cadence, and tips for operationalizing metrics through dashboards, executive communication, and a North Star metric. We also dig into threshold, health & hygiene metrics, translating model performance into £/time saved, and robust experiment and model validation (A/B, randomization, backtesting, uplift).

    If you’re responsible for KPI design, metrics strategy, or proving ROI from data work, this episode gives actionable frameworks to measure impact and reduce measurement risk" topics: - machine learning - leadership diff --git a/_podcast/ml-product-manager-and-mlops-platform-strategy.md b/_podcast/ml-product-manager-and-mlops-platform-strategy.md index 9a4e730b..2107a686 100644 --- a/_podcast/ml-product-manager-and-mlops-platform-strategy.md +++ b/_podcast/ml-product-manager-and-mlops-platform-strategy.md @@ -1,6 +1,6 @@ --- -title: 'Become an ML Product Manager: MLOps Platforms, Observability & Adoption' -short: Product Management for Machine Learning +title: "Become an ML Product Manager: MLOps Platforms, Observability & Adoption" +short: "Product Management for Machine Learning" season: 6 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/7zfH4hagZKwoIWmee0AXBd apple: https://podcasts.apple.com/us/podcast/product-management-for-machine-learning-geo-jolly/id1541710331?i=1000545301034 -description: 'Learn ML Product Manager tactics: MLOps platform strategy, observability KPIs & adoption playbooks to own roadmaps, governance, and stakeholder ROI.' -intro: How do you become an ML product manager and build MLOps platforms that teams actually use? In this episode, Geo Jolly, a Technical PM at Glovo with a background from web/dev to data science and product management, walks through the practical skills and decisions that define the role.

    We cover MLOps platform strategy and vendor evaluation, treating internal platform users as customers, and the real productivity costs of poor tooling UX. Geo outlines product manager responsibilities—roadmaps, specs, backlog prioritization—and explains outcome-driven problem definition, avoiding solution bias, and running workshops and interviews to break down complex problems. You’ll hear about ML observability and KPIs to measure platform impact, release governance and rollout timing, adoption strategy for internal stakeholders, and engineering roles needed for platform delivery (CI/CD, K8s, syseng). Practical topics also include model validation and ML quality assurance, embedded data scientists as power users, Agile approaches for data science, and concrete transition paths from data scientist or Scrum Master into technical ML product roles.

    Listen to gain actionable guidance on MLOps platforms, observability, adoption strategy, and the technical literacy required to succeed as an ML product manager +description: "Learn ML Product Manager tactics: MLOps platform strategy, observability KPIs & adoption playbooks to own roadmaps, governance, and stakeholder ROI." +intro: "How do you become an ML product manager and build MLOps platforms that teams actually use? In this episode, Geo Jolly, a Technical PM at Glovo with a background from web/dev to data science and product management, walks through the practical skills and decisions that define the role.

    We cover MLOps platform strategy and vendor evaluation, treating internal platform users as customers, and the real productivity costs of poor tooling UX. Geo outlines product manager responsibilities—roadmaps, specs, backlog prioritization—and explains outcome-driven problem definition, avoiding solution bias, and running workshops and interviews to break down complex problems. You’ll hear about ML observability and KPIs to measure platform impact, release governance and rollout timing, adoption strategy for internal stakeholders, and engineering roles needed for platform delivery (CI/CD, K8s, syseng). Practical topics also include model validation and ML quality assurance, embedded data scientists as power users, Agile approaches for data science, and concrete transition paths from data scientist or Scrum Master into technical ML product roles.

    Listen to gain actionable guidance on MLOps platforms, observability, adoption strategy, and the technical literacy required to succeed as an ML product manager" topics: - product management - machine learning diff --git a/_podcast/ml-system-design.md b/_podcast/ml-system-design.md index 5b3ea9dc..197624a1 100644 --- a/_podcast/ml-system-design.md +++ b/_podcast/ml-system-design.md @@ -1,6 +1,6 @@ --- -title: 'ML System Design Playbook: Fail-Fast Design Docs, Modular Architecture & Data Drift Monitoring' -short: Why Machine Learning Design is Broken +title: "ML System Design Playbook: Fail-Fast Design Docs, Modular Architecture & Data Drift Monitoring" +short: "Why Machine Learning Design is Broken" season: 15 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3KfKptkWIa1hW1hSOvBQaO youtube: https://www.youtube.com/watch?v=6YBMU6475KQ -description: 'Master ML system design: fail-fast design docs, modular architecture & data drift monitoring to cut risk, assign ownership, speed experiments.' -intro: How do you design ML systems that fail fast, scale with modular architecture, and survive data drift in production? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through a practical playbook for ML system design.

    We cover why fail-fast design docs act like blueprints to prevent wasted work, how shared and chapter-based design docs enable alignment and versioning, and the maintenance challenges of treating design docs as living artifacts. Valerii explains assigning ownership and mapping the bus factor for risk assessment, incentivizing documentation, and using a 16-chapter ML design template to standardize architecture. On the operational side we dig into monitoring strategies for data drift, concept drift, and prediction drift, plus fallback strategies — redundancy, simple baselines, and serving reliability — to keep models robust. He also points to tools and resources including Evidently AI, templates, and the book.

    Listen to gain concrete tactics for fail-fast design docs, modular architecture, data drift monitoring, and baseline solutions you can apply to reduce risk and accelerate ML delivery +description: "Master ML system design: fail-fast design docs, modular architecture & data drift monitoring to cut risk, assign ownership, speed experiments." +intro: "How do you design ML systems that fail fast, scale with modular architecture, and survive data drift in production? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through a practical playbook for ML system design.

    We cover why fail-fast design docs act like blueprints to prevent wasted work, how shared and chapter-based design docs enable alignment and versioning, and the maintenance challenges of treating design docs as living artifacts. Valerii explains assigning ownership and mapping the bus factor for risk assessment, incentivizing documentation, and using a 16-chapter ML design template to standardize architecture. On the operational side we dig into monitoring strategies for data drift, concept drift, and prediction drift, plus fallback strategies — redundancy, simple baselines, and serving reliability — to keep models robust. He also points to tools and resources including Evidently AI, templates, and the book.

    Listen to gain concrete tactics for fail-fast design docs, modular architecture, data drift monitoring, and baseline solutions you can apply to reduce risk and accelerate ML delivery" topics: - machine learning - system design diff --git a/_podcast/mlops-and-ml-engineering-in-finance.md b/_podcast/mlops-and-ml-engineering-in-finance.md index d1275620..9c88de08 100644 --- a/_podcast/mlops-and-ml-engineering-in-finance.md +++ b/_podcast/mlops-and-ml-engineering-in-finance.md @@ -1,6 +1,6 @@ --- -title: 'MLOps in Finance: Regulated Deployment, CI/CD and Model Governance' -short: Machine Learning Engineering in Finance +title: "MLOps in Finance: Regulated Deployment, CI/CD and Model Governance" +short: "Machine Learning Engineering in Finance" season: 17 episode: 5 guests: @@ -14,25 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/machine-learning-engineering-in-finance-nemanja-radojkovic/id1541710331?i=1000643322929 spotify: https://open.spotify.com/episode/3yQtA8EAndau1yhCFPfwtj?si=ZutO4mLlRfOz_Zgw4GujiQ youtube: https://www.youtube.com/watch?v=Nl4aibeFwiI -description: 'Learn MLOps for finance: model governance, compliant deployments, monitoring, - and MVP ML Ops tactics to build production-ready, auditable models.' -intro: 'How do you deploy machine learning in heavily regulated finance environments - while keeping CI/CD pipelines, model governance, and operational risk under control? - In this episode Nemanja Radojkovic—an electrical engineer turned data scientist - and MLOps practitioner who moved from Belgrade to Leuven—walks through real-world - constraints and pragmatic solutions for MLOps in finance.

    Drawing on his - PhD background, consulting experience, and teaching, Nemanja covers finance use - cases such as compliance, AML, fraud detection, and document/email automation, then - drills into ML engineering responsibilities: deployment choices, CI/CD, release - management, and building trust with governance and approvals. We examine legacy - and regulatory constraints, on-premises platforms (Hadoop, OpenShift), and low-cost - MLOps strategies: minimal viable setups (dev/test/prod, monitoring, model registry, - data versioning, reproducible pipelines) and tactical workarounds like S3-based - registries.

    Listeners will gain actionable guidance on adapting ML workflows - to corporate DevOps, prioritizing MLOps on a shoestring, standardizing deployment - patterns and platform reuse (FastAPI, internal libraries), and the core skills needed - for ML engineering and production readiness. Ideal for ML engineers and data teams - tackling regulated deployment, CI/CD, and model governance in finance.' +description: "Learn MLOps for finance: model governance, compliant deployments, monitoring, and MVP ML Ops tactics to build production-ready, auditable models." +topics: +- MLOps +- machine learning +- data engineering +- production +- career transition +intro: "How do you deploy machine learning in heavily regulated finance environments while keeping CI/CD pipelines, model governance, and operational risk under control? In this episode Nemanja Radojkovic—an electrical engineer turned data scientist and MLOps practitioner who moved from Belgrade to Leuven—walks through real-world constraints and pragmatic solutions for MLOps in finance.

    Drawing on his PhD background, consulting experience, and teaching, Nemanja covers finance use cases such as compliance, AML, fraud detection, and document/email automation, then drills into ML engineering responsibilities: deployment choices, CI/CD, release management, and building trust with governance and approvals. We examine legacy and regulatory constraints, on-premises platforms (Hadoop, OpenShift), and low-cost MLOps strategies: minimal viable setups (dev/test/prod, monitoring, model registry, data versioning, reproducible pipelines) and tactical workarounds like S3-based registries.

    Listeners will gain actionable guidance on adapting ML workflows to corporate DevOps, prioritizing MLOps on a shoestring, standardizing deployment patterns and platform reuse (FastAPI, internal libraries), and the core skills needed for ML engineering and production readiness. Ideal for ML engineers and data teams tackling regulated deployment, CI/CD, and model governance in finance." dateadded: 2024-01-29 duration: PT00H58M04S quotableClips: diff --git a/_podcast/mlops-at-scale-reproducibility-adoption.md b/_podcast/mlops-at-scale-reproducibility-adoption.md index c0e570b6..683bfa50 100644 --- a/_podcast/mlops-at-scale-reproducibility-adoption.md +++ b/_podcast/mlops-at-scale-reproducibility-adoption.md @@ -1,6 +1,6 @@ --- -title: 'MLOps at Scale: CI/CD, Reproducibility, Model Monitoring & Adoption Strategies' -short: MLOps as a Team +title: "MLOps at Scale: CI/CD, Reproducibility, Model Monitoring & Adoption Strategies" +short: "MLOps as a Team" season: 19 episode: 4 guests: @@ -14,24 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/mlops-as-a-team-rapha%C3%ABl-hoogvliets/id1541710331?i=1000676238840 spotify: https://open.spotify.com/episode/0Dl372MFGvN0zDa1YQx7oe?si=eCy-a4fkRtOaEe21-KDHXQ youtube: https://youtube.com/watch?v=rMq63r3zi4c -description: Learn MLOps CI/CD and model monitoring to scale reliable deployments, - accelerate delivery, ensure reproducibility, and drive model adoption in production. -intro: 'How do you run MLOps at scale so models stay deployed, reproducible, and actually - adopted? In this episode Raphaël Hoogvliets—who leads a 12-engineer team at Eneco - and brings a career arc from agriculture into data science and MLOps—walks through - practical approaches for CI/CD for ML, reproducibility, model monitoring, and adoption - strategy.

    We cover the core trade-offs between speed and robustness, design - choices for long-term maintainability, and the team coordination needed to scale - ML: evangelists, tech translators, and technical leads. Raphaël explains why a centralized - MLOps platform team often works as an enabling layer, how MLOps should support product - teams, and how to drive adoption through iteration, feedback loops, and developer - experience. You’ll hear concrete practices—CI, repo structure, parameterization, - testing—plus reproducibility tactics like data versioning, traceability, and experiment - capture. We also discuss KPIs (deployment frequency and impact tracking), skill - mix, dependency management, container strategies, and real success and failure stories. -

    Listen to learn actionable priorities for getting started (start with CI/CD - and solve tangible pain points), and how to measure and sustain model value through - monitoring and operational processes.' +description: "Learn MLOps CI/CD and model monitoring to scale reliable deployments, accelerate delivery, ensure reproducibility, and drive model adoption in production." +topics: +- MLOps +- data science +- machine learning +- tools +- data governance +intro: "How do you run MLOps at scale so models stay deployed, reproducible, and actually adopted? In this episode Raphaël Hoogvliets—who leads a 12-engineer team at Eneco and brings a career arc from agriculture into data science and MLOps—walks through practical approaches for CI/CD for ML, reproducibility, model monitoring, and adoption strategy.

    We cover the core trade-offs between speed and robustness, design choices for long-term maintainability, and the team coordination needed to scale ML: evangelists, tech translators, and technical leads. Raphaël explains why a centralized MLOps platform team often works as an enabling layer, how MLOps should support product teams, and how to drive adoption through iteration, feedback loops, and developer experience. You’ll hear concrete practices—CI, repo structure, parameterization, testing—plus reproducibility tactics like data versioning, traceability, and experiment capture. We also discuss KPIs (deployment frequency and impact tracking), skill mix, dependency management, container strategies, and real success and failure stories.

    Listen to learn actionable priorities for getting started (start with CI/CD and solve tangible pain points), and how to measure and sustain model value through monitoring and operational processes." dateadded: 2024-11-16 duration: PT01H04M07S quotableClips: diff --git a/_podcast/mlops-community-building-and-meetups.md b/_podcast/mlops-community-building-and-meetups.md index 59022b9f..3b196360 100644 --- a/_podcast/mlops-community-building-and-meetups.md +++ b/_podcast/mlops-community-building-and-meetups.md @@ -1,6 +1,6 @@ --- -title: 'MLOps Community Playbook: Launch, Grow & Retain Meetups, Members, and Contributors' -short: Building Online Tech Communities +title: "MLOps Community Playbook: Launch, Grow & Retain Meetups, Members, and Contributors" +short: "Building Online Tech Communities" season: 2 episode: 12 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/58Xe9PCfdz26CVuYKtZWUE apple: https://podcasts.apple.com/us/podcast/building-online-tech-communities-demetrios-brinkmann/id1541710331?i=1000515510103 -description: 'Master MLOps meetups: launch communities, recruit contributors, and boost member retention with LinkedIn outreach, content strategy, and practical checklists.' -intro: 'How do you launch, grow, and retain an MLOps community that moves from meetups to a sustainable, contributor-led ecosystem? In this episode, Demetrios Brinkmann — who has led the MLOps community since April 2020 and now runs the largest active group with 2,500+ Slack members and 25k YouTube views — walks through a practical community playbook for MLOps meetups, members, and contributors.

    We trace his origin story and pivot to meetups and podcasting, then dive into concrete tactics: recruiting speakers with sales techniques, LinkedIn outreach and cold DMs, weekly meetup and content strategies, editing and YouTube clips, and milestone growth from 500 to 3k members. Demetrios also addresses moderation challenges, evolving from founder-led to peer-to-peer governance, cultivating core contributors and advisory groups, and building belonging through Q&A, social channels, and non-technical spaces.

    Listeners will get actionable retention strategies (giveaways, multi-format content, avoiding gamification), practical checklists for platform, purpose, audience, and content, and tips for member connections like Random Coffee and sprints. If you’re building an MLOps community or scaling technical meetups, this episode offers a focused, tactical roadmap. Find next steps at mlops.community.' +description: "Master MLOps meetups: launch communities, recruit contributors, and boost member retention with LinkedIn outreach, content strategy, and practical checklists." +intro: "How do you launch, grow, and retain an MLOps community that moves from meetups to a sustainable, contributor-led ecosystem? In this episode, Demetrios Brinkmann — who has led the MLOps community since April 2020 and now runs the largest active group with 2,500+ Slack members and 25k YouTube views — walks through a practical community playbook for MLOps meetups, members, and contributors.

    We trace his origin story and pivot to meetups and podcasting, then dive into concrete tactics: recruiting speakers with sales techniques, LinkedIn outreach and cold DMs, weekly meetup and content strategies, editing and YouTube clips, and milestone growth from 500 to 3k members. Demetrios also addresses moderation challenges, evolving from founder-led to peer-to-peer governance, cultivating core contributors and advisory groups, and building belonging through Q&A, social channels, and non-technical spaces.

    Listeners will get actionable retention strategies (giveaways, multi-format content, avoiding gamification), practical checklists for platform, purpose, audience, and content, and tips for member connections like Random Coffee and sprints. If you’re building an MLOps community or scaling technical meetups, this episode offers a focused, tactical roadmap. Find next steps at mlops.community." topics: - MLOps - community building diff --git a/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md index cd53e7b5..2a0a3d99 100644 --- a/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md +++ b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md @@ -1,6 +1,6 @@ --- -title: 'Feature Stores for MLOps: Real-Time Feature Engineering, Feast & Tecton Guide' -short: Feature Stores in MLOps Explained +title: "Feature Stores for MLOps: Real-Time Feature Engineering, Feast & Tecton Guide" +short: "Feature Stores in MLOps Explained" season: 2 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/05YnfTWbplXwOwicR2doy3 apple: https://podcasts.apple.com/us/podcast/feature-stores-cutting-through-the-hype-willem-pienaar/id1541710331?i=1000508782957 -description: Discover feature store use cases, real-time features with Feast & Tecton, build scalable MLOps to speed production, cut duplication and detect drift -intro: How do you reliably build and serve real-time features for production ML without rework, duplication, or training/serving skew? In this episode, Willem Pienaar — engineering lead at Tecton and creator of Feast — walks through what feature stores solve in MLOps and how they enable real-time feature engineering. We define feature stores, compare feature creation vs retrieval (SQL, Python, APIs, on-demand transforms), and illustrate a production real-time fraud detection lookup. 
Willem separates hype from value, explains organizational challenges like team silos and speed to production, and outlines the platform role across materialization, serving, and validation.

    You’ll get practical coverage of Feast (open-source) and Tecton (enterprise), architecture components (transform engine, storage, serving, registry, monitoring), and when online tabular use cases require a feature store versus when it’s overkill. The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch (Flink, Spark), validation and monitoring (drift detection, Great Expectations, TFDV), backfilling strategies, ownership and governance, and getting started resources (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete next steps for productionizing features in your MLOps stack +description: "Discover feature store use cases, real-time features with Feast & Tecton, build scalable MLOps to speed production, cut duplication and detect drift" +intro: "How do you reliably build and serve real-time features for production ML without rework, duplication, or training/serving skew? In this episode, Willem Pienaar — engineering lead at Tecton and creator of Feast — walks through what feature stores solve in MLOps and how they enable real-time feature engineering. We define feature stores, compare feature creation vs retrieval (SQL, Python, APIs, on-demand transforms), and illustrate a production real-time fraud detection lookup. Willem separates hype from value, explains organizational challenges like team silos and speed to production, and outlines the platform role across materialization, serving, and validation.

    You’ll get practical coverage of Feast (open-source) and Tecton (enterprise), architecture components (transform engine, storage, serving, registry, monitoring), and when online tabular use cases require a feature store versus when it’s overkill. The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch (Flink, Spark), validation and monitoring (drift detection, Great Expectations, TFDV), backfilling strategies, ownership and governance, and getting started resources (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete next steps for productionizing features in your MLOps stack" topics: - machine learning - MLOps diff --git a/_podcast/mlops-kubeflow-model-monitoring.md b/_podcast/mlops-kubeflow-model-monitoring.md index 579a708c..16a88b5a 100644 --- a/_podcast/mlops-kubeflow-model-monitoring.md +++ b/_podcast/mlops-kubeflow-model-monitoring.md @@ -1,6 +1,6 @@ --- -title: 'Mastering MLOps: Kubeflow Pipelines, Model Monitoring & Automated Retraining' -short: The Rise of MLOps +title: "Mastering MLOps: Kubeflow Pipelines, Model Monitoring & Automated Retraining" +short: "The Rise of MLOps" season: 2 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3YPvzGQnfxl7Mo1VKE0l1K apple: https://podcasts.apple.com/us/podcast/the-rise-of-mlops-theofilos-papapanagiotou/id1541710331?i=1000507907719 -description: Master MLOps with Kubeflow pipelines and automated retraining—detect drift, accelerate deployment, and boost production model reliability for faster iteration -intro: How do you build reliable, production-ready ML pipelines that detect model drift, monitor fairness, and trigger automated retraining? In this episode, Theofilos Papapanagiotou — a systems engineer with 20 years’ experience (from Unix engineering to ML engineering) now helping companies run ML workloads and a Kubeflow enthusiast — walks through practical MLOps strategies and tooling.

    We define MLOps as culture, process, and technology and contrast it with DevOps across the model lifecycle. Key topics include model monitoring for drift and fairness, inference sensors and a Prometheus/Grafana monitoring stack, commoditizing inference monitoring for faster iteration, and maturity levels from manual training to automated retraining. Theofilos dives into the Kubeflow ecosystem — Pipelines, KFServing, Feast, Katib, and integrations with TFX — plus hyperparameter search, notebook→pipeline workflows, MLMD metadata for data and model versioning, and tradeoffs for small teams and edge/mobile deployments.

    If you’re implementing Kubeflow pipelines, setting up model monitoring, or planning automated retraining, this episode offers practical guidance, maturity roadmaps, and resources to help you move from prototypes to reproducible, production ML +description: "Master MLOps with Kubeflow pipelines and automated retraining—detect drift, accelerate deployment, and boost production model reliability for faster iteration" +intro: "How do you build reliable, production-ready ML pipelines that detect model drift, monitor fairness, and trigger automated retraining? In this episode, Theofilos Papapanagiotou — a systems engineer with 20 years’ experience (from Unix engineering to ML engineering) now helping companies run ML workloads and a Kubeflow enthusiast — walks through practical MLOps strategies and tooling.

    We define MLOps as culture, process, and technology and contrast it with DevOps across the model lifecycle. Key topics include model monitoring for drift and fairness, inference sensors and a Prometheus/Grafana monitoring stack, commoditizing inference monitoring for faster iteration, and maturity levels from manual training to automated retraining. Theofilos dives into the Kubeflow ecosystem — Pipelines, KFServing, Feast, Katib, and integrations with TFX — plus hyperparameter search, notebook→pipeline workflows, MLMD metadata for data and model versioning, and tradeoffs for small teams and edge/mobile deployments.

    If you’re implementing Kubeflow pipelines, setting up model monitoring, or planning automated retraining, this episode offers practical guidance, maturity roadmaps, and resources to help you move from prototypes to reproducible, production ML" topics: - MLOps - machine learning diff --git a/_podcast/mlops-model-monitoring-data-observability.md b/_podcast/mlops-model-monitoring-data-observability.md index b1e2adc0..ca7c1132 100644 --- a/_podcast/mlops-model-monitoring-data-observability.md +++ b/_podcast/mlops-model-monitoring-data-observability.md @@ -1,6 +1,6 @@ --- -title: 'MLOps Architect Guide: Production Model Monitoring, Data Observability & Tooling' -short: MLOps Architect +title: "MLOps Architect Guide: Production Model Monitoring, Data Observability & Tooling" +short: "MLOps Architect" season: 10 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5gz5lnS7onwRUtbcmpOSuU?si=8cbe799f284c4623 youtube: https://www.youtube.com/watch?v=p1gVaS4Zx5M -description: Master MLOps, model monitoring & data observability with guidance on production observability, ETL root causes, tooling trade-offs, ONNX, build vs buy -intro: How do you keep machine learning models reliable in production — what should you monitor, where do upstream failures originate, and which tooling decisions actually matter? In this episode, Danny Leybzon, MLOps Architect at WhyLabs and computational statistics alum of UCLA, walks through the practical challenges of production model monitoring, data observability, and tooling trade-offs. Drawing on his path from analyst and product roles at Qubole to field engineering at Imply and now advising customers on observability, Danny defines the MLOps Architect as a technical-business bridge and explains how to prioritize production-first monitoring efforts.

    Topics covered include scope of observability across ETL and data pipelines, data profiling architecture (WhyLogs, profiles, Apache Druid), build vs buy decisions, platform-agnostic integrations and ONNX interoperability, and trends around cloud-native stacks and vendor lock-in. He also offers hiring and career perspectives for MLOps roles and research priorities like fairness and segmentation. Listen to get concrete guidance on designing model monitoring, choosing observability tooling, and identifying upstream root causes so you can reduce incidents and improve model reliability in production +description: "Master MLOps, model monitoring & data observability with guidance on production observability, ETL root causes, tooling trade-offs, ONNX, build vs buy" +intro: "How do you keep machine learning models reliable in production — what should you monitor, where do upstream failures originate, and which tooling decisions actually matter? In this episode, Danny Leybzon, MLOps Architect at WhyLabs and computational statistics alum of UCLA, walks through the practical challenges of production model monitoring, data observability, and tooling trade-offs. Drawing on his path from analyst and product roles at Qubole to field engineering at Imply and now advising customers on observability, Danny defines the MLOps Architect as a technical-business bridge and explains how to prioritize production-first monitoring efforts.

    Topics covered include scope of observability across ETL and data pipelines, data profiling architecture (WhyLogs, profiles, Apache Druid), build vs buy decisions, platform-agnostic integrations and ONNX interoperability, and trends around cloud-native stacks and vendor lock-in. He also offers hiring and career perspectives for MLOps roles and research priorities like fairness and segmentation. Listen to get concrete guidance on designing model monitoring, choosing observability tooling, and identifying upstream root causes so you can reduce incidents and improve model reliability in production" topics: - MLOps - tools @@ -82,7 +82,7 @@ quotableClips: startOffset: 1739 url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1739 endOffset: 1839 -- name: 'Market Education: Shift from "why monitor" to "how to monitor"' +- name: 'Market Education: Shift from "why monitor" to "how to monitor"' startOffset: 1839 url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1839 endOffset: 1910 @@ -769,7 +769,7 @@ transcript: sec: 1816 time: '30:16' who: Danny -- header: 'Market Education: Shift from "why monitor" to "how to monitor"' +- header: 'Market Education: Shift from "why monitor" to "how to monitor"' - line: Okay. You said you’re trying to be pre-emptive and this is also part of your role, right? 
You are trying to spread awareness about this problem by talking on different podcasts about model monitoring, why it’s important, what can go diff --git a/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md index 28e79ff4..c8cf07ad 100644 --- a/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md +++ b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md @@ -1,6 +1,6 @@ --- -title: 'Modern Data Pipeline Architecture: Ingestion, Orchestration, Transformation & MLOps Systems' -short: Modern Data Pipelines +title: "Modern Data Pipeline Architecture: Ingestion, Orchestration, Transformation & MLOps Systems" +short: "Modern Data Pipelines" season: 14 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0inhE28kLI4T1AsSjgwnL8?si=WeFES7dXRxqSK_SKonBejw youtube: https://www.youtube.com/watch?v=kSTfhQ_SZgc -description: Master modern data pipelines with dbt transforms and Airflow orchestration—streamline ingestion, speed feature engineering and analytics delivery -intro: How do you build a modern data pipeline that reliably moves raw events through ingestion, dbt transformations, Airflow orchestration and into production ML and analytics? In this episode, Santona Tuli — a former CERN researcher turned ML and data engineering lead at Upsolver — walks through practical patterns and trade-offs for end-to-end pipelines. Drawing on experience from particle-physics event analysis to NLP and workflow authoring with Airflow, Santona explains where ingestion engines and declarative SQL frameworks fit, and when dbt belongs in the stack.

    Topics include Upsolver vs dbt (pipeline authoring, execution engine and ingestion focus), differences between ML pipelines and analytics pipelines, MLOps vs DataOps, and dbt’s role in analytics engineering. We cover tooling (orchestrators, Spark, Kafka/Kinesis, feature stores, vector DBs), modern data stack choices like Snowflake and Databricks, lakehouse and staging patterns, and ingestion pre-processing needs such as deduplication, ordering guarantees and PII masking. You’ll also hear about transformation and data modeling (entities, foreign keys, business mappings), marts and dashboards, feature engineering and model serving, persona-driven pipeline design, and career-learning recommendations. Listen to gain concrete design guidance, tooling trade-offs, and resources to build scalable data and MLOps pipelines +description: "Master modern data pipelines with dbt transforms and Airflow orchestration—streamline ingestion, speed feature engineering and analytics delivery" +intro: "How do you build a modern data pipeline that reliably moves raw events through ingestion, dbt transformations, Airflow orchestration and into production ML and analytics? In this episode, Santona Tuli — a former CERN researcher turned ML and data engineering lead at Upsolver — walks through practical patterns and trade-offs for end-to-end pipelines. Drawing on experience from particle-physics event analysis to NLP and workflow authoring with Airflow, Santona explains where ingestion engines and declarative SQL frameworks fit, and when dbt belongs in the stack.

    Topics include Upsolver vs dbt (pipeline authoring, execution engine and ingestion focus), differences between ML pipelines and analytics pipelines, MLOps vs DataOps, and dbt’s role in analytics engineering. We cover tooling (orchestrators, Spark, Kafka/Kinesis, feature stores, vector DBs), modern data stack choices like Snowflake and Databricks, lakehouse and staging patterns, and ingestion pre-processing needs such as deduplication, ordering guarantees and PII masking. You’ll also hear about transformation and data modeling (entities, foreign keys, business mappings), marts and dashboards, feature engineering and model serving, persona-driven pipeline design, and career-learning recommendations. Listen to gain concrete design guidance, tooling trade-offs, and resources to build scalable data and MLOps pipelines" topics: - data engineering - MLOps diff --git a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md index d84f692c..82dafce6 100644 --- a/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md +++ b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md @@ -1,7 +1,6 @@ --- -title: 'Modern Search Systems: Vector Databases, LLMs and Semantic Retrieval' -short: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search - Technologies' +title: "Modern Search Systems: Vector Databases, LLMs and Semantic Retrieval" +short: "Searching Beyond the Surface: Navigating Challenges and Innovations in Search Technologies" season: 17 episode: 2 guests: @@ -15,25 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/navigating-challenges-and-innovations-in-search/id1541710331?i=1000639476594 spotify: https://open.spotify.com/episode/7mUMvxP4Efyeh0lhF5CvT6?si=7qqKrsMfQxaZy435s3XIEA youtube: https://www.youtube.com/watch?v=_fbe1QyJ1PY -description: 'Learn vector databases, LLMs & semantic retrieval: RAG, 
embeddings and - vector search tactics to build accurate chatbots, personalized search and better - ranking.' -intro: How do modern search systems combine vector databases, LLMs, and semantic retrieval - to deliver relevant, reliable results—and when should you adopt each component? - In this episode Atita Arora walks through that question from both historical and - practical angles. A long-time contributor to information retrieval projects (including - Apache OpenNLP and Quepid) and author of posts on vectors in e-commerce and the - open-source Chorus implementation, Atita brings hands-on experience plus ongoing - research into evaluating RAG systems and a commitment to user-centric metrics and - inclusivity.

    We cover the evolution from Solr/Lucene and the Semantic Web - era to NLP for query-content matching; practical vector topics such as Qdrant, plug-and-play - vector search, and migration tradeoffs; and end-to-end RAG pipelines—Whisper transcripts, - chunking and embedding strategies, LangChain orchestration, prompt design, citations, - and multi-level evaluation with human-in-the-loop testing. You’ll also hear about - session-based recommendations, personalization approaches, and curated learning - resources like Intro to Information Retrieval and Vector Hub. Listen to gain actionable - guidance on building and evaluating vector search and retrieval-augmented generation - systems while avoiding common pitfalls like LLM hallucinations. +description: "Learn vector databases, LLMs & semantic retrieval: RAG, embeddings and vector search tactics to build accurate chatbots, personalized search and better ranking." +topics: +- NLP +- LLMs +- MLOps +- machine learning +- data engineering +intro: "How do modern search systems combine vector databases, LLMs, and semantic retrieval to deliver relevant, reliable results—and when should you adopt each component? In this episode Atita Arora walks through that question from both historical and practical angles. A long-time contributor to information retrieval projects (including Apache OpenNLP and Quepid) and author of posts on vectors in e-commerce and the open-source Chorus implementation, Atita brings hands-on experience plus ongoing research into evaluating RAG systems and a commitment to user-centric metrics and inclusivity.

    We cover the evolution from Solr/Lucene and the Semantic Web era to NLP for query-content matching; practical vector topics such as Qdrant, plug-and-play vector search, and migration tradeoffs; and end-to-end RAG pipelines—Whisper transcripts, chunking and embedding strategies, LangChain orchestration, prompt design, citations, and multi-level evaluation with human-in-the-loop testing. You’ll also hear about session-based recommendations, personalization approaches, and curated learning resources like Intro to Information Retrieval and Vector Hub. Listen to gain actionable guidance on building and evaluating vector search and retrieval-augmented generation systems while avoiding common pitfalls like LLM hallucinations." dateadded: 2024-01-07 duration: PT00H59M13S quotableClips: diff --git a/_podcast/nlp-dataset-creation-annotation-tools-workflows.md b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md index 06385664..8e78edc2 100644 --- a/_podcast/nlp-dataset-creation-annotation-tools-workflows.md +++ b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md @@ -1,6 +1,6 @@ --- -title: 'Practical Guide to Dataset Creation & Annotation for NLP: Active Learning, Weak Supervision, Tools' -short: Dataset Creation and Curation +title: "Practical Guide to Dataset Creation & Annotation for NLP: Active Learning, Weak Supervision, Tools" +short: "Dataset Creation and Curation" season: 10 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/26K8JrQXKwLpQelo4n4Kdi?si=e2ad35c4941446c4 youtube: https://www.youtube.com/watch?v=QggWydGrWoo -description: 'Discover dataset creation, annotation & active learning: practical annotation UX, quality metrics, prototyping tips and tooling to accelerate NLP models.' -intro: How do you create high-quality NLP datasets without breaking the budget? 
In this episode Christiaan Swart — an NLP practitioner with six years’ experience across email, complaints, pharma, and sales who cofounded Comtura (born from sales call transcription and CRM integration) — walks through practical methods for dataset creation and annotation.

    We cover automated, manual, and hybrid pipelines; stakeholder alignment to de-risk projects; in-house vs. crowdsourcing trade-offs; and building a living annotation guidebook for ambiguous cases. Chris explains model-assisted annotation (pre-labeling and interpretability layers), capturing expert knowledge, establishing human baselines, and improving annotation UX and productivity. You’ll also hear about annotation quality metrics (inter-annotator agreement, throughput, fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling functions), programmatic heuristics, and tooling recommendations like Prodigy, Docanno, Label Studio, Snorkel, and Rubrics. Quick-start tips using IPython widgets and Fast.ai, plus privacy and multilingual considerations (GDPR, anonymization), round out the conversation.

    Listen to learn actionable strategies for cost-effective dataset creation, annotation workflows, and tool choices that speed model development and produce reliable training data +description: "Discover dataset creation, annotation & active learning: practical annotation UX, quality metrics, prototyping tips and tooling to accelerate NLP models." +intro: "How do you create high-quality NLP datasets without breaking the budget? In this episode Christiaan Swart — an NLP practitioner with six years’ experience across email, complaints, pharma, and sales who cofounded Comtura (born from sales call transcription and CRM integration) — walks through practical methods for dataset creation and annotation.

    We cover automated, manual, and hybrid pipelines; stakeholder alignment to de-risk projects; in-house vs. crowdsourcing trade-offs; and building a living annotation guidebook for ambiguous cases. Chris explains model-assisted annotation (pre-labeling and interpretability layers), capturing expert knowledge, establishing human baselines, and improving annotation UX and productivity. You’ll also hear about annotation quality metrics (inter-annotator agreement, throughput, fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling functions), programmatic heuristics, and tooling recommendations like Prodigy, Doccano, Label Studio, Snorkel, and Rubrix. Quick-start tips using IPython widgets and Fast.ai, plus privacy and multilingual considerations (GDPR, anonymization), round out the conversation.

    Listen to learn actionable strategies for cost-effective dataset creation, annotation workflows, and tool choices that speed model development and produce reliable training data" topics: - NLP - data diff --git a/_podcast/nlp-team-hiring-and-production-mlops.md b/_podcast/nlp-team-hiring-and-production-mlops.md index bab136f1..ccb0b3ec 100644 --- a/_podcast/nlp-team-hiring-and-production-mlops.md +++ b/_podcast/nlp-team-hiring-and-production-mlops.md @@ -1,6 +1,6 @@ --- -title: 'Lead NLP Teams: Hiring, Production Pipelines, MLOps & LLM Tradeoffs (GPT-3, spaCy)' -short: Leading NLP Teams +title: "Lead NLP Teams: Hiring, Production Pipelines, MLOps & LLM Tradeoffs (GPT-3, spaCy)" +short: "Leading NLP Teams" season: 6 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0jE1rpmLCYkD3GnUa2E7E3 apple: https://podcasts.apple.com/us/podcast/leading-nlp-teams-ivan-bilan/id1541710331?i=1000546053682 -description: Learn practical NLP teams hiring, production pipelines and MLOps tradeoffs—GPT-3 & spaCy tactics to deploy, monitor and scale reliable LLM systems -intro: How do you structure an NLP team and build reliable production pipelines while weighing the tradeoffs between GPT-3 and in-house models? In this episode, Ivan Bilan, Engineering Manager at Personio working on Identity and Access Management, walks through practical answers from his transition from linguistics to production NLP and MLOps.

    We cover hiring and team models (centralized vs cross-disciplinary), what to look for in NLP engineers (tokenization, linguistics, deployment skills), and when to bring in linguists or conversational designers. Ivan breaks down the anatomy of an NLP production pipeline—data annotation, task engineering, testing, deployment, observability—and contrasts using GPT-3 with building in-house pipelines and open-source tools like spaCy and Hugging Face for MVPs. He discusses inference optimization, privacy and bias risks with large language models, benchmarking limits, and practical microservice patterns for data-intensive apps.

    Listen to learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production +description: "Learn practical NLP teams hiring, production pipelines and MLOps tradeoffs—GPT-3 & spaCy tactics to deploy, monitor and scale reliable LLM systems" +intro: "How do you structure an NLP team and build reliable production pipelines while weighing the tradeoffs between GPT-3 and in-house models? In this episode, Ivan Bilan, Engineering Manager at Personio working on Identity and Access Management, walks through practical answers from his transition from linguistics to production NLP and MLOps.

    We cover hiring and team models (centralized vs cross-disciplinary), what to look for in NLP engineers (tokenization, linguistics, deployment skills), and when to bring in linguists or conversational designers. Ivan breaks down the anatomy of an NLP production pipeline—data annotation, task engineering, testing, deployment, observability—and contrasts using GPT-3 with building in-house pipelines and open-source tools like spaCy and Hugging Face for MVPs. He discusses inference optimization, privacy and bias risks with large language models, benchmarking limits, and practical microservice patterns for data-intensive apps.

    Listen to learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production" topics: - NLP - machine learning diff --git a/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md index f7fa4f8a..71066dbe 100644 --- a/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md +++ b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md @@ -1,6 +1,6 @@ --- -title: 'From Medicine to Machine Learning: Skill Stacking, Public Learning & Freelance-Driven Career Building' -short: 'From Medicine to Machine Learning: How Public Learning Turned into a Career' +title: "From Medicine to Machine Learning: Skill Stacking, Public Learning & Freelance-Driven Career Building" +short: "From Medicine to Machine Learning: How Public Learning Turned into a Career" season: 21 episode: 3 guests: @@ -14,23 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: https://open.spotify.com/episode/22Gc1bDecKA33KHAaSF9fx youtube: https://www.youtube.com/watch?v=5km62e4nDaw -description: Learn how to build a healthcare ML portfolio, land Upwork freelance gigs - and deploy Dockerized models to AWS—practical tips, capstones, and career strategies -intro: How do you go from medical school to shipping production-ready healthcare ML—and - get paid for it on platforms like Upwork? In this episode, Pastor Soto, a machine - learning engineer and mentor who transitioned from medicine and criminology into - production ML, walks through the practical steps he used to build a healthcare ML - portfolio and freelance career.

    We cover his career trajectory (statistician - → data analyst → data engineer), the skill progression from SPSS and R to Python, - and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp - and public learning—publishing exercises, leaderboards, and focused content—attracted - interviews and opportunities. He also breaks down portfolio tactics (Notion notes, - capstone projects using healthcare datasets), production topics (Dockerized models, - AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and - soft skills like English communication and handling critique.

    Listeners - will come away with concrete, repeatable strategies for building a healthcare machine - learning portfolio, landing freelance work, and deploying models to the cloud—plus - time-management and mentoring practices that make it sustainable +description: "Learn how to build a healthcare ML portfolio, land Upwork freelance gigs and deploy Dockerized models to AWS—practical tips, capstones, and career strategies" +topics: +- machine learning +- data science +- data engineering +- MLOps +- career transition +intro: "How do you go from medical school to shipping production-ready healthcare ML—and get paid for it on platforms like Upwork? In this episode, Pastor Soto, a machine learning engineer and mentor who transitioned from medicine and criminology into production ML, walks through the practical steps he used to build a healthcare ML portfolio and freelance career.

    We cover his career trajectory (statistician → data analyst → data engineer), the skill progression from SPSS and R to Python, and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp and public learning—publishing exercises, leaderboards, and focused content—attracted interviews and opportunities. He also breaks down portfolio tactics (Notion notes, capstone projects using healthcare datasets), production topics (Dockerized models, AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and soft skills like English communication and handling critique.

    Listeners will come away with concrete, repeatable strategies for building a healthcare machine learning portfolio, landing freelance work, and deploying models to the cloud—plus time-management and mentoring practices that make it sustainable" dateadded: 2025-08-22 duration: PT01H01M07S quotableClips: diff --git a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md index 7fb7cb43..e895322b 100644 --- a/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md +++ b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md @@ -1,6 +1,6 @@ --- -title: 'Open Source and Volunteering: Building AI Projects and Career Momentum' -short: Make an Impact Through Volunteering Open Source Work +title: "Open Source and Volunteering: Building AI Projects and Career Momentum" +short: "Make an Impact Through Volunteering Open Source Work" season: 17 episode: 7 guests: @@ -14,25 +14,16 @@ links: apple: https://podcasts.apple.com/us/podcast/make-an-impact-through-volunteering-open-source-work/id1541710331?i=1000646627892 spotify: https://open.spotify.com/episode/7tZSSgv1yAlnoMyB4ggQmb?si=AqDaME2QS26usoZjOEWNtQ youtube: https://www.youtube.com/watch?v=aHdaIwOEI8Q -description: Learn open source volunteering tactics for AI projects - data sourcing, - hackathon MVP strategy, mentorship and portfolio-building to accelerate career momentum. -intro: How can volunteering in open source AI projects accelerate your career while - delivering tangible community impact? In this episode Sara El-Ateif — Google Developer - Expert in Machine Learning, Google PhD Fellow, co-founder of AI Wonder Girls and - Evercoach-certified business coach — walks through practical ways to build skills - and momentum through volunteering and open source work.

    We cover Sara’s - path from early AI interest to PhD research in multimodal learning and medical imaging, - plus lessons from winning a Google PhD Fellowship. Hear concrete volunteer project - case studies — a PTSD chatbot, trash detection, and cervical spine segmentation - — and learn data sourcing tactics using Open Images and creative collection. Sara - explains how to find opportunities (LinkedIn, social media, mailing lists, WIML), - differences between collaboration platforms like Omdena and Fruit Punch AI, and - how women-led groups structure projects.

    Listeners will get actionable - advice on hackathon strategy, MVP mindset under data/compute constraints, pitching - for volunteer roles, building a research network, and the data engineering tasks - that matter (pipelines, dashboards, prep). Tune in to discover how open source and - volunteering translate into practical experience, referrals, and career traction - in machine learning. +description: "Learn open source volunteering tactics for AI projects - data sourcing, hackathon MVP strategy, mentorship and portfolio-building to accelerate career momentum." +topics: +- computer vision +- machine learning +- data engineering +- open-source +- career development +- mentorship +- career growth +intro: "How can volunteering in open source AI projects accelerate your career while delivering tangible community impact? In this episode Sara El-Ateif — Google Developer Expert in Machine Learning, Google PhD Fellow, co-founder of AI Wonder Girls and Evercoach-certified business coach — walks through practical ways to build skills and momentum through volunteering and open source work.

    We cover Sara's path from early AI interest to PhD research in multimodal learning and medical imaging, plus lessons from winning a Google PhD Fellowship. Hear concrete volunteer project case studies — a PTSD chatbot, trash detection, and cervical spine segmentation — and learn data sourcing tactics using Open Images and creative collection. Sara explains how to find opportunities (LinkedIn, social media, mailing lists, WIML), differences between collaboration platforms like Omdena and Fruit Punch AI, and how women-led groups structure projects.

    Listeners will get actionable advice on hackathon strategy, MVP mindset under data/compute constraints, pitching for volunteer roles, building a research network, and the data engineering tasks that matter (pipelines, dashboards, prep). Tune in to discover how open source and volunteering translate into practical experience, referrals, and career traction in machine learning." dateadded: 2024-02-29 duration: PT00H59M34S quotableClips: diff --git a/_podcast/open-source-ml-contributions.md b/_podcast/open-source-ml-contributions.md index a08e78ae..fd367c86 100644 --- a/_podcast/open-source-ml-contributions.md +++ b/_podcast/open-source-ml-contributions.md @@ -1,6 +1,6 @@ --- -title: 'Contribute to Open Source ML: scikit-learn Pipelines, PRs, Docs & Rasa Conversational AI' -short: Getting Started with Open Source +title: "Contribute to Open Source ML: scikit-learn Pipelines, PRs, Docs & Rasa Conversational AI" +short: "Getting Started with Open Source" season: 2 episode: 3 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/1dsbDeVncfsEg3m3cYB927 apple: https://podcasts.apple.com/us/podcast/getting-started-with-open-source-vincent-warmerdam/id1541710331?i=1000507024598 -description: 'Learn open source contribution tactics for scikit-learn pipelines and Rasa: make solid PRs, write docs & tests, boost your OSS skills and career visibility.' -intro: 'How do you start contributing to open source ML projects like scikit-learn pipelines—or move from curious user to confident contributor on Rasa’s conversational AI stack? In this episode, Vincent Warmerdam, Research Advocate at Rasa and creator of The Algorithm Whiteboard and calmcode.io, walks through practical, hands-on advice for contributing to open source ML.

    Vincent shares his career pivot from design student to data scientist and highlights projects (evol, clumper, memo, whatlies, scikit-lego) that illustrate small-tools-to-impact workflows. We deep-dive into scikit-learn–compatible pipeline components, design principles for low-maintenance APIs, and common mistakes such as publishing to PyPI too early. You’ll get a documentation checklist (README, guides, API reference, examples), guidance on filing reproducible issues, and step-by-step preparation for pull requests: testing, CI, packaging, and pre-commit hooks.

    Listeners will leave with concrete strategies for finding the right project, balancing large vs. small repositories, community stewardship and contribution etiquette, and ways OSS work can boost career visibility through talks, blogs, and meetups. If you want actionable next steps for contributing to open source ML, scikit-learn pipelines, PRs, docs, or Rasa conversational AI, this episode maps the path.' +description: "Learn open source contribution tactics for scikit-learn pipelines and Rasa: make solid PRs, write docs & tests, boost your OSS skills and career visibility." +intro: "How do you start contributing to open source ML projects like scikit-learn pipelines—or move from curious user to confident contributor on Rasa’s conversational AI stack? In this episode, Vincent Warmerdam, Research Advocate at Rasa and creator of The Algorithm Whiteboard and calmcode.io, walks through practical, hands-on advice for contributing to open source ML.

    Vincent shares his career pivot from design student to data scientist and highlights projects (evol, clumper, memo, whatlies, scikit-lego) that illustrate small-tools-to-impact workflows. We deep-dive into scikit-learn–compatible pipeline components, design principles for low-maintenance APIs, and common mistakes such as publishing to PyPI too early. You’ll get a documentation checklist (README, guides, API reference, examples), guidance on filing reproducible issues, and step-by-step preparation for pull requests: testing, CI, packaging, and pre-commit hooks.

    Listeners will leave with concrete strategies for finding the right project, balancing large vs. small repositories, community stewardship and contribution etiquette, and ways OSS work can boost career visibility through talks, blogs, and meetups. If you want actionable next steps for contributing to open source ML, scikit-learn pipelines, PRs, docs, or Rasa conversational AI, this episode maps the path." topics: - open-source - data science diff --git a/_podcast/open-source-ml-tools-strategy-and-business-models.md b/_podcast/open-source-ml-tools-strategy-and-business-models.md index a6ebb830..00b5e32a 100644 --- a/_podcast/open-source-ml-tools-strategy-and-business-models.md +++ b/_podcast/open-source-ml-tools-strategy-and-business-models.md @@ -1,6 +1,6 @@ --- -title: 'Open Source ML Tools: Scikit-Learn Governance, Sustainability and Business Models' -short: Working in Open Source - Probabl.ai and sklearn +title: "Open Source ML Tools: Scikit-Learn Governance, Sustainability and Business Models" +short: "Working in Open Source - Probabl.ai and sklearn" season: 18 episode: 4 guests: @@ -14,24 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/working-in-open-source-probabl-ai-and-sklearn-vincent/id1541710331?i=1000654481795 spotify: https://open.spotify.com/episode/0HT3IQOaTXTMH0OdEBnw9s?si=HrLtx7QKT_amZyUbZuqRzQ youtube: https://www.youtube.com/watch?v=UPlIETGwTg8 -description: Discover Scikit-Learn open source business models—learn maintainer strategies, - CI cost optimization and training monetization to build sustainable projects. -intro: How can open source ML tools stay healthy, useful, and financially sustainable - while serving both researchers and industry? 
In this episode Vincent Warmerdam — - Research Advocate at Rasa, author of the Koaning blog, creator of the Algorithm - Whiteboard playlist, and cofounder of Calm Code — walks through the real-world tradeoffs - of scikit-learn governance, sustainability, and business models for ML tooling. -

    We dig into scikit-learn’s history, NumFOCUS relationships, and the plugin-versus-core - strategy; practical maintainer issues like transitions, motivating volunteers, and - using open source contributions as hiring signals; and the intersection of developer - relations and core engineering. Vincent also explores Calm Code’s low-pressure teaching - philosophy, content and monetization choices, and platform decisions (Django, contributor - hiring). Technical operations topics include CI cost optimization with custom runners - and sustainable compute examples (Leaf.cloud), plus hands-on projects like Skrub’s - table vectorizer and GAP encoder for pragmatic tabular defaults.

    Listeners - will gain actionable insights on governance models, maintaining project health, - and realistic business options — training, consulting, and partnerships — for anyone - building or stewarding open source ML tools. +description: "Discover Scikit-Learn open source business models—learn maintainer strategies, CI cost optimization and training monetization to build sustainable projects." +topics: +- open-source +- machine learning +- data science +- tools +- developer relations + +intro: "How can open source ML tools stay healthy, useful, and financially sustainable while serving both researchers and industry? In this episode Vincent Warmerdam — Research Advocate at Rasa, author of the Koaning blog, creator of the Algorithm Whiteboard playlist, and cofounder of Calm Code — walks through the real-world tradeoffs of scikit-learn governance, sustainability, and business models for ML tooling.

    We dig into scikit-learn's history, NumFOCUS relationships, and the plugin-versus-core strategy; practical maintainer issues like transitions, motivating volunteers, and using open source contributions as hiring signals; and the intersection of developer relations and core engineering. Vincent also explores Calm Code's low-pressure teaching philosophy, content and monetization choices, and platform decisions (Django, contributor hiring). Technical operations topics include CI cost optimization with custom runners and sustainable compute examples (Leaf.cloud), plus hands-on projects like Skrub's table vectorizer and GAP encoder for pragmatic tabular defaults.

    Listeners will gain actionable insights on governance models, maintaining project health, and realistic business options — training, consulting, and partnerships — for anyone building or stewarding open source ML tools." dateadded: 2024-05-06 duration: PT01H15S quotableClips: diff --git a/_podcast/open-source-turned-into-career-and-startup-creation.md b/_podcast/open-source-turned-into-career-and-startup-creation.md index 93a4c2ad..72bbbf37 100644 --- a/_podcast/open-source-turned-into-career-and-startup-creation.md +++ b/_podcast/open-source-turned-into-career-and-startup-creation.md @@ -1,6 +1,6 @@ --- title: "From Developer to Startup Founder: Building a Career Through Open Source" -short: From Open-Source Maintainer to Founder +short: "From Open-Source Maintainer to Founder" season: 9 episode: 8 guests: @@ -15,7 +15,7 @@ links: spotify: https://open.spotify.com/episode/4JAwU2jQuXu4MoMucsE899?si=6ed45b98dd4a415a youtube: https://www.youtube.com/watch?v=bwfR9dyxf1M -description: Discover how to turn open source work into a sustainable career and even a startup. Learn about terminal apps, fundraising, community growth & hiring signals. +description: "Discover how to turn open source work into a sustainable career and even a startup. Learn about terminal apps, fundraising, community growth & hiring signals." intro: "How do you turn open source work into a sustainable career and even a startup? In this episode Will McGugan — a Python open source maintainer and creator of PyFilesystem, Rich, and Textual — walks through his path from video game developer to founder of Textualize. We trace his early projects (BBCode parser, chess libraries), the design of PyFilesystem and S3 integrations, and how solving personal needs led to learning by building.

    Will breaks down the technical and product journey: Rich’s terminal styling, tables, progress bars and observability features; the Textual framework for terminal GUIs; and the moment of founding Textualize after a viral tweet. He explains building in public, community growth via demos and social media, hiring through open source signals, and practical dev workflows with GitHub, PR reviews, and releases. He also outlines Textualize’s positioning and web hosting business model for terminal apps, plus contribution channels like Discourse and Discord.

    Listen to learn concrete, repeatable steps for leveraging Python open source to find freelance freedom, attract users, raise pre-seed interest, and transition from developer to startup founder." topics: - open-source diff --git a/_podcast/personal-brand-for-data-professionals.md b/_podcast/personal-brand-for-data-professionals.md index 29b2b153..7ea73b85 100644 --- a/_podcast/personal-brand-for-data-professionals.md +++ b/_podcast/personal-brand-for-data-professionals.md @@ -1,6 +1,6 @@ --- -title: 'Build a Personal Brand: Publish on LinkedIn/Medium, Grow Audience, Monetize with Online Courses' -short: Personal Branding +title: "Build a Personal Brand: Publish on LinkedIn/Medium, Grow Audience, Monetize with Online Courses" +short: "Personal Branding" season: 2 episode: 8 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/61Yv13MISTeP5nOVAZpY88 apple: https://podcasts.apple.com/us/podcast/personal-branding-admond-lee-kin-lim/id1541710331?i=1000511761026 -description: 'Build a personal brand: publish on LinkedIn & Medium, grow your audience, and monetize with online courses—publishing best practices, course design, growth tips.' -intro: How do you build a personal brand that actually attracts an audience and turns into revenue? In this episode, Admond Lee Kin Lim — data scientist, writer, speaker, and Data Science Instructor at Hackwagon Academy — breaks down a practical path from first posts to monetizing with online courses. Drawing on his experience at Micron and as an independent consultant and communicator featured in KDnuggets and Medium, Admond defines personal brand purpose and positioning, then walks through the first steps and mindset to start publishing on LinkedIn and Medium.

    You’ll get concrete guidance on formats and best practices for LinkedIn and Medium, idea generation and content frequency, and the tools he uses (BuzzSumo, Feedly and alternatives). We also cover when to add podcasting or audio, offline networking (Lunchclub, 1x1s), conference speaking, and overcoming imposter syndrome. Finally, Admond explains monetization strategies for selling online courses, course design focused on student outcomes, aligning content with your values, balancing frequency vs. quality, and metrics for iteration. Listen for actionable tips, tools, and resource recommendations to grow your audience and monetize your expertise +description: "Build a personal brand: publish on LinkedIn & Medium, grow your audience, and monetize with online courses—publishing best practices, course design, growth tips." +intro: "How do you build a personal brand that actually attracts an audience and turns into revenue? In this episode, Admond Lee Kin Lim — data scientist, writer, speaker, and Data Science Instructor at Hackwagon Academy — breaks down a practical path from first posts to monetizing with online courses. Drawing on his experience at Micron and as an independent consultant and communicator featured in KDnuggets and Medium, Admond defines personal brand purpose and positioning, then walks through the first steps and mindset to start publishing on LinkedIn and Medium.

    You’ll get concrete guidance on formats and best practices for LinkedIn and Medium, idea generation and content frequency, and the tools he uses (BuzzSumo, Feedly and alternatives). We also cover when to add podcasting or audio, offline networking (Lunchclub, 1x1s), conference speaking, and overcoming imposter syndrome. Finally, Admond explains monetization strategies for selling online courses, course design focused on student outcomes, aligning content with your values, balancing frequency vs. quality, and metrics for iteration. Listen for actionable tips, tools, and resource recommendations to grow your audience and monetize your expertise" topics: - personal brand - career growth diff --git a/_podcast/postdoc-to-data-science-lead-career-transition.md b/_podcast/postdoc-to-data-science-lead-career-transition.md index 8c325482..6816cf70 100644 --- a/_podcast/postdoc-to-data-science-lead-career-transition.md +++ b/_podcast/postdoc-to-data-science-lead-career-transition.md @@ -1,6 +1,6 @@ --- -title: 'From Postdoc to Data Science Lead: ML Foundations, Docker Deployment & Hiring Tips' -short: Moving from Academia to Industry +title: "From Postdoc to Data Science Lead: ML Foundations, Docker Deployment & Hiring Tips" +short: "Moving from Academia to Industry" season: 6 episode: 6 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5Jvo53ibSoX6rfkfdGq5pJ apple: https://podcasts.apple.com/us/podcast/moving-from-academia-to-industry-cj-jenkins/id1541710331?i=1000544589971 -description: 'Learn a one-year roadmap from postdoc to data science lead: machine learning foundations, Docker deployment, resume and hiring tips to land jobs.' -intro: 'How do you go from a postdoc to a data science lead while mastering machine learning foundations and deployment? In this episode, CJ Jenkins — a PhD-turned-data science lead working on credit risk modeling, with published research and a textbook used in academia — walks through that transition. 
We trace CJ’s roots in evolutionary biology and genomics, the statistical ML foundations (GLMs, population dynamics), and practical tools like Bash, R, Python, and SQL. Key topics include Docker deployment and bridging the gap between research and production, hiring signals and interview assessment techniques that prioritize learning agility and humility, and concrete career tactics: a one-year Coursera sprint (Johns Hopkins, Andrew Ng), resume rewrites (14 CV iterations), LinkedIn keyword strategy, and selective application versus volume. CJ also discusses location and networking strategies (Berlin, Stockholm, Klarna onboarding), technical expectations for juniors, code quality, and building psychological safety on teams. Listen to learn actionable steps for skills-first resumes, interview preparation, deployment basics, and how to translate academic output into industry impact. Find CJ on LinkedIn for follow-up questions.' +description: "Learn a one-year roadmap from postdoc to data science lead: machine learning foundations, Docker deployment, resume and hiring tips to land jobs." +intro: "How do you go from a postdoc to a data science lead while mastering machine learning foundations and deployment? In this episode, CJ Jenkins — a PhD-turned-data science lead working on credit risk modeling, with published research and a textbook used in academia — walks through that transition. We trace CJ’s roots in evolutionary biology and genomics, the statistical ML foundations (GLMs, population dynamics), and practical tools like Bash, R, Python, and SQL. Key topics include Docker deployment and bridging the gap between research and production, hiring signals and interview assessment techniques that prioritize learning agility and humility, and concrete career tactics: a one-year Coursera sprint (Johns Hopkins, Andrew Ng), resume rewrites (14 CV iterations), LinkedIn keyword strategy, and selective application versus volume. 
CJ also discusses location and networking strategies (Berlin, Stockholm, Klarna onboarding), technical expectations for juniors, code quality, and building psychological safety on teams. Listen to learn actionable steps for skills-first resumes, interview preparation, deployment basics, and how to translate academic output into industry impact. Find CJ on LinkedIn for follow-up questions." topics: - career transition - machine learning diff --git a/_podcast/practical-devrel-demofirst-education-and-open-source.md b/_podcast/practical-devrel-demofirst-education-and-open-source.md index bf65b9d8..a2bb8727 100644 --- a/_podcast/practical-devrel-demofirst-education-and-open-source.md +++ b/_podcast/practical-devrel-demofirst-education-and-open-source.md @@ -1,7 +1,6 @@ --- -title: 'Developer Advocacy Through Community Impact: Technical Leadership, Open Source - Mentorship & Demo-Driven Communication' -short: From Hackathons To Developer Advocacy +title: "Developer Advocacy Through Community Impact: Technical Leadership, Open Source Mentorship & Demo-Driven Communication" +short: "Developer Advocacy Through Community Impact" season: 20 episode: 8 guests: @@ -15,23 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/from-hackathons-to-developer-advocacy-will-russel/id1541710331?i=1000709634418 spotify: https://open.spotify.com/episode/4Lt785S38GuK0W2m7naRKt youtube: https://www.youtube.com/watch?v=vXbMUfHE1OE -description: Master developer advocacy, open source mentorship & demo-driven communication - to elevate technical leadership, amplify community impact & accelerate adoption. -intro: How do developer advocates create measurable community impact while balancing - technical leadership, mentorship, and clear communication? In this episode Will - Russell, Developer Advocate at Kestra, explores that question through the lens of - workflow orchestration and developer education. 
Will is known for his technical - video content on workflow orchestration and for building open source education programs - that help new contributors make their first pull requests.

    We cover core - topics including technical leadership in community settings, practical approaches - to open source mentorship, and the power of demo-driven communication and documentation - to make complex tools approachable. Will discusses how creating targeted videos - and clear docs lowers barriers for developers and nurtures sustainable contributor - pipelines.

    Listeners will come away with concrete ideas for designing open - source education programs, using demos to explain concepts, and applying developer - advocacy techniques to grow healthier communities. This episode is useful for developer - advocates, engineering managers, open source maintainers, and anyone interested - in workflow orchestration, developer education, and community-driven technical leadership. +description: "Master developer advocacy, open source mentorship & demo-driven communication to elevate technical leadership, amplify community impact & accelerate adoption." +topics: +- open-source +- computer vision +- tools +- MLOps +- software engineering +intro: "How do developer advocates create measurable community impact while balancing technical leadership, mentorship, and clear communication? In this episode Will Russell, Developer Advocate at Kestra, explores that question through the lens of workflow orchestration and developer education. Will is known for his technical video content on workflow orchestration and for building open source education programs that help new contributors make their first pull requests.

    We cover core topics including technical leadership in community settings, practical approaches to open source mentorship, and the power of demo-driven communication and documentation to make complex tools approachable. Will discusses how creating targeted videos and clear docs lowers barriers for developers and nurtures sustainable contributor pipelines.

    Listeners will come away with concrete ideas for designing open source education programs, using demos to explain concepts, and applying developer advocacy techniques to grow healthier communities. This episode is useful for developer advocates, engineering managers, open source maintainers, and anyone interested in workflow orchestration, developer education, and community-driven technical leadership." dateadded: 2025-05-26 duration: PT01H01M29S quotableClips: @@ -139,7 +129,7 @@ quotableClips: startOffset: 3442 url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3442 endOffset: 3599 -- name: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' +- name: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' startOffset: 3599 url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3599 endOffset: 3689 @@ -1548,7 +1538,7 @@ transcript: sec: 3578 time: '59:38' who: Alexey -- header: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' +- header: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' - line: When I was at the fellowship, managing a group of student leaders, I felt overwhelmed and unsure if I was giving enough support, especially early in my career. 
diff --git a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md index 097402a1..fe7717b5 100644 --- a/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md +++ b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md @@ -1,6 +1,6 @@ --- -title: 'Launching a Freelance Generative AI Business: NLP Services and Client Acquisition' -short: From a Research Scientist at Amazon to a Machine learning/AI Consultant +title: "Launching a Freelance Generative AI Business: NLP Services and Client Acquisition" +short: "From a Research Scientist at Amazon to a Machine Learning/AI Consultant" season: 16 episode: 5 guests: @@ -14,22 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/from-a-research-scientist-at-amazon-to-a/id1541710331?i=1000634411188 spotify: https://open.spotify.com/episode/7gJI3ds3k1vXd3m3W9iRj9?si=oG6A7BuTSjaEoH6FhvEVug youtube: https://www.youtube.com/watch?v=4RargY8iOaE -description: 'Learn to launch a freelance generative AI business: package NLP services, - master client acquisition and pricing to win projects and scale revenue.' -intro: How do you move from research scientist to running a freelance generative AI - business focused on NLP—and actually win clients? In this episode Verena Weber, - a former Research Scientist at Alexa AI with 7+ years in machine learning and a - background in statistics, walks through that transition and what it takes to offer - NLP services as a freelancer. Verena’s mission is to help companies prepare for - the GenAI shift, and she draws on deep NLP expertise to explain which service offerings - make sense, how to position technical skills for business clients, and practical - approaches to client acquisition in the generative AI space. 
Listeners will get - a clear view of launching a freelance generative AI business, including how to translate - research experience into marketable NLP services, approaches to finding and engaging - clients, and what to expect when stepping out on your own. If you’re a machine learning - professional or aspiring NLP freelancer trying to build a sustainable freelance - practice in generative AI, this episode provides grounded, experience-based guidance - to help you get started. +description: "Learn to launch a freelance generative AI business: package NLP services, master client acquisition and pricing to win projects and scale revenue." +topics: +- AI +- LLMs +- NLP +- freelance +- production +- career transition +intro: "How do you move from research scientist to running a freelance generative AI business focused on NLP—and actually win clients? In this episode Verena Weber, a former Research Scientist at Alexa AI with 7+ years in machine learning and a background in statistics, walks through that transition and what it takes to offer NLP services as a freelancer. Verena's mission is to help companies prepare for the GenAI shift, and she draws on deep NLP expertise to explain which service offerings make sense, how to position technical skills for business clients, and practical approaches to client acquisition in the generative AI space. Listeners will get a clear view of launching a freelance generative AI business, including how to translate research experience into marketable NLP services, approaches to finding and engaging clients, and what to expect when stepping out on your own. If you're a machine learning professional or aspiring NLP freelancer trying to build a sustainable freelance practice in generative AI, this episode provides grounded, experience-based guidance to help you get started." 
dateadded: 2023-11-12 date: 2025-11-07 duration: PT00H59M53S diff --git a/_podcast/practical-llm-engineering-and-rag.md b/_podcast/practical-llm-engineering-and-rag.md index ce9ba068..e8c1c95b 100644 --- a/_podcast/practical-llm-engineering-and-rag.md +++ b/_podcast/practical-llm-engineering-and-rag.md @@ -1,6 +1,6 @@ --- -title: 'Practical LLM Engineering and RAG: Prompting, Evaluation and Real-World Workflows' -short: How to Build and Evaluate AI systems in the Age of LLMs +title: "Practical LLM Engineering and RAG: Prompting, Evaluation and Real-World Workflows" +short: "How to Build and Evaluate AI systems in the Age of LLMs" season: 22 episode: 4 guests: @@ -14,24 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/how-to-build-and-evaluate-ai-systems-in-the-age-of/id1541710331?i=1000733350691 spotify: https://open.spotify.com/episode/2RD2qXaYa2ZjKjuIE7Aj6O youtube: https://www.youtube.com/watch?v=eC3RNuI6ow0 -description: 'Discover LLM engineering and RAG best practices: practical prompting, - evaluation methods and deployment workflows to boost accuracy and retrieval.' -intro: How do you move from experimentation to reliable, production-ready LLM engineering - and retrieval-augmented generation (RAG)? In this episode Hugo Bowne-Anderson — - Head of Developer Relations at Outerbounds, longtime data scientist, educator, and - host of Vanishing Gradients — walks through practical patterns for building, evaluating, - and scaling real-world LLM workflows.

    We cover everyday LLM use cases (summaries, - translation, CSV work), prompting best practices (role prompts, structured output, - timestamps), and transcript pipelines using Gemini, Descript, Loom and automation - with GitHub Actions. Hugo explains the generator–evaluator pattern for automated - quality control, how to design evaluation sets and failure analysis, and concrete - chunking strategies (fixed length, sliding windows, context rotation) that unlock - RAG performance. He also discusses when to add tooling or agentic capabilities, - a four-step framework for agents, memory design tradeoffs, and a practical email - assistant example using the Gmail API plus RAG.

    Listen to learn actionable - guidance on prioritizing RAG for quick business wins, building debuggable MVPs with - logging and traces, and setting up evaluation and monitoring so your LLMs deliver - dependable results in production. +description: "Discover LLM engineering and RAG best practices: practical prompting, evaluation methods and deployment workflows to boost accuracy and retrieval." +topics: +- LLMs +- NLP +- MLOps +- tools + +intro: "How do you move from experimentation to reliable, production-ready LLM engineering and retrieval-augmented generation (RAG)? In this episode Hugo Bowne-Anderson — Head of Developer Relations at Outerbounds, longtime data scientist, educator, and host of Vanishing Gradients — walks through practical patterns for building, evaluating, and scaling real-world LLM workflows.

    We cover everyday LLM use cases (summaries, translation, CSV work), prompting best practices (role prompts, structured output, timestamps), and transcript pipelines using Gemini, Descript, Loom and automation with GitHub Actions. Hugo explains the generator–evaluator pattern for automated quality control, how to design evaluation sets and failure analysis, and concrete chunking strategies (fixed length, sliding windows, context rotation) that unlock RAG performance. He also discusses when to add tooling or agentic capabilities, a four-step framework for agents, memory design tradeoffs, and a practical email assistant example using the Gmail API plus RAG.

    Listen to learn actionable guidance on prioritizing RAG for quick business wins, building debuggable MVPs with logging and traces, and setting up evaluation and monitoring so your LLMs deliver dependable results in production." dateadded: 2025-10-27 date: 2025-11-07 duration: PT01H01M30S diff --git a/_podcast/practical-llm-use-cases-and-product-patterns.md b/_podcast/practical-llm-use-cases-and-product-patterns.md index 63e6c8db..0462eeaa 100644 --- a/_podcast/practical-llm-use-cases-and-product-patterns.md +++ b/_podcast/practical-llm-use-cases-and-product-patterns.md @@ -1,6 +1,6 @@ --- -title: 'LLM Value Creation: GPT Communities, Business Use Cases & Human-in-the-Loop AI Applications' -short: The Good, the Bad and the Ugly of GPT +title: "LLM Value Creation: GPT Communities, Business Use Cases & Human-in-the-Loop AI Applications" +short: "The Good, the Bad and the Ugly of GPT" season: 15 episode: 4 guests: diff --git a/_podcast/pragmatic-and-standardized-mlops.md b/_podcast/pragmatic-and-standardized-mlops.md index aeb4e8fd..6cfa9458 100644 --- a/_podcast/pragmatic-and-standardized-mlops.md +++ b/_podcast/pragmatic-and-standardized-mlops.md @@ -1,6 +1,6 @@ --- -title: 'Pragmatic MLOps: Build Standardized CI/CD, Model Registries, Monitoring & Org Best Practices' -short: Pragmatic and Standardized MLOps +title: "Pragmatic MLOps: Build Standardized CI/CD, Model Registries, Monitoring & Org Best Practices" +short: "Pragmatic and Standardized MLOps" season: 15 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/5UZPZTDllam3RrbI9sOyqS?si=Ghm1oD8bSFS6l0ULDlatpQ youtube: https://www.youtube.com/watch?v=q3DTR3Od1MA -description: 'Learn pragmatic MLOps: standardize CI/CD, model registry and monitoring to boost reproducibility, deployment reliability, and team productivity.' -intro: 'How do you build pragmatic, standardized MLOps across teams without chasing every new tool? 
In this episode, Maria Vechtomova — an MLOps tech lead and manager with roots in econometrics and early work moving from R to Python — tackles MLOps as an organizational challenge, not just a technology problem.

    Maria walks through core, actionable topics: building reusable CI/CD and standardized repos, choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines central MLOps responsibilities — infrastructure, registries, deployment patterns, and monitoring — and contrasts centralized platform teams with embedded feature teams and guardrails. You’ll hear practical advice on moving logic out of notebooks into packages and pipelines, conducting maturity assessments (reproducibility, testing, documentation), and securing DevOps buy-in. The conversation also covers monitoring standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus retail use cases like demand forecasting and personalization.

    Listen to learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring — and how to prioritize organizational change to make MLOps work in production.' +description: "Learn pragmatic MLOps: standardize CI/CD, model registry and monitoring to boost reproducibility, deployment reliability, and team productivity." +intro: "How do you build pragmatic, standardized MLOps across teams without chasing every new tool? In this episode, Maria Vechtomova — an MLOps tech lead and manager with roots in econometrics and early work moving from R to Python — tackles MLOps as an organizational challenge, not just a technology problem.

    Maria walks through core, actionable topics: building reusable CI/CD and standardized repos, choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines central MLOps responsibilities — infrastructure, registries, deployment patterns, and monitoring — and contrasts centralized platform teams with embedded feature teams and guardrails. You’ll hear practical advice on moving logic out of notebooks into packages and pipelines, conducting maturity assessments (reproducibility, testing, documentation), and securing DevOps buy-in. The conversation also covers monitoring standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus retail use cases like demand forecasting and personalization.

    Listen to learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring — and how to prioritize organizational change to make MLOps work in production." topics: - MLOps dateadded: 2023-09-25 diff --git a/_podcast/product-designer-to-data-product-manager.md b/_podcast/product-designer-to-data-product-manager.md index 90800e9f..14851fc3 100644 --- a/_podcast/product-designer-to-data-product-manager.md +++ b/_podcast/product-designer-to-data-product-manager.md @@ -1,6 +1,6 @@ --- -title: 'How to Transition from Design to Data Product Manager: SQL, Customer Discovery & Data Quality' -short: Becoming a Data Product Manager +title: "How to Transition from Design to Data Product Manager: SQL, Customer Discovery & Data Quality" +short: "Becoming a Data Product Manager" season: 6 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3NZhd5kgQFpGckyxTQH9bF apple: https://podcasts.apple.com/us/podcast/becoming-a-data-product-manager-sara-menefee/id1541710331?i=1000543165093 -description: 'Learn to transition from product design to Data Product Manager: master SQL, customer discovery, build a portfolio and lead analytics products.' -intro: 'How do you move from product design into a data product manager role — and which technical and discovery skills will make that transition practical and persuasive? Sara Menefee, a product manager at Meroxa and former product designer at Sora, Checkr, Change.org, and Zendesk, walks through her path and the concrete steps designers can take to become data-focused PMs.

    This episode covers customer discovery and hypothesis formation, SQL and data engineering fundamentals, and the operational realities of data product management: data quality, PII/compliance, and the data lifecycle from sources to warehouses and apps. Sara explains how design thinking and PM–designer collaboration inform discovery and prioritization, and lays out a transition strategy that emphasizes networking, on-the-job learning, mentorship, and a portfolio built around case-study structure (problem, research, solution, outcome). You’ll also hear practical workflows — standups, analytics, customer development interviews — plus documentation-first practices (PRDs, knowledge bases), resource recommendations (including Reforge), and where ML and data science fit into the PM role.

    Listen for actionable steps, portfolio guidance, and the technical literacy (SQL, documentation, data curiosity) you''ll need to move from design to data product manager.' +description: "Learn to transition from product design to Data Product Manager: master SQL, customer discovery, build a portfolio and lead analytics products." +intro: "How do you move from product design into a data product manager role — and which technical and discovery skills will make that transition practical and persuasive? Sara Menefee, a product manager at Meroxa and former product designer at Sora, Checkr, Change.org, and Zendesk, walks through her path and the concrete steps designers can take to become data-focused PMs.

    This episode covers customer discovery and hypothesis formation, SQL and data engineering fundamentals, and the operational realities of data product management: data quality, PII/compliance, and the data lifecycle from sources to warehouses and apps. Sara explains how design thinking and PM–designer collaboration inform discovery and prioritization, and lays out a transition strategy that emphasizes networking, on-the-job learning, mentorship, and a portfolio built around case-study structure (problem, research, solution, outcome). You’ll also hear practical workflows — standups, analytics, customer development interviews — plus documentation-first practices (PRDs, knowledge bases), resource recommendations (including Reforge), and where ML and data science fit into the PM role.

    Listen for actionable steps, portfolio guidance, and the technical literacy (SQL, documentation, data curiosity) you'll need to move from design to data product manager." topics: - career transition - product design diff --git a/_podcast/production-ml-mlops-and-data-team-building.md b/_podcast/production-ml-mlops-and-data-team-building.md index b984253d..5d7a2c09 100644 --- a/_podcast/production-ml-mlops-and-data-team-building.md +++ b/_podcast/production-ml-mlops-and-data-team-building.md @@ -1,6 +1,6 @@ --- -title: 'From Analytics to Production ML: Team Building, Experiments, MLOps & Fraud Detection' -short: Similarities and Differences between ML and Analytics +title: "From Analytics to Production ML: Team Building, Experiments, MLOps & Fraud Detection" +short: "Similarities and Differences between ML and Analytics" season: 5 episode: 7 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/19fWdSuxTLwIdzVT45qF9x apple: https://podcasts.apple.com/us/podcast/similarities-and-differences-between-ml-and/id1541710331?i=1000538713607 -description: Master building data teams, deploying production machine learning and MLOps, running A/B experiments and fraud detection to boost model reliability and ROI -intro: How do teams move beyond dashboards to reliable production ML—while organizing people, running experiments, and tackling use cases like fraud detection? In this episode Rishabh Bhargava (7+ years in analytics and ML, former Sales Engineering lead at Datacoral—acquired by Cloudera—and early Primer.ai engineer; MS CS Stanford) walks through the practical bridge from analytics to ML in production.

    We cover data infrastructure and sales-engineering lessons (demos, POCs, integration), early NLP work (summarization, entity extraction), and the differences between prescriptive and predictive analytics. Rishabh explains day-to-day ML operations—models, APIs, SLAs—and the evolution of fraud detection from rule-based systems to machine learning. He digs into experimental workflows (A/B testing, shadow mode), experiment analysis (segmentation, uplift, root cause), and why documentation and analysts’ tribal knowledge matter. We also discuss hiring and team structure—hire data engineers, then analysts, then data scientists—and trade-offs between embedded versus centralized data roles.

    If you’re responsible for data strategy, MLOps, or deploying fraud detection models, this episode provides actionable perspectives on experiments, team building, and moving ML into production +description: "Master building data teams, deploying production machine learning and MLOps, running A/B experiments and fraud detection to boost model reliability and ROI" +intro: "How do teams move beyond dashboards to reliable production ML—while organizing people, running experiments, and tackling use cases like fraud detection? In this episode Rishabh Bhargava (7+ years in analytics and ML, former Sales Engineering lead at Datacoral—acquired by Cloudera—and early Primer.ai engineer; MS CS Stanford) walks through the practical bridge from analytics to ML in production.

    We cover data infrastructure and sales-engineering lessons (demos, POCs, integration), early NLP work (summarization, entity extraction), and the differences between prescriptive and predictive analytics. Rishabh explains day-to-day ML operations—models, APIs, SLAs—and the evolution of fraud detection from rule-based systems to machine learning. He digs into experimental workflows (A/B testing, shadow mode), experiment analysis (segmentation, uplift, root cause), and why documentation and analysts’ tribal knowledge matter. We also discuss hiring and team structure—hire data engineers, then analysts, then data scientists—and trade-offs between embedded versus centralized data roles.

    If you’re responsible for data strategy, MLOps, or deploying fraud detection models, this episode provides actionable perspectives on experiments, team building, and moving ML into production." topics: - machine learning - production @@ -51,7 +51,7 @@ quotableClips: startOffset: 406 url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=406 endOffset: 572 -- name: 'Terminology Problems: The Ambiguity of "Data Science"' +- name: 'Terminology Problems: The Ambiguity of "Data Science"' startOffset: 572 url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=572 endOffset: 648 @@ -280,7 +280,7 @@ transcript: sec: 555 time: '9:15' who: Rishabh -- header: 'Terminology Problems: The Ambiguity of "Data Science"' +- header: 'Terminology Problems: The Ambiguity of "Data Science"' - line: 'Actually, in the question that I initially put, I wrote ‘data science’. Then you left a comment saying: “Hey, let''s not use ‘science’ here because it''s too ambiguous. It can mean too many things.”' diff --git a/_podcast/production-ml-pipelines-with-aws-and-kafka.md b/_podcast/production-ml-pipelines-with-aws-and-kafka.md index 9a5c421d..b697b074 100644 --- a/_podcast/production-ml-pipelines-with-aws-and-kafka.md +++ b/_podcast/production-ml-pipelines-with-aws-and-kafka.md @@ -1,6 +1,6 @@ --- -title: 'From Notebooks to Production: Build Data Pipelines & Deploy ML (AWS, Kafka, Streaming)' -short: Build Your Own Data Pipeline +title: "From Notebooks to Production: Build Data Pipelines & Deploy ML (AWS, Kafka, Streaming)" +short: "Build Your Own Data Pipeline" season: 4 episode: 2 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0fFRCAYFCReMxEiq2RDVak apple: https://podcasts.apple.com/us/podcast/build-your-own-data-pipeline-andreas-kretz/id1541710331?i=1000527643914 -description: 'Learn to build data pipelines and deploy ML on AWS: productionize notebooks, cut ops risk, choose cost-effective serving and orchestration.' 
-intro: 'How do you move models out of notebooks and into reliable production data pipelines using AWS, Kafka, and streaming architectures? In this episode, Andreas Kretz — the “Plumber of Data Science” — walks through the practical steps engineers and data scientists need to productionize notebooks and deploy ML systems.

    Andreas, a data engineer focused on platform architecture, explains why data engineering demand is rising and why teams should hire both a data scientist and engineer early. We cover the anatomy of data pipelines — ingestion (events, Kafka/Kinesis), buffering, processing (streaming vs. batch), storage (Parquet on S3) and visualization — plus processing frameworks like Spark, Flink, Glue, and Docker jobs. Andreas outlines a pragmatic stack for scientists: Python, Docker, Flask/FastAPI for prototypes, and how to choose orchestration and scheduling (Lambda/CloudWatch, Airflow, Kubernetes, message queues). You’ll also hear about inference strategies, SageMaker endpoints vs precomputed predictions, model storage, and operational trade-offs.

    Listen to gain actionable guidance on building data pipelines, deploying ML on AWS, selecting tools, and getting from prototype to production with minimal operational risk. Find practical learning paths and project ideas to accelerate your data engineering skills.' +description: "Learn to build data pipelines and deploy ML on AWS: productionize notebooks, cut ops risk, choose cost-effective serving and orchestration." +intro: "How do you move models out of notebooks and into reliable production data pipelines using AWS, Kafka, and streaming architectures? In this episode, Andreas Kretz — the “Plumber of Data Science” — walks through the practical steps engineers and data scientists need to productionize notebooks and deploy ML systems.

    Andreas, a data engineer focused on platform architecture, explains why data engineering demand is rising and why teams should hire both a data scientist and engineer early. We cover the anatomy of data pipelines — ingestion (events, Kafka/Kinesis), buffering, processing (streaming vs. batch), storage (Parquet on S3) and visualization — plus processing frameworks like Spark, Flink, Glue, and Docker jobs. Andreas outlines a pragmatic stack for scientists: Python, Docker, Flask/FastAPI for prototypes, and how to choose orchestration and scheduling (Lambda/CloudWatch, Airflow, Kubernetes, message queues). You’ll also hear about inference strategies, SageMaker endpoints vs precomputed predictions, model storage, and operational trade-offs.

    Listen to gain actionable guidance on building data pipelines, deploying ML on AWS, selecting tools, and getting from prototype to production with minimal operational risk. Find practical learning paths and project ideas to accelerate your data engineering skills." topics: - data engineering - machine learning diff --git a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md index 0b7efa54..3a32c013 100644 --- a/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md +++ b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md @@ -1,6 +1,6 @@ --- -title: 'Production ML Search: Embeddings, Hybrid Architectures and Scalable Indexing' -short: Building Machine Learning Products +title: "Production ML Search: Embeddings, Hybrid Architectures and Scalable Indexing" +short: "Building Machine Learning Products" season: 17 episode: 8 guests: @@ -14,24 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/building-machine-learning-products-reem-mahmoud/id1541710331?i=1000649393833 spotify: https://open.spotify.com/episode/4jNredXndQ2b2evgfSmD2G?si=gU2kT-zXSX27hDPgLtwMgQ youtube: https://www.youtube.com/watch?v=m45tNY-8gY8 -description: 'Master vector search, embeddings and hybrid search: scalable indexing, - multimodal retrieval and ranking tactics to boost relevance and reduce latency.' -intro: How do you move from prototypes to production ML search that scales and stays - relevant? In this episode Reem Mahmoud, Director of Data Science at intervu.ai, - breaks down practical approaches to building production ML search systems—focusing - on embeddings, hybrid architectures, and scalable indexing.

    We cover core - concepts like inverted indexes and Lucene basics, candidate generation versus ML - ranking, and why you should avoid hand-rolling indexes. Dive into vector search - fundamentals—embeddings as shared representations, embedding pipelines, and the - trade-offs between vector compute and storage. Learn how multimodal embeddings (text, - images, CLIP) and feature fusion enable richer relevance, and how hybrid search - combines vector similarity with filters, recency, and business constraints. The - episode also explores time encoding in embeddings, query-time weighting, LLMs versus - specialized encoders, vector DB selection, and operationalization—offline tests, - A/B metrics, and enabling engineers for fast iteration.

    Listen for actionable - guidance on scalable indexing strategies, choosing a vector DB, and measuring search - impact so you can design reliable production search that balances latency, relevance, - and business KPIs. +description: "Master vector search, embeddings and hybrid search: scalable indexing, multimodal retrieval and ranking tactics to boost relevance and reduce latency." +topics: +- LLMs +- NLP +- machine learning +- MLOps +- data engineering + +intro: "How do you move from prototypes to production ML search that scales and stays relevant? In this episode Reem Mahmoud, Director of Data Science at intervu.ai, breaks down practical approaches to building production ML search systems—focusing on embeddings, hybrid architectures, and scalable indexing.

    We cover core concepts like inverted indexes and Lucene basics, candidate generation versus ML ranking, and why you should avoid hand-rolling indexes. Dive into vector search fundamentals—embeddings as shared representations, embedding pipelines, and the trade-offs between vector compute and storage. Learn how multimodal embeddings (text, images, CLIP) and feature fusion enable richer relevance, and how hybrid search combines vector similarity with filters, recency, and business constraints. The episode also explores time encoding in embeddings, query-time weighting, LLMs versus specialized encoders, vector DB selection, and operationalization—offline tests, A/B metrics, and enabling engineers for fast iteration.

    Listen for actionable guidance on scalable indexing strategies, choosing a vector DB, and measuring search impact so you can design reliable production search that balances latency, relevance, and business KPIs." dateadded: 2024-03-17 duration: PT01H05M23S quotableClips: diff --git a/_podcast/production-ready-ai-engineering.md b/_podcast/production-ready-ai-engineering.md index e6dda75d..65ce833d 100644 --- a/_podcast/production-ready-ai-engineering.md +++ b/_podcast/production-ready-ai-engineering.md @@ -1,6 +1,6 @@ --- -title: 'Production AI Engineering: Data Pipelines, Prompt Optimization and Caching' -short: Data Intensive AI +title: "Production AI Engineering: Data Pipelines, Prompt Optimization and Caching" +short: "Data Intensive AI" season: 20 episode: 5 guests: @@ -14,22 +14,14 @@ links: apple: https://podcasts.apple.com/us/podcast/data-intensive-ai-bartosz-mikulski/id1541710331?i=1000700288876 spotify: https://open.spotify.com/episode/0nFSU92IQDbM4C9FLvdn4z youtube: https://www.youtube.com/watch?v=BP6w_vKySN0 -description: 'Master production AI engineering: build scalable data pipelines, optimize - prompts, and implement caching to cut latency and costs for production-ready models' -intro: How do you move AI projects from proof-of-concept to reliable production systems - while keeping prompts, pipelines, and response times under control? In this episode - Bartosz Mikulski, an AI and data engineer who specializes in productionizing AI, - breaks down the engineering work required to make models dependable beyond demos. - Bartosz explains how to design robust data pipelines, apply prompt optimization - practices, and introduce caching strategies that reduce load and improve responsiveness. - He also covers building testing infrastructure and using tests to surface issues - that block production readiness—then how to fix those issues. 
Listeners will get - concrete, engineering-focused insights into production AI, including practical approaches - to pipeline orchestration, prompt tuning for stability, and where caching fits in - an operational stack. Whether you’re responsible for deploying models, improving - inference reliability, or creating reproducible pipelines, this conversation offers - actionable techniques and perspectives for turning experiments into maintainable - production systems. +description: "Master production AI engineering: build scalable data pipelines, optimize prompts, and implement caching to cut latency and costs for production-ready models" +topics: +- data engineering +- AI +- LLMs +- MLOps +- tools +intro: "How do you move AI projects from proof-of-concept to reliable production systems while keeping prompts, pipelines, and response times under control? In this episode Bartosz Mikulski, an AI and data engineer who specializes in productionizing AI, breaks down the engineering work required to make models dependable beyond demos. Bartosz explains how to design robust data pipelines, apply prompt optimization practices, and introduce caching strategies that reduce load and improve responsiveness. He also covers building testing infrastructure and using tests to surface issues that block production readiness—then how to fix those issues. Listeners will get concrete, engineering-focused insights into production AI, including practical approaches to pipeline orchestration, prompt tuning for stability, and where caching fits in an operational stack. Whether you're responsible for deploying models, improving inference reliability, or creating reproducible pipelines, this conversation offers actionable techniques and perspectives for turning experiments into maintainable production systems." 
dateadded: 2025-03-26 duration: PT01H01M37S quotableClips: @@ -49,7 +41,7 @@ quotableClips: startOffset: 364 url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=364 endOffset: 545 -- name: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' +- name: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' startOffset: 545 url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=545 endOffset: 707 @@ -279,7 +271,7 @@ transcript: sec: 528 time: '8:48' who: Bartosz -- header: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' +- header: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' - line: How did you end up writing a chapter for 97 Things Every Data Engineer Should Know? sec: 545 diff --git a/_podcast/project-manager-to-data-scientist.md b/_podcast/project-manager-to-data-scientist.md index 07109378..02e6a569 100644 --- a/_podcast/project-manager-to-data-scientist.md +++ b/_podcast/project-manager-to-data-scientist.md @@ -1,6 +1,6 @@ --- -title: 'From Project Manager to Data Scientist: Skills, Tools, ML Courses & Job Search' -short: Transitioning from Project Management to Data Science +title: "From Project Manager to Data Scientist: Skills, Tools, ML Courses & Job Search" +short: "Transitioning from Project Management to Data Science" season: 3 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3vF1B2mKwImsVC7h3NIDJW apple: https://podcasts.apple.com/us/podcast/transitioning-from-project-management-to-data-science/id1541710331?i=1000516467544 -description: 'Discover how project managers switch to data science: master machine learning, Python, CRISP-DM, build a portfolio, and land data roles faster.' -intro: 'How do you move from project management into a data science career — and what skills, tools, and courses actually matter? 
In this episode, Ksenia Legostay, Manager/Data Scientist at momox GmbH, walks through her transition after four years as a project manager into three years researching fraud and anomaly detection and earning a degree in data analysis. We cover career foundations, the difference between analytics and data science, and a concrete learning strategy: assess strengths, target gaps, and build core skills in programming, statistics, and domain expertise.

    Ksenia outlines recommended coursework (machine learning, time series, graph analysis), online resources including mlcourse.ai, and a practical tools progression from spreadsheets and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP-DM to structure projects, starting as a data analyst to build a portfolio, using Kaggle and community resources (OpenDataScience, DataTalks), and preparing for production with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain specialization (fraud detection, node2vec), realistic job search expectations, part-time learning plans, and essential math topics — a clear roadmap for transitioning to data science.' +description: "Discover how project managers switch to data science: master machine learning, Python, CRISP-DM, build a portfolio, and land data roles faster." +intro: "How do you move from project management into a data science career — and what skills, tools, and courses actually matter? In this episode, Ksenia Legostay, Manager/Data Scientist at momox GmbH, walks through her transition after four years as a project manager into three years researching fraud and anomaly detection and earning a degree in data analysis. We cover career foundations, the difference between analytics and data science, and a concrete learning strategy: assess strengths, target gaps, and build core skills in programming, statistics, and domain expertise.

    Ksenia outlines recommended coursework (machine learning, time series, graph analysis), online resources including mlcourse.ai, and a practical tools progression from spreadsheets and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP-DM to structure projects, starting as a data analyst to build a portfolio, using Kaggle and community resources (OpenDataScience, DataTalks), and preparing for production with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain specialization (fraud detection, node2vec), realistic job search expectations, part-time learning plans, and essential math topics — a clear roadmap for transitioning to data science." topics: - career transition - project management diff --git a/_podcast/public-speaking-for-data-scientists.md b/_podcast/public-speaking-for-data-scientists.md index 68b46e36..ea8b9979 100644 --- a/_podcast/public-speaking-for-data-scientists.md +++ b/_podcast/public-speaking-for-data-scientists.md @@ -1,6 +1,6 @@ --- -title: 'Public Speaking for Data Scientists: Master AI Evangelism, Storytelling & Keynotes' -short: The Essentials of Public Speaking for Career in Data Science +title: "Public Speaking for Data Scientists: Master AI Evangelism, Storytelling & Keynotes" +short: "The Essentials of Public Speaking for Career in Data Science" season: 2 episode: 10 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/4QWfObiuYmtOCtpSL5LZf9 apple: https://podcasts.apple.com/us/podcast/essentials-public-speaking-for-career-in-data-science/id1541710331?i=1000513669829 -description: 'Master public speaking, AI evangelism & storytelling for data scientists: learn repeatable keynote structure, audience hooks, Q&A tactics, and career growth.' -intro: How do data scientists move from technical deep dives to memorable keynotes and effective AI evangelism? 
In this episode, Ben Taylor, Chief AI Evangelist at DataRobot, breaks down the public speaking playbook for data practitioners who want to persuade, teach, and scale their talks.

    Ben draws on a career from engineering and quant roles through startups and acquisitions to explain the mindset for improvement, practical rehearsal habits, and the positioning and messaging that define AI evangelism. Key topics include crafting repeatable keynotes, avoiding early mistakes like technical overload, using story hooks and warm-ups to capture attention, and structuring talks around 1–3 clear takeaways and calls to action. He also covers introductions that work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling exercises (Pixar lessons), and executive presentations that lead with recommendations while keeping an appendix ready.

    Listeners will find actionable guidance on earning speaking stages, writing conference proposals that push boundaries, Q&A strategies (including how and when to admit unknowns), starter topics for newcomers, and resources like Toastmasters and story practice to build a speaker resume and break into AI evangelism +description: "Master public speaking, AI evangelism & storytelling for data scientists: learn repeatable keynote structure, audience hooks, Q&A tactics, and career growth." +intro: "How do data scientists move from technical deep dives to memorable keynotes and effective AI evangelism? In this episode, Ben Taylor, Chief AI Evangelist at DataRobot, breaks down the public speaking playbook for data practitioners who want to persuade, teach, and scale their talks.

    Ben draws on a career from engineering and quant roles through startups and acquisitions to explain the mindset for improvement, practical rehearsal habits, and the positioning and messaging that define AI evangelism. Key topics include crafting repeatable keynotes, avoiding early mistakes like technical overload, using story hooks and warm-ups to capture attention, and structuring talks around 1–3 clear takeaways and calls to action. He also covers introductions that work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling exercises (Pixar lessons), and executive presentations that lead with recommendations while keeping an appendix ready.

    Listeners will find actionable guidance on earning speaking stages, writing conference proposals that push boundaries, Q&A strategies (including how and when to admit unknowns), starter topics for newcomers, and resources like Toastmasters and story practice to build a speaker resume and break into AI evangelism" topics: - developer relations - public speaking diff --git a/_podcast/remote-data-engineering-work-and-building-iot-platforms.md b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md index 638077c1..2eca06d0 100644 --- a/_podcast/remote-data-engineering-work-and-building-iot-platforms.md +++ b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md @@ -1,6 +1,6 @@ --- -title: 'Remote Data Engineering Life: Building IoT Platforms, Career Transitions & Newsletter-Driven Personal Growth' -short: Mastering Data Engineering as a Remote Worker +title: "Remote Data Engineering Life: Building IoT Platforms, Career Transitions & Newsletter-Driven Personal Growth" +short: "Mastering Data Engineering as a Remote Worker" season: 15 episode: 5 guests: @@ -15,7 +15,7 @@ links: spotify: https://open.spotify.com/episode/2RLxjkPbUO3FBfFpKPHzls?si=TVveHW7PQcW7yGbOyJsJpg youtube: https://www.youtube.com/watch?v=UX7UShEioKc -description: 'Navigate remote data engineering after relocation: IoT platform architecture, sensor onboarding workflows, and newsletter-driven personal branding for career growth.' +description: "Navigate remote data engineering after relocation: IoT platform architecture, sensor onboarding workflows, and newsletter-driven personal branding for career growth." intro: "What does it take to thrive as a remote data engineer — building IoT platforms, navigating international career moves, and leveraging writing for professional growth? In this episode, José María Sánchez Salas — a computer scientist turned data engineer and newsletter author — shares his journey from Spain to Norway and the realities of remote IoT platform work.

    We explore the daily life of remote data engineering: work routines, wellness strategies, and Norway's unique hiring landscape with geographic constraints around Oslo, Bergen, and Trondheim. José breaks down IoT platform engineering fundamentals — treating platforms as an 'operating system' for sensors, sensor onboarding workflows, real-time data processing, and solving common IoT challenges like remote diagnostics and business context integration. The conversation covers data exploration patterns, ETL pipeline design, stakeholder communication, and how José uses his newsletter as both a learning tool and career advancement strategy — translating complex technical work for broader audiences and building professional visibility. You'll get actionable insights on job searching across borders, data engineering learning paths, remote work legal considerations, and communication skills that matter for distributed teams. Listen to discover practical approaches for IoT system design, remote team management, and using content creation to accelerate your data engineering career." 
topics: - data engineering diff --git a/_podcast/research-to-production-ml-systems-roadmap.md b/_podcast/research-to-production-ml-systems-roadmap.md index eceae549..936301bd 100644 --- a/_podcast/research-to-production-ml-systems-roadmap.md +++ b/_podcast/research-to-production-ml-systems-roadmap.md @@ -1,6 +1,6 @@ --- -title: 'From Research to Production: Build Reproducible, Deployable Full-Stack ML Systems' -short: What Researchers and Engineers Can Learn from Each Other +title: "From Research to Production: Build Reproducible, Deployable Full-Stack ML Systems" +short: "What Researchers and Engineers Can Learn from Each Other" season: 5 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0cJJCjK7nX5p1PdeMvGrVL apple: https://podcasts.apple.com/us/podcast/what-researchers-and-engineers-can-learn-from-each/id1541710331?i=1000537258362 -description: 'Learn to build reproducible, deployable full-stack ML systems: deploy models, bridge research-to-production, and master PyTorch, Docker & MLOps workflows.' -intro: How do you move ML work from research notebooks to reproducible, deployable full-stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that exact challenge. We trace Mihail’s path from academic NLP to self-driving and conversational AI, then into hybrid roles that blend hypothesis-driven research with production engineering.

    Key topics include research infrastructure for data collection and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, role fluidity, code reviews for researchers, and practical skills swaps so researchers learn reproducibility and engineers learn experimental rigor.

    Listeners will get concrete guidance on building end-to-end ML systems, improving reproducibility and model deployment, and actionable career advice (internships, reading papers, reproducing models). Tune in to learn practical steps and tools to bridge research to production for real-world ML systems +description: "Learn to build reproducible, deployable full-stack ML systems: deploy models, bridge research-to-production, and master PyTorch, Docker & MLOps workflows." +intro: "How do you move ML work from research notebooks to reproducible, deployable full-stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that exact challenge. We trace Mihail’s path from academic NLP to self-driving and conversational AI, then into hybrid roles that blend hypothesis-driven research with production engineering.

    Key topics include research infrastructure for data collection and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, role fluidity, code reviews for researchers, and practical skills swaps so researchers learn reproducibility and engineers learn experimental rigor.

    Listeners will get concrete guidance on building end-to-end ML systems, improving reproducibility and model deployment, and actionable career advice (internships, reading papers, reproducing models). Tune in to learn practical steps and tools to bridge research to production for real-world ML systems" topics: - machine learning - MLOps @@ -64,7 +64,7 @@ quotableClips: startOffset: 770 url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=770 endOffset: 885 -- name: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' +- name: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' startOffset: 885 url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=885 endOffset: 1055 @@ -397,7 +397,7 @@ transcript: sec: 834 time: '13:54' who: Mihail -- header: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' +- header: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' - line: Where do these open-ended questions come from? Do you have to come up with them yourself? Does your professor tell you about them or you work with companies from the industry to find them? How do you come up with these problems? 
diff --git a/_podcast/responsible-explainable-ai-bias-detection.md b/_podcast/responsible-explainable-ai-bias-detection.md index 84c85a56..0b52097a 100644 --- a/_podcast/responsible-explainable-ai-bias-detection.md +++ b/_podcast/responsible-explainable-ai-bias-detection.md @@ -1,6 +1,6 @@ --- -title: 'Responsible & Explainable AI: Practical Guide to Bias Detection, Fairness & Governance' -short: Responsible and Explainable AI +title: "Responsible & Explainable AI: Practical Guide to Bias Detection, Fairness & Governance" +short: "Responsible and Explainable AI" season: 10 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0xCSjSCG6tTiMSGfUJrMmO youtube: https://www.youtube.com/watch?v=8Eb5mG-pC3o -description: Discover Responsible AI & Explainable AI tactics for bias detection, fairness checks and governance, practical tools to build trustworthy, compliant ML models -intro: How do you detect bias, enforce fairness, and govern AI systems in production without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia and Rutgers — walks through a practical roadmap for responsible AI and explainable AI grounded in real-world examples.

    We define responsible AI and contrast it with post-hoc explainability, then unpack a credit decision bias case to show disparate outcomes in practice. Supreet outlines glass-box explainability techniques, data-level fairness checks (skewness, missingness, coverage), and EDA methods for bias detection. She covers PII handling, feature necessity assessments with SMEs and compliance, and automating data quality and monitoring. You’ll hear tool recommendations — What-If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local interpretability, drift and feedback-loop detection, and trade-offs between accuracy and interpretability.

    Listeners will gain actionable guidance on bias detection, model interpretability, AI governance structures, and managing AutoML and regulated-industry risks — practical steps to make AI systems more fair, transparent, and accountable +description: "Discover Responsible AI & Explainable AI tactics for bias detection, fairness checks and governance, practical tools to build trustworthy, compliant ML models" +intro: "How do you detect bias, enforce fairness, and govern AI systems in production without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia and Rutgers — walks through a practical roadmap for responsible AI and explainable AI grounded in real-world examples.

    We define responsible AI and contrast it with post-hoc explainability, then unpack a credit decision bias case to show disparate outcomes in practice. Supreet outlines glass-box explainability techniques, data-level fairness checks (skewness, missingness, coverage), and EDA methods for bias detection. She covers PII handling, feature necessity assessments with SMEs and compliance, and automating data quality and monitoring. You’ll hear tool recommendations — What-If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local interpretability, drift and feedback-loop detection, and trade-offs between accuracy and interpretability.

    Listeners will gain actionable guidance on bias detection, model interpretability, AI governance structures, and managing AutoML and regulated-industry risks — practical steps to make AI systems more fair, transparent, and accountable" topics: - responsible AI - explainable AI diff --git a/_podcast/scaling-data-engineering-teams-self-service-platforms.md b/_podcast/scaling-data-engineering-teams-self-service-platforms.md index 2ff58494..9ab71fce 100644 --- a/_podcast/scaling-data-engineering-teams-self-service-platforms.md +++ b/_podcast/scaling-data-engineering-teams-self-service-platforms.md @@ -1,6 +1,6 @@ --- -title: 'Scale Data Engineering Teams: Build Self-Service Data Platforms, Hire Senior Engineers & Use Kafka' -short: Growing Data Engineering Team in a Scale-Up +title: "Scale Data Engineering Teams: Build Self-Service Data Platforms, Hire Senior Engineers & Use Kafka" +short: "Growing Data Engineering Team in a Scale-Up" season: 10 episode: 5 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/5DkuaYQpbJ13sU9bknFZnk?si=RtQnTHHYQb-ytMEw8J3e8g youtube: https://www.youtube.com/watch?v=acJ6sVqKOUk -description: 'Master scaling data engineering teams: build self-service data platforms, hire senior engineers, deploy Kafka best practices to boost velocity, onboarding.' -intro: 'How do you scale data engineering teams during hypergrowth without sacrificing quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, orchestration, infrastructure and analytics — walks through practical approaches to scale data engineering teams, build self-service data platforms, hire senior engineers and adopt Kafka-based event streaming.

    We cover what “scale-up” looks like in practice (rapid hiring, product launches, US expansion), the data platform’s role in enabling self-service onboarding and scalability, and a platform anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi also digs into event streaming: Kafka, schema registries and data contracts, plus hiring-for-scale tactics — prioritizing senior experts and niche tech experience — and assessment strategies like reverse interviews. You’ll hear about balancing platform engineering and use-case pipelines, cultivating culture shifts, creating junior learning paths, and growing toward senior roles through proactivity and cross-team impact.

    Listen for concrete guidance on building a self-service data platform, practical Kafka practices, and hiring strategies that help teams move fast while staying reliable.' +description: "Master scaling data engineering teams: build self-service data platforms, hire senior engineers, deploy Kafka best practices to boost velocity, onboarding." +topics: +- data engineering +- data governance +- tools +- open-source +- career development +intro: "How do you scale data engineering teams during hypergrowth without sacrificing quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, orchestration, infrastructure and analytics — walks through practical approaches to scale data engineering teams, build self-service data platforms, hire senior engineers and adopt Kafka-based event streaming.

    We cover what “scale-up” looks like in practice (rapid hiring, product launches, US expansion), the data platform’s role in enabling self-service onboarding and scalability, and a platform anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi also digs into event streaming: Kafka, schema registries and data contracts, plus hiring-for-scale tactics — prioritizing senior experts and niche tech experience — and assessment strategies like reverse interviews. You’ll hear about balancing platform engineering and use-case pipelines, cultivating culture shifts, creating junior learning paths, and growing toward senior roles through proactivity and cross-team impact.

    Listen for concrete guidance on building a self-service data platform, practical Kafka practices, and hiring strategies that help teams move fast while staying reliable." dateadded: 2022-08-29 duration: PT01H01M25S diff --git a/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md b/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md index c403ce58..8c1bab0b 100644 --- a/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md +++ b/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md @@ -1,6 +1,6 @@ --- -title: 'Scale Enterprise AI: Data-First Strategies, MLOps Best Practices & Realistic Experiments' -short: Lessons Learned About Data & AI at Enterprises +title: "Scale Enterprise AI: Data-First Strategies, MLOps Best Practices & Realistic Experiments" +short: "Lessons Learned About Data & AI at Enterprises" season: 10 episode: 4 guests: @@ -15,8 +15,14 @@ links: spotify: https://open.spotify.com/episode/5t3SwzH17mFjxEoDUx9i5c?si=gaTfOoFnQ7muVkBiYuMxuA youtube: https://www.youtube.com/watch?v=Vms29u9xC3k -description: Discover data-first Enterprise AI strategies and MLOps best practices—learn realistic experiments, CI/CD, governance, and align ML to business impact -intro: 'How do you move from proof-of-concept to scaled enterprise AI without over-investing in hype? In this episode, Alexander Hendorf — head of data and AI at KÖNIGSWEG, PyData chair and Python Software Foundation/EuroPython fellow — walks through pragmatic, data-first strategies for scaling AI across organizations.

    We cover how to align AI initiatives with company goals, run realistic experiments (and why transparent evaluation matters), and set expectations about AI’s limits (the “Beethoven” example). Alexander breaks down a data-first architecture — data lake, BI vs. ML vs. deep learning splits — and explains productionization needs like retraining, feedback loops, and MLOps automation. He shares MLOps best practices: standardization, CI/CD, governance, reproducibility, and warnings about vendor lock-in and consultancy pitfalls. You’ll also hear advice on prioritization over perfection, timing innovation, and choosing platforms that fit long-term team maturity.

    Listen for actionable guidance on experiment design, model evaluation, and building repeatable pipelines so you can scale enterprise AI responsibly and sustainably. Ideal for data leaders, ML engineers, and product owners implementing production ML and MLOps.' +description: "Discover data-first Enterprise AI strategies and MLOps best practices—learn realistic experiments, CI/CD, governance, and align ML to business impact" +topics: +- MLOps +- AI +- machine learning +- data engineering +- open-source +intro: "How do you move from proof-of-concept to scaled enterprise AI without over-investing in hype? In this episode, Alexander Hendorf — head of data and AI at KÖNIGSWEG, PyData chair and Python Software Foundation/EuroPython fellow — walks through pragmatic, data-first strategies for scaling AI across organizations.

    We cover how to align AI initiatives with company goals, run realistic experiments (and why transparent evaluation matters), and set expectations about AI’s limits (the “Beethoven” example). Alexander breaks down a data-first architecture — data lake, BI vs. ML vs. deep learning splits — and explains productionization needs like retraining, feedback loops, and MLOps automation. He shares MLOps best practices: standardization, CI/CD, governance, reproducibility, and warnings about vendor lock-in and consultancy pitfalls. You’ll also hear advice on prioritization over perfection, timing innovation, and choosing platforms that fit long-term team maturity.

    Listen for actionable guidance on experiment design, model evaluation, and building repeatable pipelines so you can scale enterprise AI responsibly and sustainably. Ideal for data leaders, ML engineers, and product owners implementing production ML and MLOps." dateadded: 2022-08-19 duration: PT01H01M25S @@ -50,7 +56,7 @@ quotableClips: startOffset: 991 url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=991 endOffset: 1256 -- name: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' +- name: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' startOffset: 1256 url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1256 endOffset: 1471 @@ -441,7 +447,7 @@ transcript: sec: 1251 time: '20:51' who: Alexey -- header: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' +- header: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' - line: It's an important topic to say, “Hey, there's the index. It's a very important structure when you work with the data. Actually, you can do really cool things for that as well, which are really useful and big timesavers.” That was then. 
diff --git a/_podcast/software-engineering-for-machine-learning.md b/_podcast/software-engineering-for-machine-learning.md index fd9eb03d..1879b404 100644 --- a/_podcast/software-engineering-for-machine-learning.md +++ b/_podcast/software-engineering-for-machine-learning.md @@ -1,6 +1,6 @@ --- -title: 'Software Engineering for ML: Prevent Hidden Technical Debt with MLOps, Documentation & Team Alignment' -short: Software Engineering for ML +title: "Software Engineering for ML: Prevent Hidden Technical Debt with MLOps, Documentation & Team Alignment" +short: "Software Engineering for ML" season: 13 episode: 5 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/6ElyurOyGfRiCwLGUWOG7f?si=6k0i3XNUSPWd31vsZv4pfA youtube: https://www.youtube.com/watch?v=35Ch8xL2SA8 -description: Learn how to prevent hidden technical debt in ML systems with MLOps, documentation and responsible AI— improve reliability, tests, and team alignment -intro: How do teams prevent hidden technical debt in ML systems before it derails production? In this episode, Nadia Nahar, a PhD student in Software Engineering at Carnegie Mellon University, walks through the software-engineering challenges unique to machine learning and practical steps to reduce long-term costs.

    We cover defining software engineering for ML systems, differences from traditional software (uncertainty, data workflows, monitoring), and the “hidden technical debt” scope. Nadia describes an artifact analysis of open-source ML products (~300 repos), common failure modes (discontinuation, unmet requirements, poor data, deployment gaps), and research methods combining manual review with commit/code scripts. Key topics include requirements alignment, team structures and integration patterns, CRISP-DM vs Agile mismatches, MLOps and engineering support, plus documentation practices (Model Cards, Datasheets, factsheets, checklists). We also discuss responsible AI use cases—explainability needs in healthcare and education, including a classroom game predicting smoking risk—and governance approaches for product-centric fairness.

    Listen to learn concrete remedies—workshops, shared vocabularies, documentation standards, and how to involve ML practitioners from requirements through testing—to prevent hidden technical debt in ML systems +description: "Learn how to prevent hidden technical debt in ML systems with MLOps, documentation and responsible AI— improve reliability, tests, and team alignment" +intro: "How do teams prevent hidden technical debt in ML systems before it derails production? In this episode, Nadia Nahar, a PhD student in Software Engineering at Carnegie Mellon University, walks through the software-engineering challenges unique to machine learning and practical steps to reduce long-term costs.

    We cover defining software engineering for ML systems, differences from traditional software (uncertainty, data workflows, monitoring), and the “hidden technical debt” scope. Nadia describes an artifact analysis of open-source ML products (~300 repos), common failure modes (discontinuation, unmet requirements, poor data, deployment gaps), and research methods combining manual review with commit/code scripts. Key topics include requirements alignment, team structures and integration patterns, CRISP-DM vs Agile mismatches, MLOps and engineering support, plus documentation practices (Model Cards, Datasheets, factsheets, checklists). We also discuss responsible AI use cases—explainability needs in healthcare and education, including a classroom game predicting smoking risk—and governance approaches for product-centric fairness.

    Listen to learn concrete remedies—workshops, shared vocabularies, documentation standards, and how to involve ML practitioners from requirements through testing—to prevent hidden technical debt in ML systems" topics: - software engineering - machine learning diff --git a/_podcast/solopreneur-data-scientist.md b/_podcast/solopreneur-data-scientist.md index ad717ac2..3405ebea 100644 --- a/_podcast/solopreneur-data-scientist.md +++ b/_podcast/solopreneur-data-scientist.md @@ -1,6 +1,6 @@ --- -title: 'Solo Data Scientist Playbook: 90-Day Roadmap, Pipelines, A/B Tests & Prioritization' -short: Introducing Data Science in Startups +title: "Solo Data Scientist Playbook: 90-Day Roadmap, Pipelines, A/B Tests & Prioritization" +short: "Introducing Data Science in Startups" season: 5 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0kGFYX12RgkmZC2lMml6S4 apple: https://podcasts.apple.com/us/podcast/introducing-data-science-in-startups-marianna-diachuk/id1541710331?i=1000536525162 -description: 'Master the solo data scientist 90-day roadmap: prioritize projects, run A/B tests, align stakeholders and deploy models for fast business impact.' -intro: 'How can a solo data scientist deliver measurable impact in the first 90 days? In this episode, Marianna Diachuk — data scientist at Restream, former DataRobot engineer and fintech team lead, and Data Science Lead/mentor with Women Who Code — walks through a practical Solo Data Scientist playbook. You''ll hear a clear 90-day roadmap covering first-week stakeholder interviews and data exploration, first-month research and proofs-of-concept, and first-quarter priorities: building data pipelines, deployment, methodology, and A/B testing. 
Marianna breaks down company prerequisites (pipelines, engineers, analytics), the experience needed for end-to-end projects, and how to translate business problems into data science work through proactive outreach and prioritization by feasibility, impact, and stakeholder alignment. Topics include churn workflows, reuse and automation to speed iterations, metrics and KPIs for solution selection, experiment design and safe rollouts, plus communicating results through reports and tech talks. Listen for actionable guidance on transitioning from engineering, when to stop projects, hiring signals, an interview readiness checklist, and resources to learn faster and educate your organization.' +description: "Master the solo data scientist 90-day roadmap: prioritize projects, run A/B tests, align stakeholders and deploy models for fast business impact." +intro: "How can a solo data scientist deliver measurable impact in the first 90 days? In this episode, Marianna Diachuk — data scientist at Restream, former DataRobot engineer and fintech team lead, and Data Science Lead/mentor with Women Who Code — walks through a practical Solo Data Scientist playbook. You'll hear a clear 90-day roadmap covering first-week stakeholder interviews and data exploration, first-month research and proofs-of-concept, and first-quarter priorities: building data pipelines, deployment, methodology, and A/B testing. Marianna breaks down company prerequisites (pipelines, engineers, analytics), the experience needed for end-to-end projects, and how to translate business problems into data science work through proactive outreach and prioritization by feasibility, impact, and stakeholder alignment. Topics include churn workflows, reuse and automation to speed iterations, metrics and KPIs for solution selection, experiment design and safe rollouts, plus communicating results through reports and tech talks. 
Listen for actionable guidance on transitioning from engineering, when to stop projects, hiring signals, an interview readiness checklist, and resources to learn faster and educate your organization." topics: - data science - startups diff --git a/_podcast/solopreneur-developer-and-data-professional.md b/_podcast/solopreneur-developer-and-data-professional.md index 77a443ea..536fce0b 100644 --- a/_podcast/solopreneur-developer-and-data-professional.md +++ b/_podcast/solopreneur-developer-and-data-professional.md @@ -1,6 +1,6 @@ --- -title: 'Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs' -short: 'Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs' +title: "Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs" +short: "Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs" season: 6 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/264kr8rkSV71NwlU3kphHm apple: https://podcasts.apple.com/us/podcast/becoming-a-solopreneur-in-data-noah-gift/id1541710331?i=1000540908616 -description: Discover solopreneur tactics to build a side-gig tunnel, diversify income mix with courses, teaching and consulting, and quit corporate on your terms -intro: How do you build a sustainable solopreneur business that doesn't rely on VC funding—while diversifying income across courses, consulting, books, and side-gigs? In this episode, Noah Gift, founder of Pragmatic AI Labs and a lecturer on machine learning and data science at Northwestern, Duke MIDS, UC Berkeley, UC Davis, and UNC Charlotte, walks through his transition to solo work (since 2017) and a repeatable income mix for intentional small-business ownership.

    We cover defining solopreneurship, the practical income mix formula (online courses, university teaching, selective consulting, book publishing, apps, real estate, and investments) plus how to build a side-gig tunnel while employed. Noah shares work allocation strategies (exponential projects vs. consulting), publishing trade-offs, a book workflow (outline -> projects -> write), daily routines, time-and-cost tactics, and signals for financial readiness to quit full-time work.

    If you're planning to diversify income streams with online courses, consulting, or writing, this episode gives actionable steps, publishing considerations, and networking advice to help you transition deliberately and scale revenue without sacrificing control +description: "Discover solopreneur tactics to build a side-gig tunnel, diversify income mix with courses, teaching and consulting, and quit corporate on your terms" +intro: "How do you build a sustainable solopreneur business that doesn't rely on VC funding—while diversifying income across courses, consulting, books, and side-gigs? In this episode, Noah Gift, founder of Pragmatic AI Labs and a lecturer on machine learning and data science at Northwestern, Duke MIDS, UC Berkeley, UC Davis, and UNC Charlotte, walks through his transition to solo work (since 2017) and a repeatable income mix for intentional small-business ownership.

    We cover defining solopreneurship, the practical income mix formula (online courses, university teaching, selective consulting, book publishing, apps, real estate, and investments) plus how to build a side-gig tunnel while employed. Noah shares work allocation strategies (exponential projects vs. consulting), publishing trade-offs, a book workflow (outline -> projects -> write), daily routines, time-and-cost tactics, and signals for financial readiness to quit full-time work.

    If you're planning to diversify income streams with online courses, consulting, or writing, this episode gives actionable steps, publishing considerations, and networking advice to help you transition deliberately and scale revenue without sacrificing control" topics: - solopreneurship - entrepreneurship diff --git a/_podcast/teaching-mentoring-data-analytics-fintech.md b/_podcast/teaching-mentoring-data-analytics-fintech.md index 9382b6a1..89d8e31f 100644 --- a/_podcast/teaching-mentoring-data-analytics-fintech.md +++ b/_podcast/teaching-mentoring-data-analytics-fintech.md @@ -1,6 +1,6 @@ --- -title: 'Designing FinTech Data Analytics Curriculum: Fraud Detection, BigQuery Labs & Mentoring' -short: Teaching and Mentoring in Data Analytics +title: "Designing FinTech Data Analytics Curriculum: Fraud Detection, BigQuery Labs & Mentoring" +short: "Teaching and Mentoring in Data Analytics" season: 11 episode: 9 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/0ES2N4yIu61bUB3dY9oxgQ?si=_KFHPXOUQVap8oSBp6AJgA youtube: https://www.youtube.com/watch?v=saaRRzgHsmE -description: 'Discover FinTech data analytics curriculum: fraud detection, BigQuery labs & mentoring—gain hands-on cloud skills, chargeback modeling, SQL and career guidance.' -intro: 'How do you design a FinTech data analytics curriculum that teaches fraud detection, chargeback modeling, and real-world cloud skills while also mentoring diverse learners? In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google data leader, and long-time mentor and teacher — walks through building practical FinTech courses informed by industry experience across Berlin, Amsterdam and the Bay Area.

    We cover curriculum components you can reuse: rule-based vs neural approaches to fraud detection, chargeback modeling, deploying ML in production, and essential business skills for analysts. Irina explains hands-on BigQuery labs, student cloud access strategies, and how to demystify Google Cloud for analysts. She shares mentoring methods (visual explanations, learner-centered teaching), instructor sourcing and storytelling for classroom impact, cohort analysis for product metrics, recruiting women to zoomcamps, and securing technical reviewers.

    Listen to gain actionable guidance on structuring FinTech analytics training, designing cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring practices that help career changers and underrepresented learners succeed in data analytics.' +description: "Discover FinTech data analytics curriculum: fraud detection, BigQuery labs & mentoring—gain hands-on cloud skills, chargeback modeling, SQL and career guidance." +intro: "How do you design a FinTech data analytics curriculum that teaches fraud detection, chargeback modeling, and real-world cloud skills while also mentoring diverse learners? In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google data leader, and long-time mentor and teacher — walks through building practical FinTech courses informed by industry experience across Berlin, Amsterdam and the Bay Area.

    We cover curriculum components you can reuse: rule-based vs neural approaches to fraud detection, chargeback modeling, deploying ML in production, and essential business skills for analysts. Irina explains hands-on BigQuery labs, student cloud access strategies, and how to demystify Google Cloud for analysts. She shares mentoring methods (visual explanations, learner-centered teaching), instructor sourcing and storytelling for classroom impact, cohort analysis for product metrics, recruiting women to zoomcamps, and securing technical reviewers.

    Listen to gain actionable guidance on structuring FinTech analytics training, designing cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring practices that help career changers and underrepresented learners succeed in data analytics." topics: - data analytics - fintech diff --git a/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md index ea0d60e6..72cce4ce 100644 --- a/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md +++ b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md @@ -1,6 +1,6 @@ --- -title: 'Teaching Open Science & Reproducible Research: Research Software Engineering Practices for Academia' -short: Teaching Open Science & Reproducible Research +title: "Teaching Open Science & Reproducible Research: Research Software Engineering Practices for Academia" +short: "Teaching Open Science & Reproducible Research" season: 12 episode: 4 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/3ol91Xt0A6VBbPgFxGh5N6?si=QDcjMCJ7SOG6eJjjYbyEcg youtube: https://www.youtube.com/watch?v=K0PdQITQzVQ -description: 'Master reproducible research for neuroimaging: packaging, MLflow & data sharing to publish reproducible manuscripts, boost citations and career visibility.' -intro: 'How do you teach reproducible research and practical research software engineering (RSE) skills to neuroimaging students and researchers? In this episode, Johanna Bayer — a psychologist-turned-computational neuroscientist completing a PhD in machine learning for clinical neuroimaging at the University of Melbourne and an open science advocate — walks through concrete approaches for teaching reproducible research. 
We cover course design (Carpentries-style curricula, Git introductions, and reproducible manuscripts with embedded code), guided onboarding to open source (small repos, pull requests, cookiecutter templates), and core coding practices to teach: packaging, environments, formatting, testing, branching and versioning. Johanna also discusses experiment tracking with MLflow, treating software as a research output (DOIs and toolboxes), data sharing realities and sensitive-data practices, and strategies for culture change in labs via hackathons and grassroots efforts. Listeners will gain practical teaching tactics, tooling recommendations, and considerations for infrastructure and academic-industry tensions — plus pointers to resources like The Turing Way, The Carpentries, and the ML Solutions Handbook to help implement reproducible research and RSE practices in neuroimaging projects.' +description: "Master reproducible research for neuroimaging: packaging, MLflow & data sharing to publish reproducible manuscripts, boost citations and career visibility." +intro: "How do you teach reproducible research and practical research software engineering (RSE) skills to neuroimaging students and researchers? In this episode, Johanna Bayer — a psychologist-turned-computational neuroscientist completing a PhD in machine learning for clinical neuroimaging at the University of Melbourne and an open science advocate — walks through concrete approaches for teaching reproducible research. We cover course design (Carpentries-style curricula, Git introductions, and reproducible manuscripts with embedded code), guided onboarding to open source (small repos, pull requests, cookiecutter templates), and core coding practices to teach: packaging, environments, formatting, testing, branching and versioning. 
Johanna also discusses experiment tracking with MLflow, treating software as a research output (DOIs and toolboxes), data sharing realities and sensitive-data practices, and strategies for culture change in labs via hackathons and grassroots efforts. Listeners will gain practical teaching tactics, tooling recommendations, and considerations for infrastructure and academic-industry tensions — plus pointers to resources like The Turing Way, The Carpentries, and the ML Solutions Handbook to help implement reproducible research and RSE practices in neuroimaging projects." topics: - open science - software engineering diff --git a/_podcast/technical-writing-for-data-scientists.md b/_podcast/technical-writing-for-data-scientists.md index 719cad2e..d1f15318 100644 --- a/_podcast/technical-writing-for-data-scientists.md +++ b/_podcast/technical-writing-for-data-scientists.md @@ -1,6 +1,6 @@ --- -title: 'Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career' -short: 'Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career' +title: "Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career" +short: "Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career" season: 2 episode: 1 guests: @@ -15,8 +15,14 @@ links: spotify: TODO apple: TODO -description: 'Master technical writing for data science with a practical 7-day workflow: outline-first cadence, portfolio tips, docs & distribution to accelerate your career.' -intro: How can technical writing accelerate your data science career in just one week? In this episode, Eugene Yan — an Applied Scientist at Amazon who previously led data science teams at Lazada and uCare.ai and writes about ML in production and career growth — walks through a practical, repeatable 7-day workflow for technical writing tailored to data scientists.

    We cover Eugene’s career transition and first public writing, motivations for sharing work, and how to target readers (peers, future teammates, and hiring managers). He frames writing as a product with a weekly shipping cadence, explains the outline-first method for filtering ideas, and outlines a realistic time budget and editing limits. You’ll get concrete guidance on idea sourcing, title crafting, article length, blogging tools (Medium, Substack, WordPress, Jekyll), writing habits, distribution via Twitter and LinkedIn, and writing at work (press releases, design docs, decision logs). Practical portfolio advice — clear README, quick start, repo tour — and tips to iterate outlines and ship weekly round out the episode.

    Listen to learn a concrete 7-day workflow, documentation and portfolio best practices, and distribution tactics to boost your technical writing and advance your data science career +description: "Master technical writing for data science with a practical 7-day workflow: outline-first cadence, portfolio tips, docs & distribution to accelerate your career." +topics: +- software engineering +- tools +- practices +- communication +- career transition +intro: "How can technical writing accelerate your data science career in just one week? In this episode, Eugene Yan — an Applied Scientist at Amazon who previously led data science teams at Lazada and uCare.ai and writes about ML in production and career growth — walks through a practical, repeatable 7-day workflow for technical writing tailored to data scientists.

    We cover Eugene’s career transition and first public writing, motivations for sharing work, and how to target readers (peers, future teammates, and hiring managers). He frames writing as a product with a weekly shipping cadence, explains the outline-first method for filtering ideas, and outlines a realistic time budget and editing limits. You’ll get concrete guidance on idea sourcing, title crafting, article length, blogging tools (Medium, Substack, WordPress, Jekyll), writing habits, distribution via Twitter and LinkedIn, and writing at work (press releases, design docs, decision logs). Practical portfolio advice — clear README, quick start, repo tour — and tips to iterate outlines and ship weekly round out the episode.

    Listen to learn a concrete 7-day workflow, documentation and portfolio best practices, and distribution tactics to boost your technical writing and advance your data science career" dateadded: 2021-02-23 diff --git a/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md index c32809fe..c35fe327 100644 --- a/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md +++ b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md @@ -1,6 +1,6 @@ --- -title: 'From Theme Parks to Tesla: Building Data Products Through Applied ML and Full-Stack Engineering' -short: 'From Theme Parks to Tesla: Building Data Products That Work' +title: "From Theme Parks to Tesla: Building Data Products Through Applied ML and Full-Stack Engineering" +short: "From Theme Parks to Tesla: Building Data Products That Work" season: 21 episode: 9 guests: @@ -14,24 +14,15 @@ links: apple: https://podcasts.apple.com/us/podcast/from-theme-parks-to-tesla-building-data-products-that-work/id1541710331?i=1000731198436 spotify: https://open.spotify.com/episode/5dpBs4xr3zMkBDw6cTYHQE?si=pivilqeDTHOiNCBb1bFHdA youtube: https://www.youtube.com/watch?v=gXvVMvhfrIY -description: Discover crowd modeling, queue prediction and real-time recommendations - to optimize visitor flow, reduce wait times and boost engagement with smart routing -intro: 'How can theme parks use data to cut wait times and guide visitors in real - time? In this episode, Abouzar Abbaspour — an EngD-trained machine learning and - data engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) - and automotive (Tesla) — walks through building systems that optimize visitor flow - using crowd modeling, queue prediction and real-time recommendations.

    We - cover the core problems of modeling crowd dynamics and ride capacity, designing - a next-best-action visitor routing engine, and using behavioral route modeling and - probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: - incentivizing app adoption to collect data, validating recommendations with employee - swiping experiments and A/B tests, and running streaming pipelines for live experiments - and rollout (engagement metrics and accuracy measurement). He also touches on deployment - concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines - — and how these engineering choices affect measurement and user experience.

    - Listen to learn concrete approaches for queue prediction, visitor routing, real-time - processing, and experimentation so you can design and validate systems that improve - throughput and guest satisfaction.' +description: "Discover crowd modeling, queue prediction and real-time recommendations to optimize visitor flow, reduce wait times and boost engagement with smart routing." +topics: +- machine learning +- MLOps +- data engineering +- LLMs +- data science + +intro: "How can theme parks use data to cut wait times and guide visitors in real time? In this episode, Abouzar Abbaspour — an EngD-trained machine learning and data engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) and automotive (Tesla) — walks through building systems that optimize visitor flow using crowd modeling, queue prediction and real-time recommendations.

    We cover the core problems of modeling crowd dynamics and ride capacity, designing a next-best-action visitor routing engine, and using behavioral route modeling and probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: incentivizing app adoption to collect data, validating recommendations with employee swiping experiments and A/B tests, and running streaming pipelines for live experiments and rollout (engagement metrics and accuracy measurement). He also touches on deployment concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines — and how these engineering choices affect measurement and user experience.

    Listen to learn concrete approaches for queue prediction, visitor routing, real-time processing, and experimentation so you can design and validate systems that improve throughput and guest satisfaction." dateadded: 2025-10-21 duration: PT01H35S quotableClips: diff --git a/_podcast/trends-in-modern-data-engineering.md b/_podcast/trends-in-modern-data-engineering.md index 1b5435c9..fae50732 100644 --- a/_podcast/trends-in-modern-data-engineering.md +++ b/_podcast/trends-in-modern-data-engineering.md @@ -1,6 +1,6 @@ --- -title: 'Modern Data Engineering: Iceberg, Delta Lake & AI-Powered Pipelines' -short: Trends in Data Engineering +title: "Modern Data Engineering: Iceberg, Delta Lake & AI-Powered Pipelines" +short: "Trends in Data Engineering" season: 20 episode: 3 guests: @@ -14,22 +14,13 @@ links: apple: https://podcasts.apple.com/us/podcast/trends-in-data-engineering-adrian-brudaru/id1541710331?i=1000698294801 spotify: https://open.spotify.com/episode/35QbCW6Evqk1EPMKUDGGdv youtube: https://www.youtube.com/watch?v=AlCFKbFIEM8 -description: Master Iceberg, Delta Lake and AI-powered pipelines to build scalable, - governed data lakehouses—optimize ETL, boost real-time analytics and ML performance. -intro: How can engineering teams build reliable, scalable lakehouse pipelines that - combine transactional table formats with AI-driven automation? In this episode Adrian - Brudaru—an economics-trained analyst turned freelance data practitioner and co-founder - of a data company focused on open source tooling—joins us to explore the realities - of modern data engineering.

    Adrian draws on years of startup and freelance - experience and a current mission to democratise data engineering through open source - to discuss the practical trade-offs between Iceberg and Delta Lake, how table formats - fit into a data lakehouse architecture, and where AI can augment pipeline development - and observability. Key topics include selecting the right table format for versioning - and governance, integrating AI-powered features into ETL/ELT workflows, and the - role of open source tools in scaling data platforms.

    Listen to gain grounded - perspectives on Iceberg, Delta Lake, AI-powered pipelines, and data pipeline best - practices—especially useful for data engineers, architects, and engineering managers - evaluating lakehouse strategies or looking to adopt open source solutions. +description: "Master Iceberg, Delta Lake and AI-powered pipelines to build scalable, governed data lakehouses—optimize ETL, boost real-time analytics and ML performance." +topics: +- data engineering +- data governance +- AI +- open-source +intro: "How can engineering teams build reliable, scalable lakehouse pipelines that combine transactional table formats with AI-driven automation? In this episode Adrian Brudaru—an economics-trained analyst turned freelance data practitioner and co-founder of a data company focused on open source tooling—joins us to explore the realities of modern data engineering.

    Adrian draws on years of startup and freelance experience and a current mission to democratise data engineering through open source to discuss the practical trade-offs between Iceberg and Delta Lake, how table formats fit into a data lakehouse architecture, and where AI can augment pipeline development and observability. Key topics include selecting the right table format for versioning and governance, integrating AI-powered features into ETL/ELT workflows, and the role of open source tools in scaling data platforms.

    Listen to gain grounded perspectives on Iceberg, Delta Lake, AI-powered pipelines, and data pipeline best practices—especially useful for data engineers, architects, and engineering managers evaluating lakehouse strategies or looking to adopt open source solutions." dateadded: 2025-03-14 duration: PT01H02M16S quotableClips: diff --git a/_podcast/urban-data-science.md b/_podcast/urban-data-science.md index 2367e004..98b31095 100644 --- a/_podcast/urban-data-science.md +++ b/_podcast/urban-data-science.md @@ -1,6 +1,6 @@ --- -title: 'Urban Data Science: Transport Analytics, Sensors and Liveable Cities' -short: Using Data to Create Liveable Cities +title: "Urban Data Science: Transport Analytics, Sensors and Liveable Cities" +short: "Using Data to Create Liveable Cities" season: 19 episode: 1 guests: @@ -14,24 +14,13 @@ links: apple: https://podcasts.apple.com/us/podcast/using-data-to-create-liveable-cities-rachel-lim/id1541710331?i=1000675373908 spotify: https://open.spotify.com/episode/1z7jdogto8i4Zk6Zh1vDxE?si=KCg2Iq1US0SKwFCKasGqUg youtube: https://www.youtube.com/watch?v=VXQIGHUWeL0 -description: Discover urban data science, transport analytics & sensors for livable - cities - real-time monitoring, fare-card insights, data pipelines, AI tools. -intro: 'How can cities use transport analytics, sensors and AI to become more liveable? - In this episode Rachel Lim, an urban data scientist with a geography background - and a master’s in urban data science, walks through practical ways data informs - transport planning and placemaking. We cover core data sources—GPS, sensors, fare - card systems, ride-hailing logs and computer vision for passenger flow—plus travel - demand forecasting, real-time monitoring (including event analytics like F1), and - operational responses such as traffic marshals and recovery services.

    Rachel - explains data engineering realities—Kafka, Apache Spark, real-time APIs, data pipelines - and warehousing—alongside journey logic, fare computation and data quality management. - She also explores emerging tools: generative AI for natural-language access, text-to-SQL - architectures, synthetic data, and privacy practices for publishing masked datasets. - The conversation highlights Singapore’s planning context, open data portals (data.gov.sg, - DataMall), and project ideas for learners using parking and taxi datasets.

    - Listen to learn which transport analytics and sensor strategies produce actionable - insights, how to set up robust data pipelines, and where to start hands-on projects - to build liveable cities.' +description: "Discover urban data science, transport analytics & sensors for livable cities - real-time monitoring, fare-card insights, data pipelines, AI tools." +topics: +- data engineering +- LLMs +- computer vision +- data science +intro: "How can cities use transport analytics, sensors and AI to become more liveable? In this episode Rachel Lim, an urban data scientist with a geography background and a master's in urban data science, walks through practical ways data informs transport planning and placemaking. We cover core data sources—GPS, sensors, fare card systems, ride-hailing logs and computer vision for passenger flow—plus travel demand forecasting, real-time monitoring (including event analytics like F1), and operational responses such as traffic marshals and recovery services.

    Rachel explains data engineering realities—Kafka, Apache Spark, real-time APIs, data pipelines and warehousing—alongside journey logic, fare computation and data quality management. She also explores emerging tools: generative AI for natural-language access, text-to-SQL architectures, synthetic data, and privacy practices for publishing masked datasets. The conversation highlights Singapore's planning context, open data portals (data.gov.sg, DataMall), and project ideas for learners using parking and taxi datasets.

    Listen to learn which transport analytics and sensor strategies produce actionable insights, how to set up robust data pipelines, and where to start hands-on projects to build liveable cities." dateadded: 2024-11-06 duration: PT00H51M32S quotableClips: diff --git a/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md index cc0e988c..84f10470 100644 --- a/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md +++ b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md @@ -1,6 +1,6 @@ --- -title: 'Using Visualizations to Explain Machine Learning: Build Intuition with kDimensions, Figma & Templates' -short: Using Visualizations to Explain Machine Learning +title: "Using Visualizations to Explain Machine Learning: Build Intuition with kDimensions, Figma & Templates" +short: "Using Visualizations to Explain Machine Learning" season: 8 episode: 1 guests: @@ -15,8 +15,8 @@ links: spotify: https://open.spotify.com/episode/032NhEphm5QDdDFDUIypOL youtube: https://www.youtube.com/watch?v=OuCuk-7RHjM -description: Discover kDimensions and Figma templates to visualize machine learning, build intuition before the math, map ML problems, and create shareable visuals -intro: 'How do you teach machine learning so people build intuition before diving into math? In this episode, Meor Amer—educator, author, and Developer Relations at Cohere—walks through a visual-first approach to machine learning that makes concepts accessible and actionable. Drawing on his journey from bioengineering and telecom analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, Meor explains why visual machine learning and dimensionality reduction matter and how templates can scale understanding.

    We cover practical workflows: generating ideas (visualize the verb, use metaphors like the catapult and airplane), design constraints that spark creativity, and a sketchbook → Figma pipeline for engineers that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn visuals, how to map ML problems (classification, regression, clustering, anomaly, RL) to templates, and hands-on learning techniques—consume with intent, break and modify code. He also discusses monetizing visual design services and turning articles into key visuals using 4–5 keywords.

    Listen to learn concrete techniques for ML visualization, Figma for engineers, and creating reusable templates that build intuition and make machine learning teachable.' +description: "Discover kDimensions and Figma templates to visualize machine learning, build intuition before the math, map ML problems, and create shareable visuals" +intro: "How do you teach machine learning so people build intuition before diving into math? In this episode, Meor Amer—educator, author, and Developer Relations at Cohere—walks through a visual-first approach to machine learning that makes concepts accessible and actionable. Drawing on his journey from bioengineering and telecom analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, Meor explains why visual machine learning and dimensionality reduction matter and how templates can scale understanding.

    We cover practical workflows: generating ideas (visualize the verb, use metaphors like the catapult and airplane), design constraints that spark creativity, and a sketchbook → Figma pipeline for engineers that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn visuals, how to map ML problems (classification, regression, clustering, anomaly, RL) to templates, and hands-on learning techniques—consume with intent, break and modify code. He also discusses monetizing visual design services and turning articles into key visuals using 4–5 keywords.

    Listen to learn concrete techniques for ML visualization, Figma for engineers, and creating reusable templates that build intuition and make machine learning teachable." topics: - machine learning - education diff --git a/scripts/generate_topics_podcasts.py b/scripts/generate_topics_podcasts.py new file mode 100755 index 00000000..e6a030f5 --- /dev/null +++ b/scripts/generate_topics_podcasts.py @@ -0,0 +1,579 @@ +#!/usr/bin/env python3 +""" +Generate topics for podcast episodes using OpenAI API. + +This script takes a podcast page, extracts quotableClips from the frontmatter, +and generates relevant topics based on the episode content. + +Usage: + python generate_topics_podcasts.py [--update] [--api-key YOUR_KEY] + python generate_topics_podcasts.py --all-in-dir _podcast/ --update + python generate_topics_podcasts.py --file-list podcasts.txt --update + +Example: + python generate_topics_podcasts.py _podcast/s01e02-processes.md --update +""" + +import os +import sys +import argparse +import yaml +import re +from pathlib import Path +from typing import List +from openai import OpenAI + + +# Standard topic list - curated from existing podcasts +DEFAULT_TOPIC_LIST = [ + "machine learning", + "MLOps", + "data engineering", + "data science", + "AI", + "LLMs", + "career growth", + "career transition", + "leadership", + "freelance", + "tools", + "open-source", + "startups", + "production", + "team building", + "data governance", + "software engineering", + "entrepreneurship", + "job search", + "data analytics", + "data strategy", + "product management", + "communication", + "practices", + "bioinformatics", + "computer vision", + "NLP", + "consulting", + "remote work", + "career development", +] + + +def load_topic_list(): + """Load topic list from file if it exists, otherwise use default.""" + script_dir = Path(__file__).parent + topics_file = script_dir / 'topic_list.txt' + + if topics_file.exists(): + try: + with open(topics_file, 'r', encoding='utf-8') as f: + 
topics = [line.strip() for line in f if line.strip()] + print(f"Loaded {len(topics)} topics from {topics_file.name}") + return topics + except Exception as e: + print(f"Warning: Could not load topic list from file: {e}") + return DEFAULT_TOPIC_LIST + else: + return DEFAULT_TOPIC_LIST + + +# Load topic list (will use file if exists, otherwise default) +TOPIC_LIST = load_topic_list() + + +DEFAULT_PROMPT = """You are an expert at categorizing technical podcast content based on topics. + +You are given episode quotable clips showing key topics and discussion flow from a podcast episode. + +TASK: Based on the podcast quotable clips, select 3-5 most relevant topics from the provided topic list. + +AVAILABLE TOPICS: +{topic_list} + +REQUIREMENTS: +- You MUST choose topics from the available topics list OR generate new topics that are relevant to the episode content and are of the similar level of specificity as the available topics. +- Select between 3 to 5 topics that best represent the episode content +- Choose topics that are most prominently discussed in the quotable clips +- Prioritize specific technical topics over general career topics when both are present +- If the episode clearly focuses on a technology/practice, include that +- If career journey or transition is a significant theme, include relevant career topics +- Consider both the depth and breadth of coverage when selecting topics + +QUOTABLE CLIPS: +{clips_content} + +OUTPUT FORMAT: Return ONLY a valid YAML list of topics, nothing else. 
+Example format: +- machine learning +- MLOps +- career transition +- leadership +- tools +""" + + +def get_quotable_clips(podcast_file_path): + """Extract quotableClips from podcast frontmatter.""" + with open(podcast_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Parse frontmatter + match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL) + if not match: + return None + + try: + frontmatter_data = yaml.safe_load(match.group(1)) + clips = frontmatter_data.get('quotableClips', []) + + if not clips: + return None + + # Format clips as text + clips_text = [] + for clip in clips: + name = clip.get('name', '') + # Convert startOffset (seconds) to timestamp format + start_seconds = clip.get('startOffset', 0) + hours = start_seconds // 3600 + minutes = (start_seconds % 3600) // 60 + seconds = start_seconds % 60 + timestamp = f"{hours:02d}:{minutes:02d}:{seconds:02d}" + + clips_text.append(f"{timestamp} {name}") + + return '\n'.join(clips_text) + + except Exception as e: + print(f"Error parsing frontmatter: {e}") + return None + + +def generate_topics(clips_content, api_key=None): + """Generate topics using OpenAI API.""" + # Initialize OpenAI client + if api_key: + client = OpenAI(api_key=api_key) + else: + # Will use OPENAI_API_KEY environment variable + client = OpenAI() + + # Format the topic list for the prompt + topic_list_formatted = '\n'.join([f"- {topic}" for topic in TOPIC_LIST]) + + # Format the prompt with all the information + prompt = DEFAULT_PROMPT.format( + topic_list=topic_list_formatted, + clips_content=clips_content, + ) + + print(f"Prompt size: {len(prompt)} characters") + print(f" - Clips: {len(clips_content)} characters") + print(f" - Available topics: {len(TOPIC_LIST)}") + print() + + # Call OpenAI API + response = client.responses.create( + model="gpt-5-mini", # Using gpt-5-mini + input=prompt, + ) + + topics_yaml = response.output_text.strip() + + # Remove markdown code blocks if present + topics_yaml = 
re.sub(r'^```ya?ml\s*\n', '', topics_yaml) + topics_yaml = re.sub(r'\n```\s*$', '', topics_yaml) + + # Parse the YAML to get list of topics + try: + topics = yaml.safe_load(topics_yaml) + if not isinstance(topics, list): + raise ValueError("Response is not a list") + + # Process and validate topics + valid_topics = [] + new_topics = [] + + for topic in topics: + # Normalize and check + topic_clean = topic.strip() + + if topic_clean in TOPIC_LIST: + valid_topics.append(topic_clean) + else: + # New topic - accept it if it's reasonable + print(f" New topic found: '{topic_clean}'") + valid_topics.append(topic_clean) + new_topics.append(topic_clean) + + return valid_topics, new_topics + except Exception as e: + print(f"Error parsing topics: {e}") + print(f"Raw response:\n{topics_yaml}") + return [], [] + + +def update_podcast_file(file_path, topics): + """Update the podcast file with the generated topics.""" + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Parse frontmatter + match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL) + if not match: + print("Error: Could not find frontmatter") + return False + + frontmatter_str = match.group(1) + body = content[match.end():] + + # Parse YAML frontmatter + try: + frontmatter_data = yaml.safe_load(frontmatter_str) + except Exception as e: + print(f"Error parsing frontmatter YAML: {e}") + return False + + # Update or add topics field + frontmatter_data['topics'] = topics + + # Rebuild the file content + # We need to maintain formatting, so we'll find the right place to insert/update topics + lines = frontmatter_str.split('\n') + + # Find if topics already exists + topics_line_idx = None + topics_end_idx = None + for i, line in enumerate(lines): + if line.strip().startswith('topics:'): + topics_line_idx = i + # Find the end of topics list + for j in range(i + 1, len(lines)): + if lines[j] and not lines[j].startswith('-') and not lines[j].startswith(' '): + topics_end_idx = j + break + else: + 
topics_end_idx = len(lines) + break + + # Format topics as YAML list + topics_yaml_lines = ['topics:'] + for topic in topics: + topics_yaml_lines.append(f'- {topic}') + + # Insert or replace topics + if topics_line_idx is not None: + # Replace existing topics + new_lines = lines[:topics_line_idx] + topics_yaml_lines + lines[topics_end_idx:] + else: + # Find a good place to insert (after description or intro if they exist) + insert_idx = len(lines) + for i, line in enumerate(lines): + if line.strip().startswith('dateadded:'): + insert_idx = i + break + elif line.strip().startswith('description:'): + insert_idx = i + 1 + break + elif line.strip().startswith('intro:'): + # Skip multiline intro + for j in range(i + 1, len(lines)): + if lines[j] and not lines[j].startswith(' ') and ':' in lines[j]: + insert_idx = j + break + break + + new_lines = lines[:insert_idx] + topics_yaml_lines + lines[insert_idx:] + + # Reconstruct content + new_frontmatter = '\n'.join(new_lines) + new_content = f"---\n{new_frontmatter}\n---{body}" + + # Write back to file + with open(file_path, 'w', encoding='utf-8') as f: + f.write(new_content) + + return True + + +def get_project_root(): + """Get the project root directory (parent of scripts directory).""" + script_dir = Path(__file__).parent + return script_dir.parent + + +def resolve_podcast_path(podcast_file: str) -> Path: + """Resolve podcast file path relative to project root.""" + file_path = Path(podcast_file) + if file_path.exists(): + return file_path + + # Try relative to project root + project_root = get_project_root() + file_path = project_root / podcast_file + if file_path.exists(): + return file_path + + return None + + +def get_podcast_files_from_args(args) -> List[Path]: + """Get list of podcast files from command line arguments.""" + files = [] + + if args.file_list: + # Read from file + list_file = Path(args.file_list) + if not list_file.exists(): + project_root = get_project_root() + list_file = project_root / 
args.file_list + + if not list_file.exists(): + print(f"Error: File list not found: {args.file_list}", file=sys.stderr) + sys.exit(1) + + with open(list_file, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#'): + file_path = resolve_podcast_path(line) + if file_path: + files.append(file_path) + else: + print(f"Warning: File not found: {line}", file=sys.stderr) + + elif args.all_in_dir: + # Get all .md files in directory + project_root = get_project_root() + dir_path = Path(args.all_in_dir) + if not dir_path.is_absolute(): + dir_path = project_root / args.all_in_dir + + if not dir_path.exists(): + print(f"Error: Directory not found: {args.all_in_dir}", file=sys.stderr) + sys.exit(1) + + files = sorted(dir_path.glob('*.md')) + # Exclude template file + files = [f for f in files if f.name != '_template.md'] + + else: + # From command line arguments + for podcast_file in args.podcast_files: + file_path = resolve_podcast_path(podcast_file) + if file_path: + files.append(file_path) + else: + print(f"Error: File not found: {podcast_file}", file=sys.stderr) + + return files + + +def check_existing_topics(podcast_file: Path) -> bool: + """Check if podcast already has topics.""" + with open(podcast_file, 'r', encoding='utf-8') as f: + content = f.read() + + match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL) + if match: + try: + frontmatter_data = yaml.safe_load(match.group(1)) + topics = frontmatter_data.get('topics', []) + return bool(topics) + except: + pass + + return False + + +def process_podcast_file(podcast_file: Path, api_key: str = None, update: bool = False, + dry_run: bool = False, skip_existing: bool = False) -> tuple: + """Process a single podcast file to generate and optionally update topics. 
+ + Returns: + tuple: (success: bool, new_topics: list) + """ + print(f"Processing: {podcast_file.name}") + print("-" * 60) + + try: + # Check if already has topics + if skip_existing and check_existing_topics(podcast_file): + print(f"Skipping: Already has topics") + print() + return True, [] + + # Get quotable clips from frontmatter + clips_content = get_quotable_clips(podcast_file) + if not clips_content: + print(f"Warning: No quotableClips found in frontmatter", file=sys.stderr) + print(f"Skipping this file.") + print() + return True, [] + + print(f"Found {len(clips_content.splitlines())} quotable clips") + print() + print("Generating topics...") + print() + + # Generate topics + topics, new_topics = generate_topics(clips_content, api_key=api_key) + + if not topics: + print("Error: No valid topics generated") + return False, [] + + print(f"Generated {len(topics)} topics:") + for topic in topics: + print(f" - {topic}") + print() + + # Update file if requested + if update: + if dry_run: + print("\n[DRY RUN] Would update the file with these topics") + return True, new_topics + else: + success = update_podcast_file(podcast_file, topics) + if success: + print(f"\n✓ File updated successfully!") + return True, new_topics + else: + print(f"\n✗ Failed to update file", file=sys.stderr) + return False, [] + else: + print("\nTo update the file, run with --update flag") + return True, new_topics + + except Exception as e: + print(f"\nError: {e}", file=sys.stderr) + import traceback + traceback.print_exc() + return False, [] + + +def main(): + parser = argparse.ArgumentParser( + description='Generate topics for podcast episodes using OpenAI API based on quotableClips', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Generate topics and display them + python generate_topics_podcasts.py _podcast/building-ml-platform.md + + # Generate and update the file + python generate_topics_podcasts.py _podcast/building-ml-platform.md --update + + # 
Process multiple files + python generate_topics_podcasts.py _podcast/episode1.md _podcast/episode2.md --update + + # Process all files in a directory (skip those with existing topics) + python generate_topics_podcasts.py --all-in-dir _podcast/ --update --skip-existing + + # Read file list from a text file + python generate_topics_podcasts.py --file-list podcasts_without_topics.txt --update + + # Use custom API key + python generate_topics_podcasts.py _podcast/building-ml-platform.md --api-key sk-... + + # Dry run to see what would be done + python generate_topics_podcasts.py --all-in-dir _podcast/ --dry-run + +Note: This script reads quotableClips from the podcast frontmatter. + Files without quotableClips will be skipped. + """ + ) + + parser.add_argument('podcast_files', nargs='*', help='Podcast markdown files to process') + parser.add_argument('--file-list', help='Text file containing list of podcast files (one per line)') + parser.add_argument('--all-in-dir', help='Process all .md files in the specified directory') + parser.add_argument('--update', action='store_true', help='Update the file with generated topics') + parser.add_argument('--skip-existing', action='store_true', help='Skip files that already have topics') + parser.add_argument('--api-key', help='OpenAI API key (or set OPENAI_API_KEY env var)') + parser.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes') + + args = parser.parse_args() + + # Validate arguments + if not args.podcast_files and not args.file_list and not args.all_in_dir: + parser.error("Must provide podcast files, --file-list, or --all-in-dir") + + # Get list of files to process + files = get_podcast_files_from_args(args) + + if not files: + print("Error: No valid podcast files found", file=sys.stderr) + sys.exit(1) + + print(f"Found {len(files)} podcast file(s) to process") + if args.skip_existing: + print("[Skipping files with existing topics]") + if args.dry_run: + print("[DRY RUN 
MODE - No changes will be made]") + print() + + # Use API key from environment if not provided + api_key = args.api_key or os.getenv('OPENAI_API_KEY') + + # Process each file + successful = 0 + failed = 0 + all_new_topics = [] + + for i, podcast_file in enumerate(files, 1): + print(f"\n[{i}/{len(files)}] ", end='') + success, new_topics = process_podcast_file( + podcast_file, + api_key=api_key, + update=args.update, + dry_run=args.dry_run, + skip_existing=args.skip_existing + ) + + if success: + successful += 1 + if new_topics: + all_new_topics.extend(new_topics) + else: + failed += 1 + + # Summary + print("\n" + "=" * 60) + print(f"Summary: {successful} successful, {failed} failed") + + # Report new topics found + if all_new_topics: + unique_new_topics = sorted(set(all_new_topics)) + print(f"\n🆕 Found {len(unique_new_topics)} new topics:") + for topic in unique_new_topics: + print(f" - {topic}") + + # Update the topic list file + script_dir = Path(__file__).parent + topics_file = script_dir / 'topic_list.txt' + + # Read existing topics + existing_topics = list(TOPIC_LIST) + + # Add new topics + for topic in unique_new_topics: + if topic not in existing_topics: + existing_topics.append(topic) + + # Save updated list + existing_topics.sort() + with open(topics_file, 'w', encoding='utf-8') as f: + for topic in existing_topics: + f.write(f"{topic}\n") + + print(f"\n✓ Updated topic list saved to: {topics_file}") + print(f" Total topics: {len(existing_topics)} (was {len(TOPIC_LIST)})") + print(f"\nTo use the updated list, update TOPIC_LIST in generate_topics_podcasts.py") + + if failed > 0: + sys.exit(1) + + +if __name__ == '__main__': + main() + diff --git a/scripts/topic_list.txt b/scripts/topic_list.txt new file mode 100644 index 00000000..6edb50ad --- /dev/null +++ b/scripts/topic_list.txt @@ -0,0 +1,52 @@ +AI +DataOps +LLMs +MLOps +NLP +agent engineering +astroinformatics +autonomous driving +bayesian statistics +bioinformatics +career development +career growth 
+career transition +communication +computer vision +consulting +data analytics +data engineering +data governance +data science +data strategy +developer relations +embeddings +embeddings & vector databases +entrepreneurship +finops +fraud detection +freelance +graph ML +graph databases +information retrieval +job search +knowledge graphs +leadership +machine learning +mcmc and sampling algorithms +mentoring +modern data stack +open-source +practices +probabilistic programming +product management +production +remote sensing +remote work +retrieval-augmented generation +retrieval-augmented generation (RAG) +software engineering +startups +team building +tools +vector databases