diff --git a/Gemfile.lock b/Gemfile.lock index 3fae0114..10ee5728 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -36,6 +36,8 @@ GEM logger faraday-net_http (3.4.1) net-http (>= 0.5.0) + faraday-retry (2.3.2) + faraday (~> 2.0) ffi (1.17.2) forwardable-extended (2.6.0) gemoji (4.1.0) @@ -272,8 +274,8 @@ PLATFORMS x64-mingw32 DEPENDENCIES + faraday-retry github-pages - jekyll jekyll-feed (~> 0.12) jekyll-theme-cayman tzinfo (~> 1.2) diff --git a/_config.yml b/_config.yml index 619f8849..48b538b6 100644 --- a/_config.yml +++ b/_config.yml @@ -19,6 +19,7 @@ exclude: - Pipfile.lock - Makefile - .gitignore + - sh-thd-* collections: diff --git a/_layouts/podcast.html b/_layouts/podcast.html index be24e64e..cc4b4c0c 100644 --- a/_layouts/podcast.html +++ b/_layouts/podcast.html @@ -338,10 +338,6 @@

Listen to or watch on your favorite platform

Spotify
{% endif %} - - Anchor icon -
Anchor
-
@@ -368,7 +364,60 @@

Show Notes

{{ content }}
+ +
+

Timestamps

+ {% if page.transcript %} +

Click any timestamp to jump to that moment in the video

+
+ +
+ {% else %} +

Timestamps coming soon...

+ {% endif %} +
+ + {% if page.transcript %} +
+

Transcript

+
+

+ The transcripts are edited for clarity, sometimes with AI. + If you notice any incorrect information, + let us know. +

+ + {% for line in page.transcript %} + {% if line.header %} +

{{ line.header }}

+ {% else %} +

{{ line.who }}: {{ line.line }}{% if line.sec %} ({{ line.time }}){% endif %}

+ {% endif %} + {% endfor %} +
+
+ {% endif %} + + + @@ -574,7 +573,7 @@

{{ line.header }} }); // Timestamp click functionality - const timestampLinks = document.querySelectorAll('.timestamp-link'); + const timestampLinks = document.querySelectorAll('.timestamp-link, .transcript-timestamp-link'); timestampLinks.forEach(link => { link.addEventListener('click', function(e) { e.preventDefault(); diff --git a/_podcast/_s12e08.md b/_podcast/_s12e08.md index 713ef42e..1f9683b1 100644 --- a/_podcast/_s12e08.md +++ b/_podcast/_s12e08.md @@ -12,8 +12,8 @@ links: spotify: https://open.spotify.com/episode/5fB185hGlGYQmdk0kbIsPv?si=YtnsaYNzTc-fl7emZ2IjEA youtube: https://www.youtube.com/watch?v=FRi0SUtxdMw season: 12 -short: 'The Journey of a Data Generalist: From Bioinformatics to Freelancing' -title: 'The Journey of a Data Generalist: From Bioinformatics to Freelancing' +short: "The Journey of a Data Generalist: From Bioinformatics to Freelancing" +title: "The Journey of a Data Generalist: From Bioinformatics to Freelancing" transcript: - line: This week we'll talk about being a data generalist. We'll discuss going from bioinformatics to freelancing. We have a special guest today, Katya. As a freelancer diff --git a/_podcast/s07e06-ab-testing.md b/_podcast/ab-testing-and-product-experimentation.md similarity index 97% rename from _podcast/s07e06-ab-testing.md rename to _podcast/ab-testing-and-product-experimentation.md index f1e24b3c..2c6b3d97 100644 --- a/_podcast/s07e06-ab-testing.md +++ b/_podcast/ab-testing-and-product-experimentation.md @@ -1,41 +1,115 @@ --- +title: "Product Analytics & A/B Testing: Causality, Metrics, Power Analysis, A/A Tests" +short: "A/B Testing" +season: 7 episode: 6 guests: - jakobgraff -short: A/B Testing -title: 'Product Analytics & A/B Testing: Causality, Metrics, Power Analysis, A/A Tests' -description: 'Master product analytics, A/B testing & power analysis: design stable - metrics, validate randomization with A/A tests, plan sample size to de-risk features.' 
-intro: How do you design product experiments that truly establish causality and avoid - costly false conclusions? In this episode, Jakob Graff — Director of Data Science - and Data Analytics at diconium, with prior analytics leadership at Inkitt, Babbel, - King and a background in econometrics — walks through practical product analytics - and A/B testing strategies focused on causality and reliable metrics.

We - cover why randomized experiments mirror clinical trials, how experimentation de-risks - features and builds organizational learning, and a concrete case study on subscription - vs. points revenue metric design. Jakob explains experimentation platform trade-offs - (third-party vs. in-house), traffic splitters, assignment tracking, and why A/A - tests validate system trust. You’ll hear best practices for first tests (two-group - simplicity), metric selection considering noise and seasonality, and how to plan - duration with power analysis and sample-size calculations. The discussion also compares - z/t/nonparametric tests, p-value intuition from A/A comparisons, frequentist vs - Bayesian perspectives, and multi-armed test considerations.

Listen to learn - practical steps for designing randomized experiments, selecting stable metrics, - planning sample sizes, and interpreting results so your product analytics and A/B - testing produce actionable, causal insights. -topics: -- data science -- practices +image: images/podcast/ab-testing-and-product-experimentation.jpg ids: anchor: AB-Testing---Jakob-Graff-e1eq73v youtube: 0Gqx1LtqRZU -image: images/podcast/s07e06-ab-testing.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/AB-Testing---Jakob-Graff-e1eq73v apple: https://podcasts.apple.com/us/podcast/a-b-testing-jakob-graff/id1541710331?i=1000552243668 spotify: https://open.spotify.com/episode/3LhBOO1UANCGbOwkntZt4j youtube: https://www.youtube.com/watch?v=0Gqx1LtqRZU -season: 7 + +description: "Master product analytics, A/B testing & power analysis: design stable metrics, validate randomization with A/A tests, plan sample size to de-risk features." +intro: "How do you design product experiments that truly establish causality and avoid costly false conclusions? In this episode, Jakob Graff — Director of Data Science and Data Analytics at diconium, with prior analytics leadership at Inkitt, Babbel, King and a background in econometrics — walks through practical product analytics and A/B testing strategies focused on causality and reliable metrics.

We cover why randomized experiments mirror clinical trials, how experimentation de-risks features and builds organizational learning, and a concrete case study on subscription vs. points revenue metric design. Jakob explains experimentation platform trade-offs (third-party vs. in-house), traffic splitters, assignment tracking, and why A/A tests validate system trust. You’ll hear best practices for first tests (two-group simplicity), metric selection considering noise and seasonality, and how to plan duration with power analysis and sample-size calculations. The discussion also compares z/t/nonparametric tests, p-value intuition from A/A comparisons, frequentist vs Bayesian perspectives, and multi-armed test considerations.

Listen to learn practical steps for designing randomized experiments, selecting stable metrics, planning sample sizes, and interpreting results so your product analytics and A/B testing produce actionable, causal insights" +topics: +- data science +- practices +dateadded: 2022-02-27 + +duration: PT01H03M37S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=0 + endOffset: 63 +- name: Guest Background & Career Transition to Data Science + startOffset: 63 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=63 + endOffset: 311 +- name: 'Econometrics to Product Analytics: Causality Emphasis' + startOffset: 311 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=311 + endOffset: 493 +- name: 'A/B Testing Explained: Clinical Trials Analogy & Randomization' + startOffset: 493 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=493 + endOffset: 708 +- name: 'Experimentation Purpose: Establishing Causality & Controlling Noise' + startOffset: 708 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=708 + endOffset: 867 +- name: 'Case Study: Subscription vs Points — Revenue Metric Design' + startOffset: 867 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=867 + endOffset: 1086 +- name: De-risking Features & Building Organizational Learning with Experiments + startOffset: 1086 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1086 + endOffset: 1434 +- name: 'Experimentation Platform Choices: Third-Party vs In-House' + startOffset: 1434 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1434 + endOffset: 1484 +- name: Traffic Splitter Implementation, Assignment Tracking & Monitoring + startOffset: 1484 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1484 + endOffset: 1672 +- name: 'A/A Testing: Validating Randomization and System Trust' + startOffset: 1672 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1672 + endOffset: 1805 +- name: 'First Test Best Practices: Two-Group Design & 
Simplicity' + startOffset: 1805 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1805 + endOffset: 2003 +- name: 'Metric Selection: Noise, Stability, Seasonality & Business Cycles' + startOffset: 2003 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2003 + endOffset: 2264 +- name: 'Test Duration & Power Analysis: Sample Size Planning' + startOffset: 2264 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2264 + endOffset: 2423 +- name: 'Statistical Tests Overview: Z-test, T-test, and Nonparametric Options' + startOffset: 2423 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2423 + endOffset: 2679 +- name: 'Data Distribution Checks: Histograms, Tails, and Visualization' + startOffset: 2679 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2679 + endOffset: 2864 +- name: 'P-value Intuition: Explaining Significance via A/A Comparison' + startOffset: 2864 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2864 + endOffset: 3115 +- name: 'Frequentist vs Bayesian Testing: Credible Intervals, Priors & Costs' + startOffset: 3115 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3115 + endOffset: 3548 +- name: 'Multi-armed Tests (A/B/C/D): Duration, Power, and Multiple Comparisons' + startOffset: 3548 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3548 + endOffset: 3772 +- name: Practical Experimentation Tips & Analogies (Pizza Dough) + startOffset: 3772 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3772 + endOffset: 3839 +- name: Hiring, Resources & Contact Information + startOffset: 3839 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3839 + endOffset: 3880 +- name: Episode Wrap-up and Key Takeaways + startOffset: 3880 + url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3880 + endOffset: 3817 + transcript: - header: Podcast Introduction - header: Guest Background & Career Transition to Data Science @@ -1009,91 +1083,4 @@ transcript: sec: 3880 time: '1:04:40' who: Alexey -dateadded: '2022-02-27' -duration: PT01H03M37S 
-quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=0 - endOffset: 63 -- name: Guest Background & Career Transition to Data Science - startOffset: 63 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=63 - endOffset: 311 -- name: 'Econometrics to Product Analytics: Causality Emphasis' - startOffset: 311 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=311 - endOffset: 493 -- name: 'A/B Testing Explained: Clinical Trials Analogy & Randomization' - startOffset: 493 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=493 - endOffset: 708 -- name: 'Experimentation Purpose: Establishing Causality & Controlling Noise' - startOffset: 708 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=708 - endOffset: 867 -- name: 'Case Study: Subscription vs Points — Revenue Metric Design' - startOffset: 867 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=867 - endOffset: 1086 -- name: De-risking Features & Building Organizational Learning with Experiments - startOffset: 1086 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1086 - endOffset: 1434 -- name: 'Experimentation Platform Choices: Third-Party vs In-House' - startOffset: 1434 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1434 - endOffset: 1484 -- name: Traffic Splitter Implementation, Assignment Tracking & Monitoring - startOffset: 1484 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1484 - endOffset: 1672 -- name: 'A/A Testing: Validating Randomization and System Trust' - startOffset: 1672 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1672 - endOffset: 1805 -- name: 'First Test Best Practices: Two-Group Design & Simplicity' - startOffset: 1805 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=1805 - endOffset: 2003 -- name: 'Metric Selection: Noise, Stability, Seasonality & Business Cycles' - startOffset: 2003 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2003 - endOffset: 2264 -- name: 'Test Duration & Power 
Analysis: Sample Size Planning' - startOffset: 2264 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2264 - endOffset: 2423 -- name: 'Statistical Tests Overview: Z-test, T-test, and Nonparametric Options' - startOffset: 2423 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2423 - endOffset: 2679 -- name: 'Data Distribution Checks: Histograms, Tails, and Visualization' - startOffset: 2679 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2679 - endOffset: 2864 -- name: 'P-value Intuition: Explaining Significance via A/A Comparison' - startOffset: 2864 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=2864 - endOffset: 3115 -- name: 'Frequentist vs Bayesian Testing: Credible Intervals, Priors & Costs' - startOffset: 3115 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3115 - endOffset: 3548 -- name: 'Multi-armed Tests (A/B/C/D): Duration, Power, and Multiple Comparisons' - startOffset: 3548 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3548 - endOffset: 3772 -- name: Practical Experimentation Tips & Analogies (Pizza Dough) - startOffset: 3772 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3772 - endOffset: 3839 -- name: Hiring, Resources & Contact Information - startOffset: 3839 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3839 - endOffset: 3880 -- name: Episode Wrap-up and Key Takeaways - startOffset: 3880 - url: https://www.youtube.com/watch?v=0Gqx1LtqRZU&t=3880 - endOffset: 3817 --- diff --git a/_podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.md b/_podcast/ai-for-ecology-biodiversity-and-conservation.md similarity index 57% rename from _podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.md rename to _podcast/ai-for-ecology-biodiversity-and-conservation.md index 7f0e8781..8dd1b481 100644 --- a/_podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.md +++ b/_podcast/ai-for-ecology-biodiversity-and-conservation.md @@ -1,40 +1,28 @@ --- +title: "AI for Ecology, Biodiversity, and Conservation: Computer 
Vision, Remote Sensing and Citizen Science" +short: "AI for Ecology, Biodiversity, and Conservation" +season: 18 episode: 3 guests: - tanyabergerwolf +image: images/podcast/ai-for-ecology-biodiversity-and-conservation.jpg ids: - anchor: atatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi + anchor: datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi youtube: 30tTrozbAkg -image: images/podcast/s18e03-ai-for-ecology-biodiversity-and-conservation.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/AI-for-Ecology--Biodiversity--and-Conservation---Tanya-Berger-Wolf-e2inadi apple: https://podcasts.apple.com/us/podcast/ai-for-ecology-biodiversity-and-conservation-tanya/id1541710331?i=1000653709956 spotify: https://open.spotify.com/episode/3Hhz5N8ZDvsOPlPP3wxQxq?si=Oz7y_pBrTfeypfYZXubu-g youtube: https://www.youtube.com/watch?v=30tTrozbAkg -season: 18 -short: AI for Ecology, Biodiversity, and Conservation -title: 'Scaling Wildlife Conservation with AI: Computer Vision, Remote Sensing & Citizen - Science' -description: 'Discover AI-driven wildlife conservation: computer vision, remote sensing - & citizen science for scalable species ID, habitat maps, alerts and policy impact.' -intro: How can AI actually scale wildlife conservation in the face of accelerating - biodiversity loss and persistent data gaps? In this episode, computational ecologist - Tanya Berger-Wolf—director of TDAI@OSU, co‑founder of the Wildbook project, and - director of technology at Wild Me—walks us through practical ways computer vision, - remote sensing, and citizen science are transforming biodiversity monitoring.

- We explore core AI techniques (machine learning, transfer learning, domain adaptation), - image‑based monitoring with camera traps, drones and photo‑ID for individual tracking, - and remote sensing for habitat mapping and change detection. Tanya addresses key - data challenges—labeling, class imbalance, sparse observations—and the need for - interoperable datasets, open standards and FAIR principles. We also cover model - robustness, edge deployment in the field, ethics and Indigenous knowledge, scalable - platforms like Wildbook, and how citizen science and crowdsourcing support quality - control and long‑term monitoring.

Listeners will come away with a clearer - understanding of tools and workflows for wildlife monitoring, practical barriers - to scaling AI for conservation, policy and funding considerations, and resources - to begin applying computer vision, remote sensing, and citizen science in their - own conservation projects. -dateadded: '2024-04-28' +description: "Discover AI-driven computer vision and remote sensing strategies to scale biodiversity monitoring, improve species ID, and inform conservation policy." +intro: "How can AI help close critical data gaps in biodiversity monitoring and turn images and sensor data into actionable conservation decisions? In this episode Tanya Berger-Wolf, a computational ecologist, director of TDAI@OSU, and co-founder of the Wildbook project (Wild Me), walks through practical applications of AI for ecology, biodiversity monitoring, and conservation.

We cover core techniques—computer vision, machine learning, and remote sensing—and their use in image-based monitoring with camera traps, drones, and species identification. Tanya explains individual identification and longitudinal tracking, habitat mapping and change detection, and the data challenges of labeling, class imbalance, and sparse observations. The conversation addresses integration of heterogeneous datasets, model robustness (domain shift and transfer learning), and ethical considerations including Indigenous knowledge and equity. You’ll also hear about scalable platforms like Wildbook, citizen science workflows for crowdsourcing and quality control, policy relevance, open data and FAIR principles, edge deployment in the field, and building sustainable monitoring programs.

Listen to gain concrete insights on tools, pitfalls, and next steps for applying AI to conservation—what works now, what remains hard, and resources to explore further." +topics: +- AI +- computer vision +- remote sensing +- MLOps +- data engineering +dateadded: 2024-04-28 quotableClips: - name: Podcast Introduction startOffset: 0 @@ -60,7 +48,7 @@ quotableClips: startOffset: 630 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=630 endOffset: 840 -- name: 'Individual Identification & Tracking: Photo‑ID and Longitudinal Monitoring' +- name: 'Individual Identification & Tracking: Photo-ID and Longitudinal Monitoring' startOffset: 840 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=840 endOffset: 1020 @@ -84,7 +72,7 @@ quotableClips: startOffset: 1740 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=1740 endOffset: 1920 -- name: 'Scalable Platforms: Wildbook and Large‑Scale Biodiversity Monitoring Tools' +- name: 'Scalable Platforms: Wildbook and Large-Scale Biodiversity Monitoring Tools' startOffset: 1920 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=1920 endOffset: 2130 @@ -104,7 +92,7 @@ quotableClips: startOffset: 2670 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=2670 endOffset: 2820 -- name: 'Edge Deployment: Low‑Power Devices, Field Constraints, and Real‑Time Alerts' +- name: 'Edge Deployment: Low-Power Devices, Field Constraints, and Real-Time Alerts' startOffset: 2820 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=2820 endOffset: 2970 @@ -112,7 +100,7 @@ quotableClips: startOffset: 2970 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=2970 endOffset: 3150 -- name: 'Funding & Sustainability: Maintaining Long‑Term Monitoring Systems' +- name: 'Funding & Sustainability: Maintaining Long-Term Monitoring Systems' startOffset: 3150 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=3150 endOffset: 3330 @@ -132,8 +120,20 @@ quotableClips: startOffset: 3720 url: https://www.youtube.com/watch?v=30tTrozbAkg&t=3720 endOffset: 3720 ---- +context: 
'Context: The episode frames a biodiversity crisis made harder by fragmented, + sparse data and limited monitoring capacity, then surveys AI tools (computer vision, + remote sensing, platforms, citizen science), technical challenges, ethical concerns, + and policy needs for conservation. + Core narrative: AI''s most important role in conservation is as an integrative, + trustworthy infrastructure that turns heterogeneous, messy ecological data into + continuous, scalable, and actionable knowledge—bridging camera traps, drones, satellites, + citizen science, and field expertise through interoperable standards, robust models, + edge deployment, and open platforms. Real impact requires coupling technical advances + with ethics, community engagement, capacity building, sustainable funding, and multistakeholder + governance so that AI-enabled monitoring directly informs equitable conservation + decisions, enforcement, and long-term policy.' +--- Links: * [Biodiversity and Artificial Intelligence pdf](https://www.gpai.ai/projects/responsible-ai/environment/biodiversity-and-AI-opportunities-recommendations-for-action.pdf){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s08e04-machine-learning-and-personalization-in-healthcare.md b/_podcast/ai-in-healthcare-and-digital-therapeutics.md similarity index 95% rename from _podcast/s08e04-machine-learning-and-personalization-in-healthcare.md rename to _podcast/ai-in-healthcare-and-digital-therapeutics.md index f0697a00..20129c6a 100644 --- a/_podcast/s08e04-machine-learning-and-personalization-in-healthcare.md +++ b/_podcast/ai-in-healthcare-and-digital-therapeutics.md @@ -1,41 +1,112 @@ --- +title: "AI in Healthcare & Digital Therapeutics: Building Data Teams, Personalization, A/B Testing & Ethics" +short: "Machine Learning and Personalization in Healthcare" +season: 8 episode: 4 guests: - stefangudmundsson -intro: How can AI power effective digital therapeutics while balancing personalization, - rapid 
experimentation, and patient safety? In this episode, Stefan Gudmundsson — - Director of Data, Analytics, and AI with a track record building ML and data teams - at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches - for AI in healthcare and digital therapeutics.

We cover how machine learning - is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s - gamified digital therapeutics and quality‑of‑life goals; behavioral design that - minimizes in‑app time; and engagement strategies like charity incentives versus - leaderboards. Stefan explains building the analytics foundation—data pipelines, - dashboards, and experimentation capabilities—and why A/B testing and agenda‑driven - recommender systems are core to personalization. He also tackles data privacy and - ethics (GDPR/HIPAA, de‑identification), remote monitoring with wearables, clinical - trials versus app experiments, managing medical risk, and hiring and scaling data, - ML, and engineering teams.

Listen to get concrete frameworks for building - data teams, running safe, measurable experiments, designing personalized interventions, - and embedding ethical safeguards into AI-driven digital therapeutics. +image: images/podcast/ai-in-healthcare-and-digital-therapeutics.jpg ids: anchor: Machine-Learning-and-Personalization-in-Healthcare---Stefan-Gudmundsson-e1h5gdg youtube: IDzhmmKeNG4 -image: images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Machine-Learning-and-Personalization-in-Healthcare---Stefan-Gudmundsson-e1h5gdg apple: https://podcasts.apple.com/us/podcast/machine-learning-and-personalization-in-healthcare/id1541710331?i=1000557726819 spotify: https://open.spotify.com/episode/3s78PtlbUmecuMOXwO8aD5?si=991e1811a5204305 youtube: https://www.youtube.com/watch?v=IDzhmmKeNG4 -season: 8 -short: Machine Learning and Personalization in Healthcare -title: 'AI in Healthcare & Digital Therapeutics: Building Data Teams, Personalization, - A/B Testing & Ethics' -description: 'Learn to build data teams and ethical AI in healthcare: actionable personalization, - A/B testing for digital therapeutics, GDPR-safe experiments.' + +description: "Learn to build data teams and ethical AI in healthcare: actionable personalization, A/B testing for digital therapeutics, GDPR-safe experiments." +intro: "How can AI power effective digital therapeutics while balancing personalization, rapid experimentation, and patient safety? In this episode, Stefan Gudmundsson — Director of Data, Analytics, and AI with a track record building ML and data teams at Sidekick Health, King, H&M, and CCP Games — walks through practical approaches for AI in healthcare and digital therapeutics.

We cover how machine learning is applied to diagnosis, drug discovery, and biologics (AlphaFold); Sidekick Health’s gamified digital therapeutics and quality-of-life goals; behavioral design that minimizes in-app time; and engagement strategies like charity incentives versus leaderboards. Stefan explains building the analytics foundation—data pipelines, dashboards, and experimentation capabilities—and why A/B testing and agenda-driven recommender systems are core to personalization. He also tackles data privacy and ethics (GDPR/HIPAA, de-identification), remote monitoring with wearables, clinical trials versus app experiments, managing medical risk, and hiring and scaling data, ML, and engineering teams.

Listen to get concrete frameworks for building data teams, running safe, measurable experiments, designing personalized interventions, and embedding ethical safeguards into AI-driven digital therapeutics" topics: - machine learning - healthcare +dateadded: 2022-04-16 + +duration: PT00H57M48S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=0 + endOffset: 38 +- name: 'Career Snapshot: Developer to AI & Data Leader' + startOffset: 38 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=38 + endOffset: 128 +- name: Building AI Teams at King and H&M + startOffset: 128 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=128 + endOffset: 367 +- name: 'Machine Learning in Healthcare: Diagnosis, Drug Discovery & AlphaFold' + startOffset: 367 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=367 + endOffset: 602 +- name: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality-of-Life + Goals' + startOffset: 602 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=602 + endOffset: 904 +- name: 'Behavioral Design & Habit Formation: Low In-App Time Strategy' + startOffset: 904 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=904 + endOffset: 1167 +- name: 'Building Data Culture: Metrics, Buy-in, and Responsible Experimentation' + startOffset: 1167 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1167 + endOffset: 1543 +- name: 'Engagement & Rewards: Charity Incentives vs. 
Leaderboards' + startOffset: 1543 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1543 + endOffset: 1622 +- name: 'Analytics Foundation: Data Pipelines, Dashboards & Experimentation Capabilities' + startOffset: 1622 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1622 + endOffset: 1773 +- name: 'Remote Monitoring & Wearables: Activity and Heart-Rate Variability' + startOffset: 1773 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1773 + endOffset: 1901 +- name: 'Data Privacy & Ethics: GDPR/HIPAA, De-identification, and Empathy' + startOffset: 1901 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1901 + endOffset: 2139 +- name: 'Personalization Strategy: Agenda-Driven Recommender Systems' + startOffset: 2139 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2139 + endOffset: 2397 +- name: 'A/B Testing as Personalization Foundation: Segmentation & Iteration' + startOffset: 2397 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2397 + endOffset: 2580 +- name: 'Experimentation Platform: Variant Availability and Measurement' + startOffset: 2580 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2580 + endOffset: 2729 +- name: 'Clinical Trials vs. 
App Experiments: Scale, Cost, and Bias' + startOffset: 2729 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2729 + endOffset: 2965 +- name: 'Data-Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' + startOffset: 2965 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2965 + endOffset: 3115 +- name: 'Managing Medical Risk: Safeguards for Safe Experimentation' + startOffset: 3115 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3115 + endOffset: 3201 +- name: 'Hiring & Scaling: Growing the Data, ML and Engineering Team' + startOffset: 3201 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3201 + endOffset: 3353 +- name: 'AI for Mental Health: Monitoring Signals and Supportive Interventions' + startOffset: 3353 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3353 + endOffset: 3449 +- name: 'Resources & Contact: LinkedIn and Open Roles at Sidekick Health' + startOffset: 3449 + url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3449 + endOffset: 3468 + transcript: - header: Podcast Introduction - line: Hello, everyone. This week we'll talk about machine learning in healthcare, @@ -227,7 +298,7 @@ transcript: sec: 593 time: '9:53' who: Stefan -- header: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality‑of‑Life +- header: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality-of-Life Goals' - line: So basically every scientist becomes the target audience. Before this episode, I was doing a little bit of research about the company where you work right now @@ -305,7 +376,7 @@ transcript: sec: 874 time: '14:34' who: Alexey -- header: 'Behavioral Design & Habit Formation: Low In‑App Time Strategy' +- header: 'Behavioral Design & Habit Formation: Low In-App Time Strategy' - line: Yes, yes. But at the same time, there are critical differences. We don't want to keep you in the app for hours, because most of the activity you need to do is outside of the app. 
So that is a very interesting difference between the two @@ -397,7 +468,7 @@ transcript: sec: 1119 time: '18:39' who: Alexey -- header: 'Building Data Culture: Metrics, Buy‑in, and Responsible Experimentation' +- header: 'Building Data Culture: Metrics, Buy-in, and Responsible Experimentation' - line: Exactly. I think it's much more similar than you would think in the beginning. You basically have a program – some kind of solution – and you're in a company where you really want to create this data-driven culture from the data science @@ -607,7 +678,7 @@ transcript: sec: 1767 time: '29:27' who: Alexey -- header: 'Remote Monitoring & Wearables: Activity and Heart‑Rate Variability' +- header: 'Remote Monitoring & Wearables: Activity and Heart-Rate Variability' - line: Yeah, [reluctantly] I mean – you should start there. I think that should always be the approach – start with something simple. Then you have data and then you have everything in place to automate it. Don't try to automate out of thin air. @@ -641,7 +712,7 @@ transcript: sec: 1852 time: '30:52' who: Stefan -- header: 'Data Privacy & Ethics: GDPR/HIPAA, De‑identification, and Empathy' +- header: 'Data Privacy & Ethics: GDPR/HIPAA, De-identification, and Empathy' - line: We have a question. I mentioned that healthcare is quite a regulated area. And usually in healthcare, people take questions about data privacy and this kind of stuff very seriously. Does it change the way you work? You have to keep these @@ -715,7 +786,7 @@ transcript: sec: 2100 time: '35:00' who: Stefan -- header: 'Personalization Strategy: Agenda‑Driven Recommender Systems' +- header: 'Personalization Strategy: Agenda-Driven Recommender Systems' - line: Okay. I wanted to go back to what we were talking about. You said that the app is based on the customer profile – patient profile – it makes different recommendations, or personalized recommendations, based on that. 
Can you maybe tell us a bit more @@ -974,7 +1045,7 @@ transcript: sec: 2921 time: '48:41' who: Alexey -- header: 'Data‑Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' +- header: 'Data-Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' - line: No, not at all. All of these people are very data-driven just by nature. The biggest challenges may be to tell a medical doctor, “Okay, now we're testing a feature in the app. Let's just test it.” “What?! No, no. Wait!” [laughs] When @@ -1161,90 +1232,6 @@ transcript: sec: 3469 time: '57:49' who: Stefan -dateadded: '2022-04-16' -duration: PT00H57M48S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=0 - endOffset: 38 -- name: 'Career Snapshot: Developer to AI & Data Leader' - startOffset: 38 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=38 - endOffset: 128 -- name: Building AI Teams at King and H&M - startOffset: 128 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=128 - endOffset: 367 -- name: 'Machine Learning in Healthcare: Diagnosis, Drug Discovery & AlphaFold' - startOffset: 367 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=367 - endOffset: 602 -- name: 'Sidekick Health Overview: Gamified Digital Therapeutics & Quality‑of‑Life - Goals' - startOffset: 602 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=602 - endOffset: 904 -- name: 'Behavioral Design & Habit Formation: Low In‑App Time Strategy' - startOffset: 904 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=904 - endOffset: 1167 -- name: 'Building Data Culture: Metrics, Buy‑in, and Responsible Experimentation' - startOffset: 1167 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1167 - endOffset: 1543 -- name: 'Engagement & Rewards: Charity Incentives vs. 
Leaderboards' - startOffset: 1543 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1543 - endOffset: 1622 -- name: 'Analytics Foundation: Data Pipelines, Dashboards & Experimentation Capabilities' - startOffset: 1622 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1622 - endOffset: 1773 -- name: 'Remote Monitoring & Wearables: Activity and Heart‑Rate Variability' - startOffset: 1773 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1773 - endOffset: 1901 -- name: 'Data Privacy & Ethics: GDPR/HIPAA, De‑identification, and Empathy' - startOffset: 1901 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=1901 - endOffset: 2139 -- name: 'Personalization Strategy: Agenda‑Driven Recommender Systems' - startOffset: 2139 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2139 - endOffset: 2397 -- name: 'A/B Testing as Personalization Foundation: Segmentation & Iteration' - startOffset: 2397 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2397 - endOffset: 2580 -- name: 'Experimentation Platform: Variant Availability and Measurement' - startOffset: 2580 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2580 - endOffset: 2729 -- name: 'Clinical Trials vs. 
App Experiments: Scale, Cost, and Bias' - startOffset: 2729 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2729 - endOffset: 2965 -- name: 'Data‑Driven Tradeoffs: Speed over Perfection in Healthcare Analytics' - startOffset: 2965 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=2965 - endOffset: 3115 -- name: 'Managing Medical Risk: Safeguards for Safe Experimentation' - startOffset: 3115 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3115 - endOffset: 3201 -- name: 'Hiring & Scaling: Growing the Data, ML and Engineering Team' - startOffset: 3201 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3201 - endOffset: 3353 -- name: 'AI for Mental Health: Monitoring Signals and Supportive Interventions' - startOffset: 3353 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3353 - endOffset: 3449 -- name: 'Resources & Contact: LinkedIn and Open Roles at Sidekick Health' - startOffset: 3449 - url: https://www.youtube.com/watch?v=IDzhmmKeNG4&t=3449 - endOffset: 3468 --- Links: diff --git a/_podcast/s20e01-trends-in-ai-infrastructure.md b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md similarity index 90% rename from _podcast/s20e01-trends-in-ai-infrastructure.md rename to _podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md index 5e3067ed..aac0b8e4 100644 --- a/_podcast/s20e01-trends-in-ai-infrastructure.md +++ b/_podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.md @@ -1,19 +1,114 @@ --- +title: "Post-ChatGPT AI Infrastructure: Open Source Orchestration, On-Prem Economics & Distributed Training at Scale" +short: "Trends in AI Infrastructure" +season: 20 episode: 1 guests: - andreycheptsov +image: images/podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.jpg ids: - anchor: atalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 + anchor: 
datatalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 youtube: 1aMuynlLM3o -image: images/podcast/s20e01-trends-in-ai-infrastructure.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Redefining-AI-Infrastructure-Open-Source--Chips--and-the-Future-Beyond-Kubernetes--Andrey-Cheptsov-e2u7lc2 apple: https://podcasts.apple.com/us/podcast/redefining-ai-infrastructure-open-source-chips-and/id1541710331?i=1000687565459 spotify: https://open.spotify.com/episode/5MIc1pAXPxVYSr0E4pndU4 youtube: https://www.youtube.com/watch?v=1aMuynlLM3o -season: 20 -short: Trends in AI Infrastructure -title: 'Cut AI Infrastructure Costs: DStack for On‑Prem GPU Training & MLOps Alternatives' +description: "Discover AI infrastructure strategies: open source orchestration, on-prem economics and distributed training at scale to cut costs, boost performance and control." +topics: +- AI infrastructure +- MLOps +- LLMs +- open-source +- tools +intro: "How has the rise of ChatGPT reshaped the infrastructure needed to build and run large language models, and when does open source orchestration make sense compared to cloud or proprietary systems? In this episode we speak with Andrey Cheptsov, founder and CEO of dstack — an open-source alternative to Kubernetes and Slurm designed to simplify AI infrastructure orchestration. Drawing on his decade-plus at JetBrains building developer tools, Andrey frames practical trade-offs between on-prem economics and cloud spend, the maturity of open source orchestration tools, and patterns for distributed training at scale. We cover core topics including open source orchestration for AI workloads, cost and operational considerations for on-prem deployments, and strategies to scale distributed training efficiently and reliably. 
Listen to understand when an open source approach like dstack is appropriate, what to evaluate in orchestration tools, and how to balance performance, cost, and control as you scale AI projects post-ChatGPT. This episode is for engineering leaders and ML infrastructure teams seeking actionable insights on AI infrastructure, orchestration tools, on-prem economics, and distributed training best practices." +dateadded: 2025-02-26 +duration: PT01H06M04S +quotableClips: +- name: Episode Kickoff & Guest Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=0 + endOffset: 166 +- name: 'Career Background: JetBrains, DataSpell, and Move into AI' + startOffset: 166 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=166 + endOffset: 327 +- name: 'Origins of DStack: Reducing AI Infrastructure Cost of Ownership' + startOffset: 327 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=327 + endOffset: 505 +- name: Cloud vs On-Prem Costs and MLOps Limitations (SageMaker example) + startOffset: 505 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=505 + endOffset: 600 +- name: Cloud-to-On-Prem Realities in the Post-ChatGPT Era + startOffset: 600 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=600 + endOffset: 778 +- name: 'Choosing Open Source: Developer Tools, Feedback, and Community' + startOffset: 778 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=778 + endOffset: 1053 +- name: 'Open vs Proprietary Models: Business Models and Trade-Offs' + startOffset: 1053 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1053 + endOffset: 1297 +- name: 'Decentralization in AI: Privacy, Control, and Industry Fit' + startOffset: 1297 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1297 + endOffset: 1816 +- name: 'Training at Scale: GPU Requirements and Distributed Challenges' + startOffset: 1816 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1816 + endOffset: 2086 +- name: 'Distributed Training Stack: PyTorch, NCCL, and Communication 
Bottlenecks' + startOffset: 2086 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2086 + endOffset: 2255 +- name: 'Efficiency Over Brute Force: Optimization Strategies and DeepSpeed' + startOffset: 2255 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2255 + endOffset: 2370 +- name: Fine-Tuning & Serving Models for Non–AI-First Companies + startOffset: 2370 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2370 + endOffset: 2836 +- name: 'Orchestration Gaps: Kubernetes Limitations for AI Workflows and SLURM' + startOffset: 2836 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2836 + endOffset: 3059 +- name: Kubernetes as the Deployment Standard vs Smaller Alternatives + startOffset: 3059 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3059 + endOffset: 3116 +- name: 'Hybrid Infrastructure Outlook: Cloud Dominance and On-Prem Nuances' + startOffset: 3116 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3116 + endOffset: 3271 +- name: 'On-Prem GPU Coordination: SSH, Resource Contention, and Real Examples' + startOffset: 3271 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3271 + endOffset: 3413 +- name: 'Bare-Metal as a Service: Provisioning, Automation, and Firmware Management' + startOffset: 3413 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3413 + endOffset: 3487 +- name: 'Edge Computing Scope: Devices, Local Models, and Definition Ambiguity' + startOffset: 3487 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3487 + endOffset: 3630 +- name: 'Federated Learning vs Distributed Compute: Practicality and Use Cases' + startOffset: 3630 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3630 + endOffset: 3771 +- name: 'Closing Pick: Science-Fiction Recommendation — The Three-Body Problem' + startOffset: 3771 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3771 + endOffset: 3938 +- name: Episode Wrap-Up & Links to DStack and Guest Resources + startOffset: 3938 + url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3938 + 
endOffset: 3964 transcript: - header: Episode Kickoff & Guest Introduction - line: This week, we'll talk about AI infrastructure and everything related to it. @@ -136,7 +231,7 @@ transcript: sec: 327 time: '5:27' who: Andrey -- header: Cloud vs On‑Prem Costs and MLOps Limitations (SageMaker example) +- header: Cloud vs On-Prem Costs and MLOps Limitations (SageMaker example) - line: Yes, there are existing tools for machine learning, like SageMaker, but as you mentioned, cost becomes a major issue. sec: 505 @@ -163,7 +258,7 @@ transcript: sec: 537 time: '8:57' who: Alexey -- header: Cloud-to-On‑Prem Realities in the Post‑ChatGPT Era +- header: Cloud-to-On-Prem Realities in the Post-ChatGPT Era - line: Yes, and while many of these challenges are still relevant today, there are even bigger challenges ahead. The "ChatGPT moment" has introduced new issues, which makes AI infrastructure an even more important topic today. @@ -245,7 +340,7 @@ transcript: sec: 809 time: '13:29' who: Andrey -- header: 'Open vs Proprietary Models: Business Models and Trade‑Offs' +- header: 'Open vs Proprietary Models: Business Models and Trade-Offs' - line: I don’t know the full story behind OpenAI either, but I think they initially released many things as open-source. GPT-2 was open-source, and they also released Whisper and CLIP. But when they released GPT-3, they realized it was a gold mine. @@ -497,7 +592,7 @@ transcript: sec: 2255 time: '37:35' who: Alexey -- header: Fine‑Tuning & Serving Models for Non–AI‑First Companies +- header: Fine-Tuning & Serving Models for Non–AI-First Companies - line: Correct, although I’d be cautious about labeling companies as small or medium. I think it’s more about whether a company is AI-first or not. Once you figure that out, everything becomes much clearer. 
If a company is AI-first, they’re likely @@ -614,7 +709,7 @@ transcript: sec: 3106 time: '51:46' who: Andrey -- header: 'Hybrid Infrastructure Outlook: Cloud Dominance and On‑Prem Nuances' +- header: 'Hybrid Infrastructure Outlook: Cloud Dominance and On-Prem Nuances' - line: 'Here''s a question: Do you think the future will be a hybrid of bare metal and cloud, or will it be cloud-only?' sec: 3116 @@ -657,7 +752,7 @@ transcript: sec: 3268 time: '54:28' who: Andrey -- header: 'On‑Prem GPU Coordination: SSH, Resource Contention, and Real Examples' +- header: 'On-Prem GPU Coordination: SSH, Resource Contention, and Real Examples' - line: When I think about on-prem, particularly for data teams, data science teams, and ML teams, I recall my first company in Germany. We had a machine with GPUs, and everyone had access to it. We would SSH into the machine, but then we had @@ -691,7 +786,7 @@ transcript: sec: 3411 time: '56:51' who: Alexey -- header: 'Bare‑Metal as a Service: Provisioning, Automation, and Firmware Management' +- header: 'Bare-Metal as a Service: Provisioning, Automation, and Firmware Management' - line: Yes, bare metal as a service is another option. Some companies offer bare metal as a service, where they handle the provisioning and firmware updates for you. But if you want to run a service yourself across multiple bare metal providers, @@ -784,7 +879,7 @@ transcript: sec: 3762 time: '1:02:42' who: Andrey -- header: 'Closing Pick: Science‑Fiction Recommendation — The Three‑Body Problem' +- header: 'Closing Pick: Science-Fiction Recommendation — The Three-Body Problem' - line: So, last question for you. You mentioned you like science fiction. What’s your favorite book? sec: 3771 @@ -837,7 +932,7 @@ transcript: sec: 3920 time: '1:05:20' who: Andrey -- header: Episode Wrap‑Up & Links to DStack and Guest Resources +- header: Episode Wrap-Up & Links to DStack and Guest Resources - line: Sounds interesting! Thanks a lot, Andrey. 
We only touched on a fraction of the topics we wanted to discuss today, which is no surprise, given how much we wanted to cover. But it was great talking with you. Thanks for accepting the invite, @@ -850,113 +945,30 @@ transcript: sec: 3964 time: '1:06:04' who: Andrey -description: Discover DStack to cut AI infrastructure costs with on‑prem GPU training - and MLOps alternatives—optimize distributed training, reduce orchestration overhead. -intro: 'How can engineering teams cut AI infrastructure costs without sacrificing - performance or control? In this episode, Andrey Cheptsov — founder and CEO of dstack - and former JetBrains engineer — walks through the motivation behind DStack, an open‑source - orchestration alternative designed to lower AI infrastructure total cost of ownership. - We trace the cloud vs on‑prem economics (including MLOps limitations like SageMaker), - the decision to build open‑source developer tooling, and the trade‑offs between - open and proprietary models.

You’ll hear practical discussion of on‑prem - GPU training and distributed training challenges: GPU requirements, PyTorch + NCCL - communication bottlenecks, optimization strategies such as DeepSpeed, and tips for - fine‑tuning and serving models for non–AI‑first companies. The episode also covers - orchestration gaps — Kubernetes and SLURM limitations — plus bare‑metal provisioning, - hybrid cloud realities, edge computing scope, and federated learning versus distributed - compute.

If you’re evaluating MLOps alternatives, on‑prem GPU coordination, - or ways to reduce AI infrastructure cost, this episode offers concrete perspectives - on when to choose on‑prem vs cloud, how DStack fits into the stack, and practical - trade‑offs for production ML workloads.' -dateadded: '2025-02-26' -duration: PT01H06M04S -quotableClips: -- name: Episode Kickoff & Guest Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=0 - endOffset: 166 -- name: 'Career Background: JetBrains, DataSpell, and Move into AI' - startOffset: 166 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=166 - endOffset: 327 -- name: 'Origins of DStack: Reducing AI Infrastructure Cost of Ownership' - startOffset: 327 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=327 - endOffset: 505 -- name: Cloud vs On‑Prem Costs and MLOps Limitations (SageMaker example) - startOffset: 505 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=505 - endOffset: 600 -- name: Cloud-to-On‑Prem Realities in the Post‑ChatGPT Era - startOffset: 600 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=600 - endOffset: 778 -- name: 'Choosing Open Source: Developer Tools, Feedback, and Community' - startOffset: 778 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=778 - endOffset: 1053 -- name: 'Open vs Proprietary Models: Business Models and Trade‑Offs' - startOffset: 1053 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1053 - endOffset: 1297 -- name: 'Decentralization in AI: Privacy, Control, and Industry Fit' - startOffset: 1297 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1297 - endOffset: 1816 -- name: 'Training at Scale: GPU Requirements and Distributed Challenges' - startOffset: 1816 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=1816 - endOffset: 2086 -- name: 'Distributed Training Stack: PyTorch, NCCL, and Communication Bottlenecks' - startOffset: 2086 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2086 - endOffset: 2255 -- name: 'Efficiency 
Over Brute Force: Optimization Strategies and DeepSpeed' - startOffset: 2255 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2255 - endOffset: 2370 -- name: Fine‑Tuning & Serving Models for Non–AI‑First Companies - startOffset: 2370 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2370 - endOffset: 2836 -- name: 'Orchestration Gaps: Kubernetes Limitations for AI Workflows and SLURM' - startOffset: 2836 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=2836 - endOffset: 3059 -- name: Kubernetes as the Deployment Standard vs Smaller Alternatives - startOffset: 3059 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3059 - endOffset: 3116 -- name: 'Hybrid Infrastructure Outlook: Cloud Dominance and On‑Prem Nuances' - startOffset: 3116 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3116 - endOffset: 3271 -- name: 'On‑Prem GPU Coordination: SSH, Resource Contention, and Real Examples' - startOffset: 3271 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3271 - endOffset: 3413 -- name: 'Bare‑Metal as a Service: Provisioning, Automation, and Firmware Management' - startOffset: 3413 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3413 - endOffset: 3487 -- name: 'Edge Computing Scope: Devices, Local Models, and Definition Ambiguity' - startOffset: 3487 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3487 - endOffset: 3630 -- name: 'Federated Learning vs Distributed Compute: Practicality and Use Cases' - startOffset: 3630 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3630 - endOffset: 3771 -- name: 'Closing Pick: Science‑Fiction Recommendation — The Three‑Body Problem' - startOffset: 3771 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3771 - endOffset: 3938 -- name: Episode Wrap‑Up & Links to DStack and Guest Resources - startOffset: 3938 - url: https://www.youtube.com/watch?v=1aMuynlLM3o&t=3938 - endOffset: 3964 ---- +context: 'Context: A conversation with an AI-infrastructure practitioner about moving + from developer tools to 
building DStack, exploring real-world trade-offs across + hardware, software, deployment, and business models for practical AI adoption. + + Core theme (single unifying idea): Practical AI is an infrastructure-first problem + — success depends less on chasing the biggest model and more on designing cost-effective, + controllable, and efficient stacks (hardware, orchestration, and software) that + fit hybrid cloud/on-prem realities, leverage open-source ecosystems, and optimize + distributed training and serving for real-world constraints. + Dominant through-line: Every segment — from cost of ownership and cloud vs on-prem + trade-offs to open vs proprietary models, decentralization, distributed training + bottlenecks, orchestration gaps, and edge/federated use cases — returns to the same + tension: how to deliver AI that is scalable, performant, and economically sustainable + by choosing the right mix of tooling, deployment model, and optimizations. + + Key themes implied by the narrative: - Cost and control drive architecture choices + more than raw model capability. - Hybrid cloud + on-prem is the pragmatic reality; + orchestration must adapt. - Open-source ecosystems accelerate feedback, tooling, + and business flexibility. - Efficient distributed training and communication optimizations + trump brute-force scaling. - Decentralization (privacy, local control, edge) is + often a matter of fit and trade-offs, not ideology. - Practical provisioning, automation, + and orchestration are the unsolved scaling problems for non–AI-first organizations.' 
+--- Links: * [Twitter](https://twitter.com/andrey_cheptsov/){:target="_blank"} diff --git a/_podcast/s08e03-innovation-and-design-for-machine-learning.md b/_podcast/ai-ml-product-design-and-experimentation.md similarity index 97% rename from _podcast/s08e03-innovation-and-design-for-machine-learning.md rename to _podcast/ai-ml-product-design-and-experimentation.md index d1ba5a75..1d6ccf8a 100644 --- a/_podcast/s08e03-innovation-and-design-for-machine-learning.md +++ b/_podcast/ai-ml-product-design-and-experimentation.md @@ -1,41 +1,142 @@ --- +title: "AI Product Design: Algorithm-Ready UX, Rapid Experiments & Data-Driven Roadmaps" +short: "Innovation and Design for Machine Learning" +season: 8 episode: 3 guests: - liesbethdingemans -intro: How do you design products that are “algorithm-ready” while running rapid experiments - and building data-driven roadmaps? In this episode, Liesbeth Dingemans—strategy and - AI leader, founder of Dingemans Consulting, former VP of Revenue at Source.ag and - Head of AI Strategy at Prosus—walks through pragmatic approaches to AI product design - that bridge vision and execution.

We cover algorithm-friendly UX and signal - collection, a concrete interaction-design case study comparing TikTok and Instagram - signals, and the Double Diamond framework for moving from problem framing to solution - exploration. Liesbeth explains scoping and prioritization, parallel experiments - and proofs of concept, one-week design sprints, appropriate timeframes for research-to-scale, - and the role of designers, data scientists, engineers and product managers in shaping - AI roadmaps.

Listeners will learn how to avoid rework by involving data - science early, use scoping documents to challenge assumptions, create measurable - experiments (the Task Force/“Jet Ski” model), and build data-driven pitches for - long-term bets versus quarterly OKRs. Tune in for concrete frameworks and practices - to make AI product design, rapid experiments, and data-driven roadmaps work in your - organization. +image: images/podcast/ai-ml-product-design-and-experimentation.jpg ids: anchor: Innovation-and-Design-for-Machine-Learning---Liesbeth-Dingemans-e1gq0en youtube: tcqBfZw41FM -image: images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Innovation-and-Design-for-Machine-Learning---Liesbeth-Dingemans-e1gq0en apple: https://podcasts.apple.com/us/podcast/innovation-and-design-for-machine-learning-liesbeth/id1541710331?i=1000556693861 spotify: https://open.spotify.com/episode/4vhTQJ6Aj9z5VHm9UsHspv youtube: https://www.youtube.com/watch?v=tcqBfZw41FM -season: 8 -short: Innovation and Design for Machine Learning -title: 'AI Product Design: Algorithm-Ready UX, Rapid Experiments & Data-Driven Roadmaps' -description: 'Master AI product design: build algorithm-ready UX, run rapid experiments - and craft data-driven roadmaps to prioritize innovation and ship measurable results.' + +description: "Master AI product design: build algorithm-ready UX, run rapid experiments and craft data-driven roadmaps to prioritize innovation and ship measurable results." +intro: "How do you design products that are “algorithm-ready” while running rapid experiments and building data-driven roadmaps? In this episode, Liesbeth Dingemans—strategy and AI leader, founder of Dingemans Consulting, former VP of Revenue at Source.ag and Head of AI Strategy at Prosus—walks through pragmatic approaches to AI product design that bridge vision and execution.

We cover algorithm-friendly UX and signal collection, a concrete interaction-design case study comparing TikTok and Instagram signals, and the Double Diamond framework for moving from problem framing to solution exploration. Liesbeth explains scoping and prioritization, parallel experiments and proofs of concept, one-week design sprints, appropriate timeframes for research-to-scale, and the role of designers, data scientists, engineers and product managers in shaping AI roadmaps.

Listeners will learn how to avoid rework by involving data science early, use scoping documents to challenge assumptions, create measurable experiments (the Task Force/“Jet Ski” model), and build data-driven pitches for long-term bets versus quarterly OKRs. Tune in for concrete frameworks and practices to make AI product design, rapid experiments, and data-driven roadmaps work in your organization" topics: - machine learning - design thinking +- strategy +- ai - practices +dateadded: 2022-04-10 + +duration: PT00H59M14S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=0 + endOffset: 78 +- name: 'Guest Background: Strategy, Product and AI Trajectory' + startOffset: 78 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=78 + endOffset: 221 +- name: 'Interdisciplinary Perspective: Physics Meets Humanities' + startOffset: 221 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=221 + endOffset: 307 +- name: Design as a User-Centered Product Process + startOffset: 307 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=307 + endOffset: 403 +- name: Algorithm-Friendly Product Design & Signal Collection + startOffset: 403 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=403 + endOffset: 604 +- name: 'Interaction Design Case Study: TikTok vs Instagram Signals' + startOffset: 604 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=604 + endOffset: 732 +- name: 'Double Diamond Framework: Problem Framing to Solutions' + startOffset: 732 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=732 + endOffset: 872 +- name: 'Problem Discovery: Scoping and Prioritizing User Problems' + startOffset: 872 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=872 + endOffset: 962 +- name: 'Solution Exploration: Parallel Experiments & Proofs of Concept' + startOffset: 962 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=962 + endOffset: 1101 +- name: Timeframes for Research, Prototyping and 
Scaling + startOffset: 1101 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1101 + endOffset: 1217 +- name: Design Thinking Overview & Google PAIR Resources + startOffset: 1217 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1217 + endOffset: 1396 +- name: 'Design Sprint Structure: One-Week Prototyping Approach' + startOffset: 1396 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1396 + endOffset: 1500 +- name: 'Cross-Functional Participation: Designers, Data Scientists, PMs' + startOffset: 1500 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1500 + endOffset: 1633 +- name: 'Engineering Involvement: Building Algorithm-Ready Interfaces' + startOffset: 1633 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1633 + endOffset: 1698 +- name: 'Data Scientists in Problem Definition: Avoiding Rework' + startOffset: 1698 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1698 + endOffset: 1864 +- name: 'Scoping Documents: Challenging Assumptions with "Why"' + startOffset: 1864 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1864 + endOffset: 2005 +- name: Organizational Miscommunication & Backtracking Problems + startOffset: 2005 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2005 + endOffset: 2235 +- name: Product Managers’ Role in AI Roadmaps and Prioritization + startOffset: 2235 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2235 + endOffset: 2373 +- name: 'Innovation vs Quarterly OKRs: Making Space for Long-Term Bets' + startOffset: 2373 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2373 + endOffset: 2599 +- name: 'Radical Innovation Example: Second-Hand Car Trust Solutions' + startOffset: 2599 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2599 + endOffset: 2790 +- name: 'Building Evidence: Data-Driven Pitches for Big Ideas' + startOffset: 2790 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2790 + endOffset: 2956 +- name: 'Task Force Model (Jet Ski): Rapid Experimentation Teams' + startOffset: 2956 + url: 
https://www.youtube.com/watch?v=tcqBfZw41FM&t=2956 + endOffset: 3165 +- name: 'Innovation Workflow: From Discovery to Investment Case' + startOffset: 3165 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3165 + endOffset: 3251 +- name: 'Experimentation Culture: Prioritization Through Measurability' + startOffset: 3251 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3251 + endOffset: 3396 +- name: 'Measurement Mindset: Data-Guided Product Decisions (Citrix)' + startOffset: 3396 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3396 + endOffset: 3500 +- name: 'Skill Building: Learnable Design & Innovation Practices' + startOffset: 3500 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3500 + endOffset: 3605 +- name: Closing Notes, Resources and Contact Links + startOffset: 3605 + url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3605 + endOffset: 3554 + transcript: - header: Episode Introduction & Guest Overview - header: 'Guest Background: Strategy, Product and AI Trajectory' @@ -587,7 +688,7 @@ transcript: sec: 1817 time: '30:17' who: Liesbeth -- header: 'Scoping Documents: Challenging Assumptions with "Why"' +- header: 'Scoping Documents: Challenging Assumptions with "Why"' - line: 'Let''s imagine we have this situation: a manager comes to me, or to the team, or to the product manager and says, “Hey, this is the problem we think we have. Let''s solve it with a neural network.” So how do we challenge that person? 
How @@ -1119,117 +1220,6 @@ transcript: sec: 3632 time: '1:00:32' who: Liesbeth -dateadded: '2022-04-10' -duration: PT00H59M14S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=0 - endOffset: 78 -- name: 'Guest Background: Strategy, Product and AI Trajectory' - startOffset: 78 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=78 - endOffset: 221 -- name: 'Interdisciplinary Perspective: Physics Meets Humanities' - startOffset: 221 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=221 - endOffset: 307 -- name: Design as a User-Centered Product Process - startOffset: 307 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=307 - endOffset: 403 -- name: Algorithm-Friendly Product Design & Signal Collection - startOffset: 403 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=403 - endOffset: 604 -- name: 'Interaction Design Case Study: TikTok vs Instagram Signals' - startOffset: 604 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=604 - endOffset: 732 -- name: 'Double Diamond Framework: Problem Framing to Solutions' - startOffset: 732 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=732 - endOffset: 872 -- name: 'Problem Discovery: Scoping and Prioritizing User Problems' - startOffset: 872 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=872 - endOffset: 962 -- name: 'Solution Exploration: Parallel Experiments & Proofs of Concept' - startOffset: 962 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=962 - endOffset: 1101 -- name: Timeframes for Research, Prototyping and Scaling - startOffset: 1101 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1101 - endOffset: 1217 -- name: Design Thinking Overview & Google PAIR Resources - startOffset: 1217 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1217 - endOffset: 1396 -- name: 'Design Sprint Structure: One-Week Prototyping Approach' - startOffset: 1396 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1396 
- endOffset: 1500 -- name: 'Cross-Functional Participation: Designers, Data Scientists, PMs' - startOffset: 1500 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1500 - endOffset: 1633 -- name: 'Engineering Involvement: Building Algorithm-Ready Interfaces' - startOffset: 1633 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1633 - endOffset: 1698 -- name: 'Data Scientists in Problem Definition: Avoiding Rework' - startOffset: 1698 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1698 - endOffset: 1864 -- name: 'Scoping Documents: Challenging Assumptions with "Why"' - startOffset: 1864 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=1864 - endOffset: 2005 -- name: Organizational Miscommunication & Backtracking Problems - startOffset: 2005 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2005 - endOffset: 2235 -- name: Product Managers’ Role in AI Roadmaps and Prioritization - startOffset: 2235 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2235 - endOffset: 2373 -- name: 'Innovation vs Quarterly OKRs: Making Space for Long-Term Bets' - startOffset: 2373 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2373 - endOffset: 2599 -- name: 'Radical Innovation Example: Second-Hand Car Trust Solutions' - startOffset: 2599 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2599 - endOffset: 2790 -- name: 'Building Evidence: Data-Driven Pitches for Big Ideas' - startOffset: 2790 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2790 - endOffset: 2956 -- name: 'Task Force Model (Jet Ski): Rapid Experimentation Teams' - startOffset: 2956 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=2956 - endOffset: 3165 -- name: 'Innovation Workflow: From Discovery to Investment Case' - startOffset: 3165 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3165 - endOffset: 3251 -- name: 'Experimentation Culture: Prioritization Through Measurability' - startOffset: 3251 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3251 - endOffset: 3396 -- 
name: 'Measurement Mindset: Data-Guided Product Decisions (Citrix)' - startOffset: 3396 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3396 - endOffset: 3500 -- name: 'Skill Building: Learnable Design & Innovation Practices' - startOffset: 3500 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3500 - endOffset: 3605 -- name: Closing Notes, Resources and Contact Links - startOffset: 3605 - url: https://www.youtube.com/watch?v=tcqBfZw41FM&t=3605 - endOffset: 3554 --- Links: diff --git a/_podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.md b/_podcast/algorithmic-trading-with-python-and-machine-learning.md similarity index 93% rename from _podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.md rename to _podcast/algorithmic-trading-with-python-and-machine-learning.md index 8a9bd6d2..c999c45a 100644 --- a/_podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.md +++ b/_podcast/algorithmic-trading-with-python-and-machine-learning.md @@ -1,20 +1,138 @@ --- +title: "Algorithmic Trading with Python: Backtesting, Risk Management and Deployment" +short: "Stock Market Analysis with Python and Machine Learning" +season: 17 episode: 3 guests: - ivanbrigida +image: images/podcast/algorithmic-trading-with-python-and-machine-learning.jpg ids: - anchor: atatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 + anchor: datatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 youtube: NThHAEIazFk -image: images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Stock-Market-Analysis-with-Python-and-Machine-Learning---Ivan-Brigida-e2e6ph2 apple: https://podcasts.apple.com/us/podcast/stock-market-analysis-with-python-and-machine/id1541710331?i=1000641465239 spotify: 
https://open.spotify.com/episode/1ZXAeGr4Kx7F6oLQUip8Cc?si=KJwpYL-3SvuX8nPdc2cyOg youtube: https://www.youtube.com/watch?v=NThHAEIazFk -season: 17 -short: Stock Market Analysis with Python and Machine Learning -title: 'Algorithmic Trading & Mean Reversion: Backtesting, Data APIs, Risk Management - & ML' +description: "Master algorithmic trading: backtesting and risk management—learn practical data sources, features, models & execution to build robust strategies." +topics: +- machine learning +- data science +- MLOps +- algorithmic trading +- tools +intro: "How do you turn a trading idea into a robust, risk-managed algorithm in Python? In this episode Ivan Brigida — analytics lead behind PythonInvest with 10+ years in statistical modeling, forecasting, econometrics and finance — walks through practical steps for algorithmic trading with Python, from data sourcing to deployment (and a clear reminder this is educational, not investment advice).

We cover where retail traders get market data (Yahoo, Quandl, Polygon), OHLCV and adjusted-close nuances, and a concrete mean-reversion example. Ivan explains backtesting methodology, common pitfalls like time-series data leakage, and walk-forward simulation for realistic validation. He breaks down risk management (stop-loss thresholds, position sizing), execution and trading fees, plus evaluation metrics (ROI, precision) and defining prediction targets (binary growth thresholds such as 5%).

On the modeling side you’ll hear practical feature engineering (time-window stats, handcrafted indicators), model choices (logistic regression, XGBoost, neural nets), explainability via feature importance, and deployment options (cron, Airflow, APIs, partial automation). Listen to gain actionable guidance for building, validating, and deploying algorithmic trading systems in Python." +dateadded: 2024-01-24 +duration: PT01H40S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=0 + endOffset: 95 +- name: 'Guest Introduction: Ivan Brigida — Analytics Lead & PythonInvest' + startOffset: 95 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=95 + endOffset: 128 +- name: 'Disclaimer: Financial discussion, not investment advice' + startOffset: 128 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=128 + endOffset: 233 +- name: Background & career trajectory from finance to analytics + startOffset: 233 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=233 + endOffset: 402 +- name: Google experience and role transitions + startOffset: 402 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=402 + endOffset: 449 +- name: Choosing individual contributor work over people management + startOffset: 449 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=449 + endOffset: 565 +- name: 'Investing interest: economics education to practical trading' + startOffset: 565 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=565 + endOffset: 707 +- name: Blogging & building a pet project to test strategies + startOffset: 707 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=707 + endOffset: 795 +- name: Financial data sources and APIs for retail investors (Yahoo, Quandl, Polygon) + startOffset: 795 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=795 + endOffset: 923 +- name: 'Market data format explained: OHLCV time series' + startOffset: 923 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=923 + 
endOffset: 1119 +- name: Adjusted close and data quality considerations + startOffset: 1119 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1119 + endOffset: 1187 +- name: 'Mean reversion strategy: concept and application' + startOffset: 1187 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1187 + endOffset: 1334 +- name: Risk management fundamentals and stop-loss thresholds + startOffset: 1334 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1334 + endOffset: 1608 +- name: Backtesting methodology and avoiding time-series data leakage + startOffset: 1608 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1608 + endOffset: 1784 +- name: 'Walk-forward simulation: weekly predictions and selection rules' + startOffset: 1784 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1784 + endOffset: 2115 +- name: Trade execution and position sizing for algorithmic strategies + startOffset: 2115 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2115 + endOffset: 2304 +- name: 'Discipline: sticking to strategy vs emotional trading' + startOffset: 2304 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2304 + endOffset: 2451 +- name: 'Evaluation metrics: ROI, precision focus, and trading fees impact' + startOffset: 2451 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2451 + endOffset: 2619 +- name: 'Prediction target definition: binary growth thresholds (e.g., 5%)' + startOffset: 2619 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2619 + endOffset: 2755 +- name: 'Feature engineering: time-window stats and handcrafted indicators' + startOffset: 2755 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2755 + endOffset: 2882 +- name: 'Model choices: logistic regression, XGBoost, NN for stock prediction' + startOffset: 2882 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2882 + endOffset: 2998 +- name: 'Explainability: feature importance and model debugging' + startOffset: 2998 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2998 + 
endOffset: 3106 +- name: 'Deployment options: cron, Airflow, APIs and partial automation' + startOffset: 3106 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3106 + endOffset: 3305 +- name: 'Learning pathways: MLOps, ML Zoomcamp, and practical projects' + startOffset: 3305 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3305 + endOffset: 3449 +- name: 'PythonInvest content: API guides, models, portfolio allocation stories' + startOffset: 3449 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3449 + endOffset: 3666 +- name: Course plans, sign-up, and community building + startOffset: 3666 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3666 + endOffset: 3696 +- name: Episode Wrap-up and final reminder (not financial advice) + startOffset: 3696 + url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3696 + endOffset: 3640 transcript: - header: Podcast Introduction - header: 'Guest Introduction: Ivan Brigida — Analytics Lead & PythonInvest' @@ -401,7 +519,7 @@ transcript: sec: 1311 time: '21:51' who: Ivan -- header: Risk management fundamentals and stop‑loss thresholds +- header: Risk management fundamentals and stop-loss thresholds - line: I have two questions right now. First question is, “Okay, now I see losses. What do I do with them?” sec: 1334 @@ -468,7 +586,7 @@ transcript: sec: 1513 time: '25:13' who: Ivan -- header: Backtesting methodology and avoiding time‑series data leakage +- header: Backtesting methodology and avoiding time-series data leakage - line: Okay, I actually got lost a bit. You said many things, like “long stocks,” “rebalancing strategy,” “portfolio allocation”… We should probably talk about that later. [Ivan agrees] One question I still have is – we know how to get data @@ -509,7 +627,7 @@ transcript: sec: 1738 time: '28:58' who: Alexey -- header: 'Walk‑forward simulation: weekly predictions and selection rules' +- header: 'Walk-forward simulation: weekly predictions and selection rules' - line: Yes. 
I can give an example of the exact thing that I had. I started from the 100 largest US stocks, and I made predictions for one week ahead. I tried to predict… Historically you can calculate future growth from the data – when you don't know @@ -746,7 +864,7 @@ transcript: sec: 2740 time: '45:40' who: Ivan -- header: 'Feature engineering: time‑window stats and handcrafted indicators' +- header: 'Feature engineering: time-window stats and handcrafted indicators' - line: How do you build…? Let's say, we want to build the simplest possible model for that, but still use machine learning – like logistic regression or something else. How exactly would we design the problem in order to predict this growth @@ -985,7 +1103,7 @@ transcript: sec: 3665 time: '1:01:05' who: Alexey -- header: Course plans, sign‑up, and community building +- header: Course plans, sign-up, and community building - line: Yeah. I heard that and I will probably wait until 500 people are subscribed, so that I can say, “Okay, next year from January, I will run it.” It’s not there yet, but I hope someday. @@ -1001,7 +1119,7 @@ transcript: sec: 3692 time: '1:01:32' who: Ivan -- header: Episode Wrap‑up and final reminder (not financial advice) +- header: Episode Wrap-up and final reminder (not financial advice) - line: Okay. So we just need 300 more. [chuckles] Okay. Thanks a lot. It's unfortunately time to wrap up for today. It was amazing. I learned many new things. Hopefully, everyone else also learned new things. Thanks for joining us today, for sharing. @@ -1016,138 +1134,21 @@ transcript: sec: 3735 time: '1:02:15' who: Ivan -description: 'Discover algorithmic trading & mean reversion: practical backtesting, - data APIs, risk management, model choices and trade execution to boost strategy - ROI.' -intro: 'How do you build, backtest, and deploy a robust mean-reversion algorithm without - falling prey to bad data or time‑series leakage? 
In this episode, Ivan Brigida — - Analytics Lead and creator of PythonInvest — draws on 10+ years in business intelligence, - econometrics, forecasting, machine learning and finance to answer that question. -

We walk through practical steps for algorithmic trading: choosing retail-friendly - data APIs (Yahoo, Quandl, Polygon), understanding market data formats like OHLCV - and adjusted close, and cleaning for data quality. Ivan explains mean reversion - strategy design, risk management fundamentals including stop‑loss and position sizing, - and rigorous backtesting methods—covering time‑series leakage and walk‑forward simulation. - He also breaks down prediction targets, feature engineering with time‑window statistics, - and model choices from logistic regression to XGBoost and neural networks, plus - approaches to explainability and evaluation metrics (ROI, precision, trading fees). - Finally, deployment options (cron, Airflow, APIs) and learning resources from PythonInvest - are discussed.

Listen to gain actionable guidance on backtesting, data - sources, risk controls, and machine learning techniques to move a mean‑reversion - idea toward a reproducible algorithmic trading workflow.' -dateadded: '2024-01-24' -duration: PT01H40S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=0 - endOffset: 95 -- name: 'Guest Introduction: Ivan Brigida — Analytics Lead & PythonInvest' - startOffset: 95 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=95 - endOffset: 128 -- name: 'Disclaimer: Financial discussion, not investment advice' - startOffset: 128 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=128 - endOffset: 233 -- name: Background & career trajectory from finance to analytics - startOffset: 233 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=233 - endOffset: 402 -- name: Google experience and role transitions - startOffset: 402 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=402 - endOffset: 449 -- name: Choosing individual contributor work over people management - startOffset: 449 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=449 - endOffset: 565 -- name: 'Investing interest: economics education to practical trading' - startOffset: 565 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=565 - endOffset: 707 -- name: Blogging & building a pet project to test strategies - startOffset: 707 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=707 - endOffset: 795 -- name: Financial data sources and APIs for retail investors (Yahoo, Quandl, Polygon) - startOffset: 795 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=795 - endOffset: 923 -- name: 'Market data format explained: OHLCV time series' - startOffset: 923 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=923 - endOffset: 1119 -- name: Adjusted close and data quality considerations - startOffset: 1119 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1119 - endOffset: 1187 -- name: 'Mean reversion 
strategy: concept and application' - startOffset: 1187 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1187 - endOffset: 1334 -- name: Risk management fundamentals and stop‑loss thresholds - startOffset: 1334 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1334 - endOffset: 1608 -- name: Backtesting methodology and avoiding time‑series data leakage - startOffset: 1608 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1608 - endOffset: 1784 -- name: 'Walk‑forward simulation: weekly predictions and selection rules' - startOffset: 1784 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=1784 - endOffset: 2115 -- name: Trade execution and position sizing for algorithmic strategies - startOffset: 2115 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2115 - endOffset: 2304 -- name: 'Discipline: sticking to strategy vs emotional trading' - startOffset: 2304 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2304 - endOffset: 2451 -- name: 'Evaluation metrics: ROI, precision focus, and trading fees impact' - startOffset: 2451 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2451 - endOffset: 2619 -- name: 'Prediction target definition: binary growth thresholds (e.g., 5%)' - startOffset: 2619 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2619 - endOffset: 2755 -- name: 'Feature engineering: time‑window stats and handcrafted indicators' - startOffset: 2755 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2755 - endOffset: 2882 -- name: 'Model choices: logistic regression, XGBoost, NN for stock prediction' - startOffset: 2882 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2882 - endOffset: 2998 -- name: 'Explainability: feature importance and model debugging' - startOffset: 2998 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=2998 - endOffset: 3106 -- name: 'Deployment options: cron, Airflow, APIs and partial automation' - startOffset: 3106 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3106 - endOffset: 3305 -- name: 
'Learning pathways: MLOps, ML Zoomcamp, and practical projects' - startOffset: 3305 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3305 - endOffset: 3449 -- name: 'PythonInvest content: API guides, models, portfolio allocation stories' - startOffset: 3449 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3449 - endOffset: 3666 -- name: Course plans, sign‑up, and community building - startOffset: 3666 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3666 - endOffset: 3696 -- name: Episode Wrap‑up and final reminder (not financial advice) - startOffset: 3696 - url: https://www.youtube.com/watch?v=NThHAEIazFk&t=3696 - endOffset: 3640 ---- +context: 'Context: This episode follows Ivan Brigida’s path from finance to analytics + and walks listeners step-by-step through the practical craft of retail algorithmic + investing — covering data sources and quality, time-series market formats, strategy + ideas (like mean reversion), rigorous backtesting and walk-forward validation, risk + management and execution, feature engineering and model choice, explainability, + deployment, and learning resources. + Core: The unifying idea is that successful retail algorithmic trading is built like + an engineering pipeline — start with clean, well-understood data; define precise + prediction targets; design simple, interpretable models and handcrafted features; + validate performance with rigorous, leakage-free backtests and walk-forward simulations; + embed strict risk controls and disciplined execution; and iterate toward partial + automation and reproducible deployment while treating the whole process as a continuous + learning project rather than a shortcut to quick profits.' 
+--- Links: * [Exploring Finance APIs](https://pythoninvest.com/long-read/exploring-finance-apis){:target="_blank"} diff --git a/_podcast/s05e01-mastering-algorithms-and-data-structures.md b/_podcast/algorithms-data-structures-for-engineers.md similarity index 96% rename from _podcast/s05e01-mastering-algorithms-and-data-structures.md rename to _podcast/algorithms-data-structures-for-engineers.md index 094b9bc7..205bbcf5 100644 --- a/_podcast/s05e01-mastering-algorithms-and-data-structures.md +++ b/_podcast/algorithms-data-structures-for-engineers.md @@ -1,12 +1,11 @@ --- -title: 'Practical Algorithms for Engineers: Bloom Filters, Approximate Nearest-Neighbor - & Performance' -short: Mastering Algorithms and Data Structures -guests: -- marcellolarocca -image: images/podcast/s05e01-mastering-algorithms-and-data-structures.jpg +title: "Practical Algorithms for Engineers: Bloom Filters, Approximate Nearest-Neighbor & Performance" +short: "Mastering Algorithms and Data Structures" season: 5 episode: 1 +guests: +- marcellolarocca +image: images/podcast/algorithms-data-structures-for-engineers.jpg ids: youtube: RiQa-9LguW8 anchor: Mastering-Algorithms-and-Data-Structures---Marcello-La-Rocca-e16s7lf @@ -15,6 +14,131 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Mastering-Algorithms-and-Data-Structures---Marcello-La-Rocca-e16s7lf spotify: https://open.spotify.com/episode/5IM2Des1sjVIwrvB3dGoJN apple: https://podcasts.apple.com/us/podcast/mastering-algorithms-and-data-structures-marcello-la/id1541710331?i=1000534241523 + +description: "Learn Bloom filters, approximate nearest-neighbor and performance tuning to gain memory-efficient containment, fast vector search and practical profiling tips" +intro: "How do engineers choose and implement the right algorithm for memory, latency, and scale? 
In this episode, Marcello La Rocca — senior software engineer at Tundra.com and author of Algorithms and Data Structures in Action, with experience at Twitter, Microsoft and Apple — walks through practical algorithmic solutions engineers can actually use in production. We focus on Bloom filters for memory-efficient containment checks (and real-world uses like crawlers, routing tables, and adtech device-ID targeting), and on approximate nearest-neighbour (ANN) strategies when KD-trees break down for high-dimensional data — covering R-trees, SS-trees, vector similarity, embeddings and Faiss. Along the way Marcello discusses core data structures, profiling and performance pitfalls, abstraction vs implementation trade-offs, cross-language serialization, and language performance choices (Python vs C++ and Cython). If you want actionable guidance — including when to trust libraries versus inspect internals, practical code in Java/JavaScript/Python, and study resources to get hands-on — this episode gives concrete patterns, trade-offs, and examples you can apply to improve search, recommendation, and large-scale systems performance" +topics: +- algorithms +- data structures +- software engineering +dateadded: 2021-09-05 + +duration: PT01H01M51S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=0 + endOffset: 111 +- name: 'Guest Intro: Marcello La Rocca and book announcement' + startOffset: 111 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=111 + endOffset: 191 +- name: 'Career Path: web development to Twitter, Microsoft, Apple, Tundra' + startOffset: 191 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=191 + endOffset: 319 +- name: 'Learning Philosophy: focus on applications over formal proofs' + startOffset: 319 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=319 + endOffset: 450 +- name: 'Anecdote: mathematical proof vs practical innovation' + startOffset: 450 + url: 
https://www.youtube.com/watch?v=RiQa-9LguW8&t=450 + endOffset: 563 +- name: 'Recommended Resources: MIT course, Tim Roughgarden, Grokking Algorithms' + startOffset: 563 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=563 + endOffset: 634 +- name: 'Core Data Structures: arrays, lists, sets, dictionaries, stacks, queues' + startOffset: 634 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=634 + endOffset: 737 +- name: 'Abstraction vs Implementation: APIs, performance trade-offs' + startOffset: 737 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=737 + endOffset: 957 +- name: 'Practicing Algorithms Outside Work: competitions and side projects' + startOffset: 957 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=957 + endOffset: 1154 +- name: 'Using Libraries & Profiling: spotting algorithmic wins in production' + startOffset: 1154 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1154 + endOffset: 1214 +- name: 'Performance Pitfalls: containment checks and wrong list usage' + startOffset: 1214 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1214 + endOffset: 1332 +- name: 'Data-Science Use Cases: Bloom filters and nearest-neighbour search' + startOffset: 1332 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1332 + endOffset: 1419 +- name: 'Book Overview: bridging theory and practical use cases' + startOffset: 1419 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1419 + endOffset: 1504 +- name: 'Book Structure: basics, nearest-neighbour & MapReduce, graphs & optimization' + startOffset: 1504 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1504 + endOffset: 1591 +- name: 'Prerequisites & Format: appendices, pseudocode, who the book is for' + startOffset: 1591 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1591 + endOffset: 1717 +- name: 'Code Repository: implementations in Java, JavaScript, Python (and more)' + startOffset: 1717 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1717 + endOffset: 1809 +- name: 'Bloom Filter 
Explained: memory-efficient containment with false positives' + startOffset: 1809 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1809 + endOffset: 2083 +- name: 'Bloom Filter Applications: crawlers, routing tables, marketing/adtech' + startOffset: 2083 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2083 + endOffset: 2159 +- name: 'Adtech Example: device IDs and returning-user targeting with Bloom filters' + startOffset: 2159 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2159 + endOffset: 2350 +- name: 'Nearest-Neighbour Need: KD-tree limits and high-dimensional data challenges' + startOffset: 2350 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2350 + endOffset: 2564 +- name: 'Approximate Nearest-Neighbour: R-trees, SS-trees for geolocation & logistics' + startOffset: 2564 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2564 + endOffset: 2686 +- name: 'Vector Similarity: embeddings, recommender systems, Faiss usage' + startOffset: 2686 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2686 + endOffset: 2867 +- name: 'Frameworks vs Internals: when to trust libraries and when to inspect them' + startOffset: 2867 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2867 + endOffset: 2992 +- name: 'Cross-language Compatibility: serializing Bloom filters and hash seeds' + startOffset: 2992 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2992 + endOffset: 3175 +- name: 'Tech Interviews: algorithm emphasis, balanced assessment approaches' + startOffset: 3175 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3175 + endOffset: 3533 +- name: 'Hands-on Learning: LeetCode, contests, open-source projects' + startOffset: 3533 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3533 + endOffset: 3639 +- name: 'Language Trade-offs: Python vs C++ and using Cython for performance' + startOffset: 3639 + url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3639 + endOffset: 3781 +- name: 'Closing: contact info and book links' + startOffset: 3781 
+ url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3781 + endOffset: 3711 + transcript: - header: Podcast Introduction - header: 'Guest Intro: Marcello La Rocca and book announcement' @@ -780,139 +904,6 @@ transcript: sec: 3822 time: '1:03:42' who: Alexey -description: Learn Bloom filters, approximate nearest-neighbor and performance tuning - to gain memory-efficient containment, fast vector search and practical profiling - tips. -intro: How do engineers choose and implement the right algorithm for memory, latency, - and scale? In this episode, Marcello La Rocca — senior software engineer at Tundra.com - and author of Algorithms and Data Structures in Action, with experience at Twitter, - Microsoft and Apple — walks through practical algorithmic solutions engineers can - actually use in production. We focus on Bloom filters for memory‑efficient containment - checks (and real-world uses like crawlers, routing tables, and adtech device-ID - targeting), and on approximate nearest‑neighbour (ANN) strategies when KD‑trees - break down for high‑dimensional data — covering R‑trees, SS‑trees, vector similarity, - embeddings and Faiss. Along the way Marcello discusses core data structures, profiling - and performance pitfalls, abstraction vs implementation trade‑offs, cross‑language - serialization, and language performance choices (Python vs C++ and Cython). If you - want actionable guidance — including when to trust libraries versus inspect internals, - practical code in Java/JavaScript/Python, and study resources to get hands‑on — - this episode gives concrete patterns, trade‑offs, and examples you can apply to - improve search, recommendation, and large‑scale systems performance. 
-dateadded: '2021-09-05' -duration: PT01H01M51S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=0 - endOffset: 111 -- name: 'Guest Intro: Marcello La Rocca and book announcement' - startOffset: 111 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=111 - endOffset: 191 -- name: 'Career Path: web development to Twitter, Microsoft, Apple, Tundra' - startOffset: 191 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=191 - endOffset: 319 -- name: 'Learning Philosophy: focus on applications over formal proofs' - startOffset: 319 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=319 - endOffset: 450 -- name: 'Anecdote: mathematical proof vs practical innovation' - startOffset: 450 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=450 - endOffset: 563 -- name: 'Recommended Resources: MIT course, Tim Roughgarden, Grokking Algorithms' - startOffset: 563 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=563 - endOffset: 634 -- name: 'Core Data Structures: arrays, lists, sets, dictionaries, stacks, queues' - startOffset: 634 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=634 - endOffset: 737 -- name: 'Abstraction vs Implementation: APIs, performance trade-offs' - startOffset: 737 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=737 - endOffset: 957 -- name: 'Practicing Algorithms Outside Work: competitions and side projects' - startOffset: 957 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=957 - endOffset: 1154 -- name: 'Using Libraries & Profiling: spotting algorithmic wins in production' - startOffset: 1154 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1154 - endOffset: 1214 -- name: 'Performance Pitfalls: containment checks and wrong list usage' - startOffset: 1214 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1214 - endOffset: 1332 -- name: 'Data-Science Use Cases: Bloom filters and nearest-neighbour search' - startOffset: 1332 - url: 
https://www.youtube.com/watch?v=RiQa-9LguW8&t=1332 - endOffset: 1419 -- name: 'Book Overview: bridging theory and practical use cases' - startOffset: 1419 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1419 - endOffset: 1504 -- name: 'Book Structure: basics, nearest-neighbour & MapReduce, graphs & optimization' - startOffset: 1504 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1504 - endOffset: 1591 -- name: 'Prerequisites & Format: appendices, pseudocode, who the book is for' - startOffset: 1591 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1591 - endOffset: 1717 -- name: 'Code Repository: implementations in Java, JavaScript, Python (and more)' - startOffset: 1717 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1717 - endOffset: 1809 -- name: 'Bloom Filter Explained: memory-efficient containment with false positives' - startOffset: 1809 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=1809 - endOffset: 2083 -- name: 'Bloom Filter Applications: crawlers, routing tables, marketing/adtech' - startOffset: 2083 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2083 - endOffset: 2159 -- name: 'Adtech Example: device IDs and returning-user targeting with Bloom filters' - startOffset: 2159 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2159 - endOffset: 2350 -- name: 'Nearest-Neighbour Need: KD-tree limits and high-dimensional data challenges' - startOffset: 2350 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2350 - endOffset: 2564 -- name: 'Approximate Nearest-Neighbour: R-trees, SS-trees for geolocation & logistics' - startOffset: 2564 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2564 - endOffset: 2686 -- name: 'Vector Similarity: embeddings, recommender systems, Faiss usage' - startOffset: 2686 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2686 - endOffset: 2867 -- name: 'Frameworks vs Internals: when to trust libraries and when to inspect them' - startOffset: 2867 - url: 
https://www.youtube.com/watch?v=RiQa-9LguW8&t=2867 - endOffset: 2992 -- name: 'Cross-language Compatibility: serializing Bloom filters and hash seeds' - startOffset: 2992 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=2992 - endOffset: 3175 -- name: 'Tech Interviews: algorithm emphasis, balanced assessment approaches' - startOffset: 3175 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3175 - endOffset: 3533 -- name: 'Hands-on Learning: LeetCode, contests, open-source projects' - startOffset: 3533 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3533 - endOffset: 3639 -- name: 'Language Trade-offs: Python vs C++ and using Cython for performance' - startOffset: 3639 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3639 - endOffset: 3781 -- name: 'Closing: contact info and book links' - startOffset: 3781 - url: https://www.youtube.com/watch?v=RiQa-9LguW8&t=3781 - endOffset: 3711 --- diff --git a/_podcast/s03e11-analytics-engineer.md b/_podcast/analytics-engineer-skills-tools.md similarity index 96% rename from _podcast/s03e11-analytics-engineer.md rename to _podcast/analytics-engineer-skills-tools.md index 0b016688..8eb99030 100644 --- a/_podcast/s03e11-analytics-engineer.md +++ b/_podcast/analytics-engineer-skills-tools.md @@ -1,11 +1,11 @@ --- -title: 'Master Analytics Engineering: Skills, Toolstack, Career Roadmap' -short: 'Analytics Engineer: New Role in a Data Team' -guests: -- victoriaperezmola -image: images/podcast/s03e11-analytics-engineer.jpg +title: "Master Analytics Engineering: Skills, Toolstack, Career Roadmap" +short: "Analytics Engineer: New Role in a Data Team" season: 3 episode: 11 +guests: +- victoriaperezmola +image: images/podcast/analytics-engineer-skills-tools.jpg ids: youtube: C5UcxBwdCEg anchor: Analytics-Engineer-New-Role-in-a-Data-Team---Victoria-Perez-Mola-e131e3n @@ -14,8 +14,115 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Analytics-Engineer-New-Role-in-a-Data-Team---Victoria-Perez-Mola-e131e3n spotify: 
https://open.spotify.com/episode/4rLQ5ulsYR9LqXxbFe2MlN apple: https://podcasts.apple.com/us/podcast/analytics-engineer-new-role-in-data-team-victoria-perez/id1541710331?i=1000526036141 + +description: "Master analytics engineering with dbt and data modeling: learn pipelines, testing, Snowflake basics and a clear career roadmap to advance your data career." +intro: "How do you become an effective analytics engineer and what skills, tools, and career steps matter most? In this episode, Victoria Perez Mola—born in Argentina, trained as a Systems Engineer and now an Analytics Engineer at Tier in Berlin—walks us through her move from ERP and finance reporting into analytics engineering. We cover daily responsibilities like data modeling, pipelines, data quality and Looker; the DBT workflow (SQL transformations, version control, tests, DAG); and a practical analytics toolstack including DBT, Snowflake, Adlib ETL and Looker. Victoria contrasts analytics engineer, data analyst and data engineer roles, explains role origins, and outlines typical job expectations such as pipeline ownership, auditing and dashboarding. She digs into core skills—SQL, dimensional modeling, Snowflake—strategies for handling bad data and schema changes with DBT macros and tests, and team structures from platform teams to embedded roles. 
Listen for a clear career roadmap, concrete learning resources (DBT tutorials and an 'Analytics readings' Notion list), and indicators of role fit if you enjoy modeling, data quality and engineering best practices" +topics: +- analytics engineering +dateadded: 2021-06-19 + +duration: PT00H49M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=0 + endOffset: 108 +- name: 'Guest Introduction: Victoria Perez Mola overview' + startOffset: 108 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=108 + endOffset: 165 +- name: 'Career Journey: Systems engineering, ERP & finance reporting' + startOffset: 165 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=165 + endOffset: 245 +- name: 'Daily Responsibilities: Data modeling, pipelines, data quality, Looker' + startOffset: 245 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=245 + endOffset: 409 +- name: 'DBT Overview: SQL transformations, version control, tests, DAG' + startOffset: 409 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=409 + endOffset: 604 +- name: 'Analytics Toolstack: DBT, Snowflake, Adlib ETL, Looker' + startOffset: 604 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=604 + endOffset: 708 +- name: 'Transition Story: From BI/ERP work to analytics engineering' + startOffset: 708 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=708 + endOffset: 874 +- name: 'Role Comparison: Analytics Engineer vs Data Analyst vs Data Engineer' + startOffset: 874 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=874 + endOffset: 1014 +- name: 'Role Origins & Purpose: Spotify, reducing analysts'' cleaning workload' + startOffset: 1014 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1014 + endOffset: 1252 +- name: 'Job Expectations: Example posting traits (pipelines, auditing, dashboards)' + startOffset: 1252 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1252 + endOffset: 1570 +- name: 'Core Skills: SQL, dimensional 
modeling, Snowflake and tooling variance' + startOffset: 1570 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1570 + endOffset: 1806 +- name: 'DBT Ecosystem: DBT''s role in the analytics engineer movement' + startOffset: 1806 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1806 + endOffset: 1869 +- name: 'Organizational Variability: Team setups and role definitions across companies' + startOffset: 1869 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1869 + endOffset: 1982 +- name: 'Cross-functional Collaboration: Working with analysts, data scientists, backend' + startOffset: 1982 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1982 + endOffset: 2204 +- name: 'Managing Bad Data & Schema Changes: DBT cleaning, macros, limitations' + startOffset: 2204 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2204 + endOffset: 2333 +- name: 'Data Testing Strategy: DBT tests, upstream checks, warnings vs errors' + startOffset: 2333 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2333 + endOffset: 2442 +- name: 'BI Roles vs Analytics Engineering: Overlaps with BI developer and analyst' + startOffset: 2442 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2442 + endOffset: 2525 +- name: 'Pathway to Analytics Engineering: Software practices, Kimball, DBT learning' + startOffset: 2525 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2525 + endOffset: 2619 +- name: 'Learning Resources: DBT tutorials and ''Analytics readings'' Notion list' + startOffset: 2619 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2619 + endOffset: 2692 +- name: 'Role Fit Signals: Enjoy modeling, quality, and best practices' + startOffset: 2692 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2692 + endOffset: 2788 +- name: 'Job Frustrations: Enforcing guidelines, ad-hoc firefights, limited raw control' + startOffset: 2788 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2788 + endOffset: 2916 +- name: 'Team Scale & Placement: Platform teams vs embedded 
analytics engineers' + startOffset: 2916 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2916 + endOffset: 3046 +- name: 'Data Documentation & Profiling: DBT docs strengths and profiling tools (Datafold, + Monte Carlo)' + startOffset: 3046 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3046 + endOffset: 3090 +- name: Episode Wrap-Up & Links + startOffset: 3090 + url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3090 + endOffset: 2949 + transcript: -- header: Podcast Introduction - header: 'Guest Introduction: Victoria Perez Mola overview' - line: This week, we'll talk about a new role in the data team. This role is the analytics engineer. We have a special guest today, Victoria. Victoria works as @@ -861,122 +968,6 @@ transcript: detailed. It has the code and it has dependencies. It's very easy to go from there and see what else you are going to affect if you touch something. who: Victoria -description: 'Master analytics engineering with dbt and data modeling: learn pipelines, - testing, Snowflake basics and a clear career roadmap to advance your data career.' -intro: How do you become an effective analytics engineer and what skills, tools, and - career steps matter most? In this episode, Victoria Perez Mola—born in Argentina, - trained as a Systems Engineer and now an Analytics Engineer at Tier in Berlin—walks - us through her move from ERP and finance reporting into analytics engineering. We - cover daily responsibilities like data modeling, pipelines, data quality and Looker; - the DBT workflow (SQL transformations, version control, tests, DAG); and a practical - analytics toolstack including DBT, Snowflake, Adlib ETL and Looker. Victoria contrasts - analytics engineer, data analyst and data engineer roles, explains role origins, - and outlines typical job expectations such as pipeline ownership, auditing and dashboarding. 
- She digs into core skills—SQL, dimensional modeling, Snowflake—strategies for handling - bad data and schema changes with DBT macros and tests, and team structures from - platform teams to embedded roles. Listen for a clear career roadmap, concrete learning - resources (DBT tutorials and an 'Analytics readings' Notion list), and indicators - of role fit if you enjoy modeling, data quality and engineering best practices. -dateadded: '2021-06-19' -duration: PT00H49M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=0 - endOffset: 108 -- name: 'Guest Introduction: Victoria Perez Mola overview' - startOffset: 108 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=108 - endOffset: 165 -- name: 'Career Journey: Systems engineering, ERP & finance reporting' - startOffset: 165 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=165 - endOffset: 245 -- name: 'Daily Responsibilities: Data modeling, pipelines, data quality, Looker' - startOffset: 245 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=245 - endOffset: 409 -- name: 'DBT Overview: SQL transformations, version control, tests, DAG' - startOffset: 409 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=409 - endOffset: 604 -- name: 'Analytics Toolstack: DBT, Snowflake, Adlib ETL, Looker' - startOffset: 604 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=604 - endOffset: 708 -- name: 'Transition Story: From BI/ERP work to analytics engineering' - startOffset: 708 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=708 - endOffset: 874 -- name: 'Role Comparison: Analytics Engineer vs Data Analyst vs Data Engineer' - startOffset: 874 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=874 - endOffset: 1014 -- name: 'Role Origins & Purpose: Spotify, reducing analysts'' cleaning workload' - startOffset: 1014 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1014 - endOffset: 1252 -- name: 'Job Expectations: Example posting traits 
(pipelines, auditing, dashboards)' - startOffset: 1252 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1252 - endOffset: 1570 -- name: 'Core Skills: SQL, dimensional modeling, Snowflake and tooling variance' - startOffset: 1570 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1570 - endOffset: 1806 -- name: 'DBT Ecosystem: DBT''s role in the analytics engineer movement' - startOffset: 1806 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1806 - endOffset: 1869 -- name: 'Organizational Variability: Team setups and role definitions across companies' - startOffset: 1869 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1869 - endOffset: 1982 -- name: 'Cross-functional Collaboration: Working with analysts, data scientists, backend' - startOffset: 1982 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=1982 - endOffset: 2204 -- name: 'Managing Bad Data & Schema Changes: DBT cleaning, macros, limitations' - startOffset: 2204 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2204 - endOffset: 2333 -- name: 'Data Testing Strategy: DBT tests, upstream checks, warnings vs errors' - startOffset: 2333 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2333 - endOffset: 2442 -- name: 'BI Roles vs Analytics Engineering: Overlaps with BI developer and analyst' - startOffset: 2442 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2442 - endOffset: 2525 -- name: 'Pathway to Analytics Engineering: Software practices, Kimball, DBT learning' - startOffset: 2525 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2525 - endOffset: 2619 -- name: 'Learning Resources: DBT tutorials and ''Analytics readings'' Notion list' - startOffset: 2619 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2619 - endOffset: 2692 -- name: 'Role Fit Signals: Enjoy modeling, quality, and best practices' - startOffset: 2692 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2692 - endOffset: 2788 -- name: 'Job Frustrations: Enforcing guidelines, ad-hoc firefights, limited raw 
control' - startOffset: 2788 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2788 - endOffset: 2916 -- name: 'Team Scale & Placement: Platform teams vs embedded analytics engineers' - startOffset: 2916 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=2916 - endOffset: 3046 -- name: 'Data Documentation & Profiling: DBT docs strengths and profiling tools (Datafold, - Monte Carlo)' - startOffset: 3046 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3046 - endOffset: 3090 -- name: Episode Wrap-Up & Links - startOffset: 3090 - url: https://www.youtube.com/watch?v=C5UcxBwdCEg&t=3090 - endOffset: 2949 --- diff --git a/_podcast/s03e02-from-analytics-to-data-science.md b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md similarity index 96% rename from _podcast/s03e02-from-analytics-to-data-science.md rename to _podcast/analytics-to-data-science-with-kaggle-portfolio.md index 409881e7..918bbcdd 100644 --- a/_podcast/s03e02-from-analytics-to-data-science.md +++ b/_podcast/analytics-to-data-science-with-kaggle-portfolio.md @@ -1,12 +1,11 @@ --- -title: 'Career Transition from Analytics to Data Science: Build a Kaggle Notebook - Portfolio, Learn Python & Get Hired' -short: Shifting Career from Analytics to Data Science -guests: -- andradaolteanu -image: images/podcast/s03e02-from-analytics-to-data-science.jpg +title: "Career Transition from Analytics to Data Science: Build a Kaggle Notebook Portfolio, Learn Python & Get Hired" +short: "Shifting Career from Analytics to Data Science" season: 3 episode: 2 +guests: +- andradaolteanu +image: images/podcast/analytics-to-data-science-with-kaggle-portfolio.jpg ids: youtube: ixmTewD5Waw anchor: Shifting-Career-from-Analytics-to-Data-Science---Andrada-Olteanu-ev19ma @@ -15,6 +14,101 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Shifting-Career-from-Analytics-to-Data-Science---Andrada-Olteanu-ev19ma spotify: https://open.spotify.com/episode/1GVuHJzqbcf2BvaLBTgsAL apple: 
https://podcasts.apple.com/us/podcast/shifting-career-from-analytics-to-data-science-andrada/id1541710331?i=1000517426368 + +description: "Build a Kaggle portfolio, learn Python to pivot from analytics to data science—hands-on notebooks, interview prep and hiring strategies to get hired" +intro: "How do you move from analytics into a hireable data science role by building a Kaggle notebook portfolio and learning Python fast? In this episode, Andrada Olteanu — Data Scientist at Endava, Kaggle Notebooks Master, and Z by HP & NVIDIA Data Science Ambassador — walks through her path from a statistics degree and data analyst role at Avon to a master’s in DS and a practical, project-driven transition.

We cover concrete steps: recommended courses like Jose Portilla’s “Python for Data Science & Machine Learning,” using Kaggle as your primary practice environment, and specific notebook work such as the Iowa House Prices project with hyperparameter tuning. Andrada explains how to translate academic dissertations into public notebooks, decompose and reimplement kernels to grow coding skills, and leverage mentorship (including connecting with Gabi Preda on Kaggle) during the job search. Listeners will also learn how to present work on Kaggle and GitHub, navigate interview expectations (algorithmic coding tests vs practical ML), and use LinkedIn/Twitter for networking.

If you’re building a Kaggle notebook portfolio, learning Python, and aiming for data science roles, this episode gives a practical, step-by-step roadmap." +topics: +- career transition +- analytics +- data science +dateadded: 2021-04-16 + +duration: PT01H02M21S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=0 + endOffset: 97 +- name: 'Episode Overview: Transitioning from Analytics to Data Science' + startOffset: 97 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=97 + endOffset: 130 +- name: 'Career Path: Statistics Degree → Avon Data Analyst → Master’s → Data Scientist' + startOffset: 130 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=130 + endOffset: 321 +- name: 'Recommended Course: Python for Data Science & Machine Learning (Jose Portilla, + Udemy)' + startOffset: 321 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=321 + endOffset: 498 +- name: 'Kaggle Introduction: First Encounters and Community Motivation' + startOffset: 498 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=498 + endOffset: 583 +- name: 'Kaggle Notebooks: Iowa House Prices, Hyperparameter Tuning & Model Improvement' + startOffset: 583 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=583 + endOffset: 866 +- name: 'Project-Based Learning: Kaggle as Primary Practice Environment' + startOffset: 866 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=866 + endOffset: 942 +- name: 'Translating Academic Work: Dissertation and Masters Projects to Public Notebooks' + startOffset: 942 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=942 + endOffset: 1089 +- name: 'Mentorship & Hiring: Connecting with Gabi Preda via Kaggle' + startOffset: 1089 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1089 + endOffset: 1405 +- name: 'Job Search Process: Timeline and Application Strategy' + startOffset: 1405 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1405 + endOffset: 1567 +- name: 'Interview Challenges: 
Algorithmic Coding Tests vs Practical ML Skills' + startOffset: 1567 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1567 + endOffset: 1934 +- name: 'Showcasing Work: Kaggle Notebooks, GitHub and Portfolio Impact' + startOffset: 1934 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1934 + endOffset: 2201 +- name: 'Transferable Analyst Skills: Data Validation, Domain Knowledge & EDA' + startOffset: 2201 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2201 + endOffset: 2509 +- name: 'Coding Growth Plan: Learn by Doing Competitions and Reproducing Notebooks' + startOffset: 2509 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2509 + endOffset: 2716 +- name: 'Learning Technique: Decompose Notebooks, Reimplement and Debug' + startOffset: 2716 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2716 + endOffset: 2967 +- name: 'Master’s Degree Value: Structured Curriculum vs Independent Study' + startOffset: 2967 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2967 + endOffset: 3174 +- name: 'Self-Paced Pivot: Udemy, Kaggle and YouTube Path to Data Science in ~1 Year' + startOffset: 3174 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3174 + endOffset: 3421 +- name: 'Kaggle Ecosystem: Notebooks, Datasets and Community Discussions' + startOffset: 3421 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3421 + endOffset: 3660 +- name: 'Networking Strategy: Use LinkedIn & Twitter to Showcase Projects and Build + Community' + startOffset: 3660 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3660 + endOffset: 3781 +- name: Episode Wrap-Up and Final Advice + startOffset: 3781 + url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3781 + endOffset: 3741 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Transitioning from Analytics to Data Science' @@ -871,108 +965,6 @@ transcript: sec: 3838 time: '1:03:58' who: Alexey -description: Build a Kaggle portfolio, learn Python to pivot from analytics to data - science—hands-on 
notebooks, interview prep and hiring strategies to get hired. -intro: 'How do you move from analytics into a hireable data science role by building - a Kaggle notebook portfolio and learning Python fast? In this episode, Andrada Olteanu - — Data Scientist at Endava, Kaggle Notebooks Master, and Z by HP & NVIDIA Data Science - Ambassador — walks through her path from a statistics degree and data analyst role - at Avon to a master’s in DS and a practical, project-driven transition.

- We cover concrete steps: recommended courses like Jose Portilla’s “Python for Data - Science & Machine Learning,” using Kaggle as your primary practice environment, - and specific notebook work such as the Iowa House Prices project with hyperparameter - tuning. Andrada explains how to translate academic dissertations into public notebooks, - decompose and reimplement kernels to grow coding skills, and leverage mentorship - (including connecting with Gabi Preda on Kaggle) during the job search. Listeners - will also learn how to present work on Kaggle and GitHub, navigate interview expectations - (algorithmic coding tests vs practical ML), and use LinkedIn/Twitter for networking. -

If you’re building a Kaggle notebook portfolio, learning Python, and aiming - for data science roles, this episode gives a practical, step-by-step roadmap.' -dateadded: '2021-04-16' -duration: PT01H02M21S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=0 - endOffset: 97 -- name: 'Episode Overview: Transitioning from Analytics to Data Science' - startOffset: 97 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=97 - endOffset: 130 -- name: 'Career Path: Statistics Degree → Avon Data Analyst → Master’s → Data Scientist' - startOffset: 130 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=130 - endOffset: 321 -- name: 'Recommended Course: Python for Data Science & Machine Learning (Jose Portilla, - Udemy)' - startOffset: 321 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=321 - endOffset: 498 -- name: 'Kaggle Introduction: First Encounters and Community Motivation' - startOffset: 498 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=498 - endOffset: 583 -- name: 'Kaggle Notebooks: Iowa House Prices, Hyperparameter Tuning & Model Improvement' - startOffset: 583 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=583 - endOffset: 866 -- name: 'Project-Based Learning: Kaggle as Primary Practice Environment' - startOffset: 866 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=866 - endOffset: 942 -- name: 'Translating Academic Work: Dissertation and Masters Projects to Public Notebooks' - startOffset: 942 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=942 - endOffset: 1089 -- name: 'Mentorship & Hiring: Connecting with Gabi Preda via Kaggle' - startOffset: 1089 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1089 - endOffset: 1405 -- name: 'Job Search Process: Timeline and Application Strategy' - startOffset: 1405 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1405 - endOffset: 1567 -- name: 'Interview Challenges: Algorithmic Coding Tests vs Practical ML Skills' - 
startOffset: 1567 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1567 - endOffset: 1934 -- name: 'Showcasing Work: Kaggle Notebooks, GitHub and Portfolio Impact' - startOffset: 1934 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=1934 - endOffset: 2201 -- name: 'Transferable Analyst Skills: Data Validation, Domain Knowledge & EDA' - startOffset: 2201 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2201 - endOffset: 2509 -- name: 'Coding Growth Plan: Learn by Doing Competitions and Reproducing Notebooks' - startOffset: 2509 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2509 - endOffset: 2716 -- name: 'Learning Technique: Decompose Notebooks, Reimplement and Debug' - startOffset: 2716 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2716 - endOffset: 2967 -- name: 'Master’s Degree Value: Structured Curriculum vs Independent Study' - startOffset: 2967 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=2967 - endOffset: 3174 -- name: 'Self-Paced Pivot: Udemy, Kaggle and YouTube Path to Data Science in ~1 Year' - startOffset: 3174 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3174 - endOffset: 3421 -- name: 'Kaggle Ecosystem: Notebooks, Datasets and Community Discussions' - startOffset: 3421 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3421 - endOffset: 3660 -- name: 'Networking Strategy: Use LinkedIn & Twitter to Showcase Projects and Build - Community' - startOffset: 3660 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3660 - endOffset: 3781 -- name: Episode Wrap-Up and Final Advice - startOffset: 3781 - url: https://www.youtube.com/watch?v=ixmTewD5Waw&t=3781 - endOffset: 3741 --- Links: diff --git a/_podcast/s20e07-build-strong-career-in-data.md b/_podcast/applied-llm-research-and-career-growth-in-practice.md similarity index 95% rename from _podcast/s20e07-build-strong-career-in-data.md rename to _podcast/applied-llm-research-and-career-growth-in-practice.md index c12e1cca..f7da283c 100644 --- 
a/_podcast/s20e07-build-strong-career-in-data.md +++ b/_podcast/applied-llm-research-and-career-growth-in-practice.md @@ -1,20 +1,122 @@ --- +title: "Applied LLM Research & Career Growth: Long-Context Evaluation, Prototyping & Industry Publishing" +short: "Build a Strong Career in Data" +season: 20 episode: 7 guests: - lavanyagupta +image: images/podcast/applied-llm-research-and-career-growth-in-practice.jpg ids: - anchor: atalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p + anchor: datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p youtube: ekG5zJioyFs -image: images/podcast/s20e07-build-strong-career-in-data.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61phttps://creators.spotify.com/pod/show/datatalksclub/episodes/Build-a-Strong-Career-in-Data---Lavanya-Gupta-e32k61p apple: https://podcasts.apple.com/us/podcast/build-a-strong-career-in-data-lavanya-gupta/id1541710331?i=1000706988972 spotify: https://open.spotify.com/episode/2mJXd0lSZFPKJA0ZrG9iS2 youtube: https://www.youtube.com/watch?v=ekG5zJioyFs -season: 20 -short: Build a Strong Career in Data -title: 'Benchmarking Long-Context LLMs for Finance: Chunking, Retrieval, Summarization - & Career Tips' +description: "Learn LLM research tactics, long-context evaluation approaches and prototyping tips to boost your career, publish industry work, and ship impactful models." +topics: +- LLMs +- NLP +- MLOps +- applied research +- career growth +intro: "How do you evaluate and prototype long-context LLMs in a real-world setting while advancing a career as an applied researcher? 
In this episode Lavanya Gupta — a Carnegie Mellon Language Technologies Institute alum and Sr. AI/ML Applied Scientist at JPMorgan Chase’s Machine Learning Center of Excellence — walks through practical strategies for applied LLM research and career growth. With 5+ years of industrial research experience, public talks at WiDS, PyData, TensorFlow User Group and reviewer roles for NeurIPS 2024, ICLR 2025 and NAACL 2025, Lavanya connects technical practice with professional development.

We cover core topics including long-context evaluation methodologies for transformer models, rapid prototyping workflows for LLM systems, and best practices for industry publishing and technical communication. Listeners will get actionable guidance on setting up reproducible experiments, balancing research rigor with product timelines, and positioning industry work for peer-reviewed venues. This episode is for machine learning engineers, NLP researchers, and applied scientists seeking concrete tactics for prototyping LLMs, conducting robust long-context evaluations, and growing a research-oriented career in industry." +dateadded: 2025-05-12 +duration: PT00H58M10S +quotableClips: +- name: Episode Introduction & Topic Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=0 + endOffset: 122 +- name: 'Career Overview: From Software Engineering to ML & Master''s' + startOffset: 122 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=122 + endOffset: 205 +- name: 'Origin of ML Interest: Hackathons and Computer Vision' + startOffset: 205 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=205 + endOffset: 295 +- name: 'Early Project Case Study: OCR for Organization Charts' + startOffset: 295 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=295 + endOffset: 523 +- name: 'Role Snapshot: LLM Benchmarking at a Financial Institution' + startOffset: 523 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=523 + endOffset: 615 +- name: 'Research Focus: Evaluating Long-Context LLMs' + startOffset: 615 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=615 + endOffset: 756 +- name: 'Empirical Findings: Context Window Performance Droparound 32k–64k' + startOffset: 756 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=756 + endOffset: 894 +- name: 'Practical Approach: Chunking, Retrieval, and Summarization for Large Docs' + startOffset: 894 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=894 + endOffset: 928 +- name: 'Published Work: "Long 
Context LLMs on Financial Concepts" (EMNLP)' + startOffset: 928 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=928 + endOffset: 1048 +- name: 'Industry Research Practices: Publishing from Corporate Teams' + startOffset: 1048 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1048 + endOffset: 1185 +- name: 'Motivation for Publications: Manager Support and Community Sharing' + startOffset: 1185 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1185 + endOffset: 1330 +- name: 'Dissemination Paths: arXiv, Endorsement, and Early Publications' + startOffset: 1330 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1330 + endOffset: 1501 +- name: 'Self-Learning & MLOps: Zoom Camps, Tutorials, and Mentoring' + startOffset: 1501 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1501 + endOffset: 1814 +- name: 'Rapid Prototyping Tools: Streamlit for Demos and Feedback' + startOffset: 1814 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1814 + endOffset: 2004 +- name: 'Kaggle Success Story: Building and Licensing a High-Impact Dataset' + startOffset: 2004 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2004 + endOffset: 2252 +- name: 'Community Contribution: Women in Data Science and Open Mentoring' + startOffset: 2252 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2252 + endOffset: 2473 +- name: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' + startOffset: 2473 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2473 + endOffset: 2724 +- name: 'Career Pivot Guidance: Non-CS Backgrounds Entering Data Roles' + startOffset: 2724 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2724 + endOffset: 2908 +- name: 'Networking & Mentorship: Cold Outreach and Building Rapport' + startOffset: 2908 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2908 + endOffset: 3088 +- name: 'Portfolio Strategy: Community Visibility vs. 
Job-Targeted Projects' + startOffset: 3088 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3088 + endOffset: 3273 +- name: 'Interview Preparation: LeetCode, Conceptual Mastery, and Mock Interviews' + startOffset: 3273 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3273 + endOffset: 3416 +- name: 'Project Selection: Industry-Backed Work for Real-World Impact' + startOffset: 3416 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3416 + endOffset: 3466 +- name: Episode Wrap-Up & Final Career Advice + startOffset: 3466 + url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3466 + endOffset: 3490 transcript: - header: Episode Introduction & Topic Overview - line: This week we'll talk about building a strong career in data and we have a @@ -766,7 +868,7 @@ transcript: sec: 2403 time: '40:03' who: Alexey -- header: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' +- header: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' - line: Yeah, definitely. I mentioned luck because, as you said, at that time-during COVID-people were really active on Kaggle. Maybe the timing was luck, but it wasn't a random decision to just get up one day and scrape Google Play Store. @@ -1055,120 +1157,12 @@ transcript: sec: 3490 time: '58:10' who: Alexey -description: Discover long-context LLMs, chunking and retrieval for finance benchmarking—learn - 32k–64k context limits, summarization tips, prototyping & career advice. -intro: How do you evaluate and deploy long-context LLMs for real-world financial documents—when - context windows stretch into tens of thousands of tokens? In this episode, Lavanya - Gupta, a CMU LTI alum and Sr. AI/ML Applied Scientist at JPMorgan Chase’s MLCOE, - walks through practical benchmarking and production strategies for long-context - LLMs in finance. 
Drawing on her published work "Long Context LLMs on Financial Concepts" - (EMNLP) and 5+ years of industrial research, Lavanya explains empirical findings - around context-window performance (a notable droparound 32k–64k), and outlines the - pragmatic trio of chunking, retrieval, and summarization for processing large documents. - She also discusses industry research practices—publishing from corporate teams, - dissemination via arXiv and endorsements—and rapid prototyping techniques like Streamlit - for demos and feedback. Listeners will get concrete guidance on LLM benchmarking, - context window trade-offs, dataset and licensing lessons from a Kaggle success, - and actionable career advice on transitioning into ML roles, networking, portfolios, - and interview prep. Tune in to learn how to benchmark long-context LLMs for financial - NLP and translate research into production-ready workflows. -dateadded: '2025-05-12' -duration: PT00H58M10S -quotableClips: -- name: Episode Introduction & Topic Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=0 - endOffset: 122 -- name: 'Career Overview: From Software Engineering to ML & Master''s' - startOffset: 122 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=122 - endOffset: 205 -- name: 'Origin of ML Interest: Hackathons and Computer Vision' - startOffset: 205 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=205 - endOffset: 295 -- name: 'Early Project Case Study: OCR for Organization Charts' - startOffset: 295 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=295 - endOffset: 523 -- name: 'Role Snapshot: LLM Benchmarking at a Financial Institution' - startOffset: 523 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=523 - endOffset: 615 -- name: 'Research Focus: Evaluating Long-Context LLMs' - startOffset: 615 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=615 - endOffset: 756 -- name: 'Empirical Findings: Context Window Performance Droparound 32k–64k' - startOffset: 756 - 
url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=756 - endOffset: 894 -- name: 'Practical Approach: Chunking, Retrieval, and Summarization for Large Docs' - startOffset: 894 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=894 - endOffset: 928 -- name: 'Published Work: "Long Context LLMs on Financial Concepts" (EMNLP)' - startOffset: 928 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=928 - endOffset: 1048 -- name: 'Industry Research Practices: Publishing from Corporate Teams' - startOffset: 1048 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1048 - endOffset: 1185 -- name: 'Motivation for Publications: Manager Support and Community Sharing' - startOffset: 1185 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1185 - endOffset: 1330 -- name: 'Dissemination Paths: arXiv, Endorsement, and Early Publications' - startOffset: 1330 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1330 - endOffset: 1501 -- name: 'Self-Learning & MLOps: Zoom Camps, Tutorials, and Mentoring' - startOffset: 1501 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1501 - endOffset: 1814 -- name: 'Rapid Prototyping Tools: Streamlit for Demos and Feedback' - startOffset: 1814 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=1814 - endOffset: 2004 -- name: 'Kaggle Success Story: Building and Licensing a High-Impact Dataset' - startOffset: 2004 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2004 - endOffset: 2252 -- name: 'Community Contribution: Women in Data Science and Open Mentoring' - startOffset: 2252 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2252 - endOffset: 2473 -- name: 'Opportunity & Persistence: Timing, Luck, and "Shooting Arrows"' - startOffset: 2473 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2473 - endOffset: 2724 -- name: 'Career Pivot Guidance: Non-CS Backgrounds Entering Data Roles' - startOffset: 2724 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2724 - endOffset: 2908 -- name: 'Networking & Mentorship: Cold Outreach 
and Building Rapport' - startOffset: 2908 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=2908 - endOffset: 3088 -- name: 'Portfolio Strategy: Community Visibility vs. Job-Targeted Projects' - startOffset: 3088 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3088 - endOffset: 3273 -- name: 'Interview Preparation: LeetCode, Conceptual Mastery, and Mock Interviews' - startOffset: 3273 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3273 - endOffset: 3416 -- name: 'Project Selection: Industry-Backed Work for Real-World Impact' - startOffset: 3416 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3416 - endOffset: 3466 -- name: Episode Wrap-Up & Final Career Advice - startOffset: 3466 - url: https://www.youtube.com/watch?v=ekG5zJioyFs&t=3466 - endOffset: 3490 +context: 'A practical, curiosity-driven bridge between research and engineering: relentlessly + iterate with hands-on prototyping, rigorous evaluation, and open dissemination to + solve real-world ML problems (ex: long-context LLMs), while leveraging community, + mentorship, and strategic projects to accelerate career growth and drive measurable + impact.' 
--- - Links: * [Linkedin](https://www.linkedin.com/in/lgupta18/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s17e04-bayesian-modeling-and-probabilistic-programming.md b/_podcast/bayesian-modeling-workflows-and-tools.md similarity index 95% rename from _podcast/s17e04-bayesian-modeling-and-probabilistic-programming.md rename to _podcast/bayesian-modeling-workflows-and-tools.md index c168f4a1..27a5d661 100644 --- a/_podcast/s17e04-bayesian-modeling-and-probabilistic-programming.md +++ b/_podcast/bayesian-modeling-workflows-and-tools.md @@ -1,20 +1,129 @@ --- +title: "Bayesian Modeling: PyMC, Stan and Probabilistic Programming Workflows" +short: "Bayesian Modeling and Probabilistic Programming" +season: 17 episode: 4 guests: - robzinkov +image: images/podcast/bayesian-modeling-workflows-and-tools.jpg ids: - anchor: atatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 + anchor: datatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 youtube: kcKvUSInm-M -image: images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Bayesian-Modeling-and-Probabilistic-Programming---Rob-Zinkov-e2dokr5 apple: https://podcasts.apple.com/us/podcast/bayesian-modeling-and-probabilistic-programming-rob/id1541710331?i=1000642253191 spotify: https://open.spotify.com/episode/5WUKDcTYv8ZvnqeHSQT7FF?si=K10siPBHQwmegCCXJ1VpIA youtube: https://www.youtube.com/watch?v=kcKvUSInm-M -season: 17 -short: Bayesian Modeling and Probabilistic Programming -title: 'Master Bayesian Modeling & Probabilistic Programming: MCMC, HMC/NUTS, Sampling - with Hakaru & PyMC' +description: "Discover Bayesian modeling with PyMC and Stan: learn priors, MCMC/HMC sampling, probabilistic programming workflows to build, debug and refine robust models." 
+topics: +- probabilistic programming +- bayesian statistics +- machine learning +- tools +intro: "How do you move from point estimates to full uncertainty-aware models and choose the right tools and workflows for Bayesian modeling? In this episode Rob Zinkov, a machine learning engineer and former Indiana University research scientist who led development of the Hakaru probabilistic programming language, walks through practical Bayesian workflows and tool choices. We cover the core challenge of encoding priors, likelihoods, and posteriors; why integrals become intractable and how numerical integration and sampling (MCMC, Hamiltonian Monte Carlo, NUTS) approximate expectations; and the trade-offs between probabilistic languages and libraries. Rob explains career lessons on moving from software engineering to ML research, the essential math (calculus, linear algebra, optimization), and self-study strategies for statistics. Concrete topics include PyMC examples (a rainfall model and computational graph), Stan’s advances in efficient sampling, composing hierarchical and spatial models, diagnosing multimodality and uncertainty, and automating model tasks with probabilistic programming (Hakaru). Listen to gain a clearer, practical understanding of Bayesian modeling, when to use PyMC vs Stan, how samplers work, and recommended resources to build your workflow." 
+dateadded: '2024-01-22' +duration: PT01H05M05S +quotableClips: +- name: Episode Introduction & Topic Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=0 + endOffset: 104 +- name: 'Guest Introduction: Rob Zinkov and the Hakaru probabilistic programming project' + startOffset: 104 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=104 + endOffset: 166 +- name: 'Career Journey: From software engineering to machine learning research' + startOffset: 166 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=166 + endOffset: 237 +- name: 'Industry vs Academia: Applying Bayesian tools in real problems' + startOffset: 237 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=237 + endOffset: 400 +- name: 'Transitioning Skills: Embracing calculus, integrals, and optimization' + startOffset: 400 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=400 + endOffset: 492 +- name: 'Core Technical Skills: Linear algebra and optimization for ML' + startOffset: 492 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=492 + endOffset: 572 +- name: 'Self-Study Path: Learning statistics without formal classes' + startOffset: 572 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=572 + endOffset: 887 +- name: 'Statistical Paradigms: Frequentist point estimates vs Bayesian distributions' + startOffset: 887 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=887 + endOffset: 1146 +- name: 'Bayesian Workflow: Priors, likelihoods, and posterior distributions' + startOffset: 1146 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1146 + endOffset: 1291 +- name: 'Bayesian Advantages: Composability and incremental model building' + startOffset: 1291 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1291 + endOffset: 1425 +- name: 'Probabilistic Programming: Automating Bayesian model tasks' + startOffset: 1425 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1425 + endOffset: 1469 +- name: 'Why Integrals Matter: Intractable integrals in probabilistic 
models' + startOffset: 1469 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1469 + endOffset: 1600 +- name: 'Numerical Integration: Sampling as an approximation technique' + startOffset: 1600 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1600 + endOffset: 1757 +- name: 'Samplers Overview: Using draws to estimate posterior expectations' + startOffset: 1757 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1757 + endOffset: 2028 +- name: 'MCMC Fundamentals: Markov chains and exploring high-probability regions' + startOffset: 2028 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2028 + endOffset: 2199 +- name: 'Probabilistic Languages: Hakaru’s role in generating samplers' + startOffset: 2199 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2199 + endOffset: 2378 +- name: 'Language vs Library: Model semantics, control flow, and ASTs' + startOffset: 2378 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2378 + endOffset: 2600 +- name: 'PyMC Example: Building a rainfall model and computational graph' + startOffset: 2600 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2600 + endOffset: 2890 +- name: 'Interpreting Posteriors: Model checks and iterative refinement' + startOffset: 2890 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2890 + endOffset: 3077 +- name: 'Encoding Dependencies: Spatial models and hierarchical structure' + startOffset: 3077 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3077 + endOffset: 3192 +- name: 'Multimodality & Uncertainty: Representing multiple plausible outcomes' + startOffset: 3192 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3192 + endOffset: 3341 +- name: 'Stan & HMC/NUTS: Advances in efficient sampling algorithms' + startOffset: 3341 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3341 + endOffset: 3647 +- name: 'Learning Resources: PyMC book, Statistical Rethinking course, and tutorials' + startOffset: 3647 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3647 + endOffset: 
3953 +- name: 'Consulting & Contact: Rob’s statistical consulting and email' + startOffset: 3953 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3953 + endOffset: 3991 +- name: Episode Wrap-up, Links, and Next Steps + startOffset: 3991 + url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3991 + endOffset: 3905 transcript: - header: Episode Introduction & Topic Overview - header: 'Guest Introduction: Rob Zinkov and the Hakaru probabilistic programming @@ -178,7 +287,7 @@ transcript: sec: 557 time: '9:17' who: Alexey -- header: 'Self‑Study Path: Learning statistics without formal classes' +- header: 'Self-Study Path: Learning statistics without formal classes' - line: It was just dabbling. You're just sort of dabbling in the problems, you're reading about them. Because in some sense, if you're like, “Oh, I want to learn machine learning. Okay, I'm going to learn how to run a random forest or implement @@ -619,7 +728,7 @@ transcript: sec: 2023 time: '33:43' who: Alexey -- header: 'MCMC Fundamentals: Markov chains and exploring high‑probability regions' +- header: 'MCMC Fundamentals: Markov chains and exploring high-probability regions' - line: Our model essentially returns to us, “What's the probability the parameter has this value, given the data we return?” So we have these probabilities there. But we can't know in advance what the high probability regions are going to be. @@ -1169,7 +1278,7 @@ transcript: sec: 3986 time: '1:06:26' who: Rob -- header: Episode Wrap‑up, Links, and Next Steps +- header: Episode Wrap-up, Links, and Next Steps - line: Okay. We will also include the email in the description. I posted two links in the live chat – I will also post them in the description. I guess that's all for today. Thanks a lot, Rob, for joining us today. 
And thanks, everyone, for @@ -1185,129 +1294,20 @@ transcript: sec: 4009 time: '1:06:49' who: Alexey -description: Master Bayesian modeling, MCMC/HMC/NUTS and probabilistic programming - with Hakaru & PyMC—learn sampling, priors, posteriors and practical model building. -intro: 'How do you move from point estimates to full Bayesian models and pick the - right sampler for real problems? In this episode, Rob Zinkov — machine learning engineer, - data scientist, and former lead developer of the Hakaru probabilistic programming - language — walks through mastering Bayesian modeling and probabilistic programming, - focusing on practical tools like MCMC, HMC/NUTS, sampling, Hakaru, and PyMC.

- We cover the core Bayesian workflow: priors, likelihoods, and posterior distributions; - why integrals become intractable and how numerical integration via sampling approximates - expectations; and the fundamentals of Markov chain Monte Carlo for exploring high‑probability - regions. Rob contrasts frequentist point estimates with Bayesian distributions, - explains composability and incremental model building in probabilistic languages, - and discusses language vs library design and Hakaru’s role in generating samplers. - You’ll hear a concrete PyMC rainfall model example, strategies for interpreting - posteriors, encoding spatial and hierarchical dependencies, and handling multimodality - and uncertainty. The episode closes with practical learning resources (PyMC book, - Statistical Rethinking) to support your self‑study. Tune in to get actionable guidance - on building, sampling, and refining Bayesian models.' -dateadded: '2024-01-22' -duration: PT01H05M05S -quotableClips: -- name: Episode Introduction & Topic Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=0 - endOffset: 104 -- name: 'Guest Introduction: Rob Zinkov and the Hakaru probabilistic programming project' - startOffset: 104 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=104 - endOffset: 166 -- name: 'Career Journey: From software engineering to machine learning research' - startOffset: 166 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=166 - endOffset: 237 -- name: 'Industry vs Academia: Applying Bayesian tools in real problems' - startOffset: 237 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=237 - endOffset: 400 -- name: 'Transitioning Skills: Embracing calculus, integrals, and optimization' - startOffset: 400 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=400 - endOffset: 492 -- name: 'Core Technical Skills: Linear algebra and optimization for ML' - startOffset: 492 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=492 - endOffset: 572 -- 
name: 'Self‑Study Path: Learning statistics without formal classes' - startOffset: 572 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=572 - endOffset: 887 -- name: 'Statistical Paradigms: Frequentist point estimates vs Bayesian distributions' - startOffset: 887 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=887 - endOffset: 1146 -- name: 'Bayesian Workflow: Priors, likelihoods, and posterior distributions' - startOffset: 1146 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1146 - endOffset: 1291 -- name: 'Bayesian Advantages: Composability and incremental model building' - startOffset: 1291 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1291 - endOffset: 1425 -- name: 'Probabilistic Programming: Automating Bayesian model tasks' - startOffset: 1425 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1425 - endOffset: 1469 -- name: 'Why Integrals Matter: Intractable integrals in probabilistic models' - startOffset: 1469 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1469 - endOffset: 1600 -- name: 'Numerical Integration: Sampling as an approximation technique' - startOffset: 1600 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1600 - endOffset: 1757 -- name: 'Samplers Overview: Using draws to estimate posterior expectations' - startOffset: 1757 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=1757 - endOffset: 2028 -- name: 'MCMC Fundamentals: Markov chains and exploring high‑probability regions' - startOffset: 2028 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2028 - endOffset: 2199 -- name: 'Probabilistic Languages: Hakaru’s role in generating samplers' - startOffset: 2199 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2199 - endOffset: 2378 -- name: 'Language vs Library: Model semantics, control flow, and ASTs' - startOffset: 2378 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2378 - endOffset: 2600 -- name: 'PyMC Example: Building a rainfall model and computational graph' - startOffset: 2600 - url: 
https://www.youtube.com/watch?v=kcKvUSInm-M&t=2600 - endOffset: 2890 -- name: 'Interpreting Posteriors: Model checks and iterative refinement' - startOffset: 2890 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=2890 - endOffset: 3077 -- name: 'Encoding Dependencies: Spatial models and hierarchical structure' - startOffset: 3077 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3077 - endOffset: 3192 -- name: 'Multimodality & Uncertainty: Representing multiple plausible outcomes' - startOffset: 3192 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3192 - endOffset: 3341 -- name: 'Stan & HMC/NUTS: Advances in efficient sampling algorithms' - startOffset: 3341 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3341 - endOffset: 3647 -- name: 'Learning Resources: PyMC book, Statistical Rethinking course, and tutorials' - startOffset: 3647 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3647 - endOffset: 3953 -- name: 'Consulting & Contact: Rob’s statistical consulting and email' - startOffset: 3953 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3953 - endOffset: 3991 -- name: Episode Wrap‑up, Links, and Next Steps - startOffset: 3991 - url: https://www.youtube.com/watch?v=kcKvUSInm-M&t=3991 - endOffset: 3905 ---- +context: 'Context: This episode centers on Rob Zinkov and the Hakaru probabilistic + programming project, tracing his career shift into Bayesian machine learning, contrasting + tools (Hakaru, PyMC, Stan), and practical techniques (priors, likelihoods, sampling, + MCMC/HMC/NUTS) alongside the skills and learning resources needed to apply them. + Central narrative: Probabilistic programming and the Bayesian workflow offer a practical, + composable way to bring honest uncertainty quantification into real-world problems + by turning statistical models into executable programs—models you can build incrementally, + check, and refine. 
The core unifying idea is that encoding assumptions as programs + makes intractable integrals manageable through numerical approximation (sampling + and MCMC), lets you compose and reuse model parts, and shifts modeling toward an + iterative, testable practice; doing this effectively requires foundational math + and a mindset of principled model-building rather than chasing point estimates.' +--- Links: * [Book 1](https://bayesiancomputationbook.com/welcome.html){:target="_blank"} diff --git a/_podcast/s16e09-become-data-freelancer.md b/_podcast/becoming-data-freelancer.md similarity index 96% rename from _podcast/s16e09-become-data-freelancer.md rename to _podcast/becoming-data-freelancer.md index 25280ce4..c929bddf 100644 --- a/_podcast/s16e09-become-data-freelancer.md +++ b/_podcast/becoming-data-freelancer.md @@ -1,20 +1,141 @@ --- +title: "Becoming a Data Freelancer: Pricing, Client Acquisition and Contract Strategy" +short: "Become a Data Freelancer" +season: 16 episode: 9 guests: - dimitrivisnadi -date: 2025-11-07 +image: images/podcast/becoming-data-freelancer.jpg ids: - anchor: atatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 + anchor: datatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 youtube: R_EnSa9aZtE -image: images/podcast/s16e09-become-data-freelancer.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Become-a-Data-Freelancer---Dimitri-Visnadi-e2cslo2 apple: https://podcasts.apple.com/us/podcast/become-a-data-freelancer-dimitri-visnadi/id1541710331?i=1000637962993 spotify: https://open.spotify.com/episode/5OJfRiQ64JtLUmIkvadohg?si=uUEdvZwARN2hVGEfz73URg youtube: https://www.youtube.com/watch?v=R_EnSa9aZtE -season: 16 -short: Become a Data Freelancer -title: 'Launch Your Data Freelancer Career: Pricing, Outreach, Contracts & Risk' +description: "Master data freelancer pricing, client acquisition and contract strategy—learn rate benchmarking, outreach tactics, client vetting 
and runway planning." +topics: +- freelance +- consulting +- data analytics +- career transition +- career growth +- data science + +intro: "How do you move from corporate analytics to independent data consulting while pricing services fairly, finding steady clients, and avoiding contract pitfalls? In this episode Dimitri Visnadi — an independent data consultant who has advised brands like Unilever, Ferrero, Heineken and Red Bull and who trained at UCL and HP — walks through the practical realities of becoming a data freelancer.

We cover the full transition: career pivoting from marketing to data, early startup and corporate analytics experience, and the decision to resign and pursue freelancing. Dimitri breaks down client acquisition tactics (cold outreach, recruiter channels, proactive self-marketing), pricing strategy (platforms vs direct contracting, rate benchmarking, project pricing), and contract strategy (dependent contractor risk, platform terms vs direct agreements, subcontracting). He also addresses vetting clients, payment delays, financial runway recommendations, common pitfalls like mispositioning and mispricing, and learning resources including the Data Freelancer newsletter.

Listen to get actionable guidance on pricing, client acquisition, contract formats, and the business practices that help sustain a freelance data consultancy." +dateadded: 2023-12-09 +date: 2025-11-07 +duration: PT00H59M49S +quotableClips: +- name: Podcast Introduction + startOffset: 91 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=91 + endOffset: 140 +- name: 'Career Path: From Marketing to Data' + startOffset: 140 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=140 + endOffset: 184 +- name: 'Startup Experience: Translation, SQL & User Analysis' + startOffset: 184 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=184 + endOffset: 343 +- name: 'Corporate Analytics: Hewlett Packard Sales BI & KPIs' + startOffset: 343 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=343 + endOffset: 458 +- name: 'Education & Transition: UCL Master''s to Data Scientist' + startOffset: 458 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=458 + endOffset: 548 +- name: 'Consulting Exposure: Google Partner & Consulting Foundations' + startOffset: 548 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=548 + endOffset: 685 +- name: 'Motivation to Freelance: Frustration, Freedom & Purpose' + startOffset: 685 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=685 + endOffset: 809 +- name: 'Making the Leap: Resignation, Outreach & Early Leads' + startOffset: 809 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=809 + endOffset: 953 +- name: 'Market Research: Cold Outreach to Established Freelancers' + startOffset: 953 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=953 + endOffset: 1042 +- name: 'Practical Setup: Mentors, Registration & Logistics' + startOffset: 1042 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1042 + endOffset: 1100 +- name: 'Recruiter Channels: Engaging Agencies Before Launch' + startOffset: 1100 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1100 + endOffset: 1270 +- name: 'Contracting Risks: Dependent Contractor & Legal 
Considerations' + startOffset: 1270 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1270 + endOffset: 1524 +- name: 'Pricing Strategy: Platforms, Recruiters & Rate Benchmarking' + startOffset: 1524 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1524 + endOffset: 1927 +- name: 'Client Relationships: Building Referrals & Long-Term Pipeline' + startOffset: 1927 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1927 + endOffset: 2024 +- name: 'Proactive Outreach: Creative Self-Marketing Tactics' + startOffset: 2024 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2024 + endOffset: 2137 +- name: 'Independence & Offerings: Freelance Business Model Explained' + startOffset: 2137 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2137 + endOffset: 2230 +- name: 'Contract Formats: Platform Terms vs Direct Agreements' + startOffset: 2230 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2230 + endOffset: 2330 +- name: 'Direct Client Work: Project Pricing, Subcontracting & Cutting the Middleman' + startOffset: 2330 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2330 + endOffset: 2621 +- name: 'Vetting Clients: Ratings, Company Research & Payment Assurance' + startOffset: 2621 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2621 + endOffset: 2785 +- name: 'Payment Challenges: Corporate Bureaucracy & Delays' + startOffset: 2785 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2785 + endOffset: 2905 +- name: 'Transition Paths: Weekend, Part-Time & Full-Time Approaches' + startOffset: 2905 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2905 + endOffset: 3040 +- name: 'Running the Business: Income Variability, Risk & Purpose' + startOffset: 3040 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3040 + endOffset: 3251 +- name: 'Financial Planning: Recommended Runway Before Quitting' + startOffset: 3251 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3251 + endOffset: 3301 +- name: 'Common Pitfalls: Mispositioning, Mispricing & 
False Expectations' + startOffset: 3301 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3301 + endOffset: 3454 +- name: 'Learning Resources: Marketing Courses, Mentors & Newsletters' + startOffset: 3454 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3454 + endOffset: 3651 +- name: 'Follow-Up: The Data Freelancer Newsletter & Contact Channels' + startOffset: 3651 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3651 + endOffset: 3667 +- name: Episode Wrap-Up + startOffset: 3667 + url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3667 + endOffset: 3589 transcript: - header: Podcast Introduction - line: This week, we'll talk about doing data freelancing. We have a very special @@ -1178,134 +1299,12 @@ transcript: sec: 3680 time: '1:01:20' who: Dimitri -intro: 'How do you move from corporate data roles into a sustainable freelance data - career while setting rates, winning clients, and managing legal risk? In this episode - Dimitri Visnadi — an independent data consultant who has advised brands like Unilever, - Ferrero, Heineken and Red Bull, worked in HP’s data teams and a Google‑partner consulting - firm, and holds an MSc in Business Analytics from UCL — walks through the practical - steps he used to launch The Data Freelancer.

We cover the full arc of transition: - career pivot and early outreach, market research and recruiter channels, pricing - strategy across platforms vs direct clients, subcontracting and cutting out middlemen, - and the contract risks around dependent contractor status. Dimitri also breaks down - client vetting, handling corporate payment delays, recommended runway before quitting, - and common pitfalls like mispositioning and mispricing. Listeners will leave with - concrete tactics for freelance data consulting — outreach scripts, benchmarking - approaches for rates, contract checkpoints, and resources (courses, mentors, newsletters) - to reduce risk and build a reliable pipeline. Ideal for aspiring data freelancers - seeking practical guidance on pricing, outreach, contracts and risk.' -description: 'Launch your data freelancer career: pricing, outreach & contracts tactics, - client vetting, legal risk and runway tips to win steady projects.' -dateadded: '2023-12-09' -duration: PT00H59M49S -quotableClips: -- name: Podcast Introduction - startOffset: 91 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=91 - endOffset: 140 -- name: 'Career Path: From Marketing to Data' - startOffset: 140 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=140 - endOffset: 184 -- name: 'Startup Experience: Translation, SQL & User Analysis' - startOffset: 184 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=184 - endOffset: 343 -- name: 'Corporate Analytics: Hewlett Packard Sales BI & KPIs' - startOffset: 343 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=343 - endOffset: 458 -- name: 'Education & Transition: UCL Master''s to Data Scientist' - startOffset: 458 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=458 - endOffset: 548 -- name: 'Consulting Exposure: Google Partner & Consulting Foundations' - startOffset: 548 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=548 - endOffset: 685 -- name: 'Motivation to Freelance: Frustration, Freedom & Purpose' - 
startOffset: 685 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=685 - endOffset: 809 -- name: 'Making the Leap: Resignation, Outreach & Early Leads' - startOffset: 809 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=809 - endOffset: 953 -- name: 'Market Research: Cold Outreach to Established Freelancers' - startOffset: 953 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=953 - endOffset: 1042 -- name: 'Practical Setup: Mentors, Registration & Logistics' - startOffset: 1042 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1042 - endOffset: 1100 -- name: 'Recruiter Channels: Engaging Agencies Before Launch' - startOffset: 1100 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1100 - endOffset: 1270 -- name: 'Contracting Risks: Dependent Contractor & Legal Considerations' - startOffset: 1270 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1270 - endOffset: 1524 -- name: 'Pricing Strategy: Platforms, Recruiters & Rate Benchmarking' - startOffset: 1524 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1524 - endOffset: 1927 -- name: 'Client Relationships: Building Referrals & Long-Term Pipeline' - startOffset: 1927 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=1927 - endOffset: 2024 -- name: 'Proactive Outreach: Creative Self-Marketing Tactics' - startOffset: 2024 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2024 - endOffset: 2137 -- name: 'Independence & Offerings: Freelance Business Model Explained' - startOffset: 2137 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2137 - endOffset: 2230 -- name: 'Contract Formats: Platform Terms vs Direct Agreements' - startOffset: 2230 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2230 - endOffset: 2330 -- name: 'Direct Client Work: Project Pricing, Subcontracting & Cutting the Middleman' - startOffset: 2330 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2330 - endOffset: 2621 -- name: 'Vetting Clients: Ratings, Company Research & Payment Assurance' - startOffset: 2621 
- url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2621 - endOffset: 2785 -- name: 'Payment Challenges: Corporate Bureaucracy & Delays' - startOffset: 2785 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2785 - endOffset: 2905 -- name: 'Transition Paths: Weekend, Part-Time & Full-Time Approaches' - startOffset: 2905 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=2905 - endOffset: 3040 -- name: 'Running the Business: Income Variability, Risk & Purpose' - startOffset: 3040 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3040 - endOffset: 3251 -- name: 'Financial Planning: Recommended Runway Before Quitting' - startOffset: 3251 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3251 - endOffset: 3301 -- name: 'Common Pitfalls: Mispositioning, Mispricing & False Expectations' - startOffset: 3301 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3301 - endOffset: 3454 -- name: 'Learning Resources: Marketing Courses, Mentors & Newsletters' - startOffset: 3454 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3454 - endOffset: 3651 -- name: 'Follow-Up: The Data Freelancer Newsletter & Contact Channels' - startOffset: 3651 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3651 - endOffset: 3667 -- name: Episode Wrap-Up - startOffset: 3667 - url: https://www.youtube.com/watch?v=R_EnSa9aZtE&t=3667 - endOffset: 3589 +context: 'This episode centers on one clear idea: transitioning from employee to sustainable + data freelancer is not a leap of faith but a deliberate, staged business transformation—one + that combines technical credibility with market research, proactive outreach, sound + pricing and contract choices, client vetting, and financial/legal safeguards so + you can manage risk, build repeatable pipelines, and turn independence into a reliable, + purpose-driven career.' 
--- Links: diff --git a/_podcast/s06e05-post-doctoral-research.md b/_podcast/big-data-analytics-and-postdoc-research.md similarity index 96% rename from _podcast/s06e05-post-doctoral-research.md rename to _podcast/big-data-analytics-and-postdoc-research.md index 9a051905..f45fa345 100644 --- a/_podcast/s06e05-post-doctoral-research.md +++ b/_podcast/big-data-analytics-and-postdoc-research.md @@ -1,12 +1,11 @@ --- -title: 'Master Spatial Big Data Analytics: Nebula Stream Systems, Postdoc Mentoring - & PhD Tips' -short: 'Advancing Big Data Analytics: Post-Doctoral Research' -guests: -- elenitziritazacharatou -image: images/podcast/s06e05-post-doctoral-research.jpg +title: "Master Spatial Big Data Analytics: Nebula Stream Systems, Postdoc Mentoring & PhD Tips" +short: "Advancing Big Data Analytics: Post-Doctoral Research" season: 6 episode: 5 +guests: +- elenitziritazacharatou +image: images/podcast/big-data-analytics-and-postdoc-research.jpg ids: youtube: 7jgmIQGMhGE anchor: Advancing-Big-Data-Analytics-Post-Doctoral-Research---Eleni-Tzirita-Zacharatou-e1b6f41 @@ -15,8 +14,132 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Advancing-Big-Data-Analytics-Post-Doctoral-Research---Eleni-Tzirita-Zacharatou-e1b6f41 spotify: https://open.spotify.com/episode/6rgBSTPRvgNcJ7ouFyZmbH apple: https://podcasts.apple.com/us/podcast/advancing-big-data-analytics-post-doctoral-research/id1541710331?i=1000543884294 + +description: "Discover Spatial Big Data, Nebula Stream & postdoc mentoring: PhD tips, publishing, time-management and stream-processing tactics to boost your research." +intro: "How do you master spatial big data analytics while navigating the demands of postdoc research, systems building, and preparing for a PhD? 
In this episode, Eleni Tzirita-Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with a PhD from EPFL and award-winning work in data management — breaks down practical approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) and robust stream processing for IoT. We cover systems-driven research like the Nebula Stream and Agora infrastructure, spotting research trends via conferences and reviewing, and aligning academic work with industry needs. Eleni also outlines the postdoc role (mentoring, teaching, reviewing, dissemination), time management strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD or master’s thesis. Listeners will gain concrete guidance on research priorities beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, data cleaning evaluation challenges, and steps to increase diversity in CS. 
Tune in for actionable postdoc mentoring and PhD tips grounded in spatial big data and stream processing research" +topics: +- academia +- big data analytics +- tools +- data engineering +dateadded: 2021-12-05 + +duration: PT01H01M37S + +quotableClips: +- name: 'Guest Introduction: Eleni Tzirita-Zacharatou, postdoctoral researcher at + DIMA, TU Berlin' + startOffset: 73 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=73 + endOffset: 169 +- name: 'Academic Journey: Athens undergrad → EPFL PhD → Berlin postdoc' + startOffset: 169 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=169 + endOffset: 230 +- name: 'Spatial Big Data Analytics: Definitions, examples (GPS, trajectories, satellite + imagery)' + startOffset: 230 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=230 + endOffset: 356 +- name: 'Postdoc Role Overview: Research, mentoring, teaching, reviewing, dissemination' + startOffset: 356 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=356 + endOffset: 449 +- name: 'Time Management for Research: Focused days vs. 
multitasking' + startOffset: 449 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=449 + endOffset: 526 +- name: 'Publishing Realities: When research yields publishable outcomes' + startOffset: 526 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=526 + endOffset: 603 +- name: 'Top Data Management Venues: VLDB, SIGMOD, ICDE' + startOffset: 603 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=603 + endOffset: 693 +- name: 'Postdoc vs PhD: Increased responsibility, mentoring, and leadership' + startOffset: 693 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=693 + endOffset: 1018 +- name: 'Mentoring Strategy: Advising BSc/MSc topics and evaluating PhD proposals' + startOffset: 1018 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1018 + endOffset: 1388 +- name: 'DIMA Research Programs: Nebula Stream (IoT/stream processing) and Agora infrastructure' + startOffset: 1388 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1388 + endOffset: 1455 +- name: 'System-Driven Research: From Apache Flink legacy to new Nebula stream systems' + startOffset: 1455 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1455 + endOffset: 1495 +- name: 'Spotting Research Trends: Conferences, reviewing, and community roadmaps' + startOffset: 1495 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1495 + endOffset: 1710 +- name: 'Industry Engagement: Interfacing academic research with industry needs' + startOffset: 1710 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1710 + endOffset: 1827 +- name: 'Peer Reviewing: Invitations, visibility, and networking in academic service' + startOffset: 1827 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1827 + endOffset: 2018 +- name: 'Beyond Performance Metrics: Usability, energy, adoption as research priorities' + startOffset: 2018 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2018 + endOffset: 2181 +- name: 'Data Cleaning Research: Automation challenges and evaluation difficulties' + startOffset: 2181 + url: 
https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2181 + endOffset: 2320 +- name: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross-domain + work' + startOffset: 2320 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2320 + endOffset: 2470 +- name: 'Facilitating Cross-Group Collaboration: Physical spaces and informal interactions' + startOffset: 2470 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2470 + endOffset: 2657 +- name: 'Preparing for PhD Applications: Field choice, research quality, and AI/ML + trends' + startOffset: 2657 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2657 + endOffset: 2826 +- name: 'Master''s Thesis Selection: Advertised topics, mentor fit, internships and + skill alignment' + startOffset: 2826 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2826 + endOffset: 3127 +- name: 'Deciding on a PhD: Trial research in Master’s or internships before committing' + startOffset: 3127 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3127 + endOffset: 3299 +- name: 'PhD Expectations: Publication requirements and top-conference pressure' + startOffset: 3299 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3299 + endOffset: 3319 +- name: 'Increasing Female Participation in CS: Early outreach, role models, institutional + support' + startOffset: 3319 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3319 + endOffset: 3651 +- name: 'Personal Reflections: Stereotypes, belonging, and career persistence' + startOffset: 3651 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3651 + endOffset: 3714 +- name: 'Contact and Follow-Up: DIMA page and email for questions' + startOffset: 3714 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3714 + endOffset: 3722 +- name: Episode Conclusion and Thanks + startOffset: 3722 + url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3722 + endOffset: 3697 + transcript: -- header: 'Guest Introduction: Eleni Tzirita‑Zacharatou, postdoctoral researcher at +- header: 'Guest 
Introduction: Eleni Tzirita-Zacharatou, postdoctoral researcher at DIMA, TU Berlin' - line: This week, we'll talk about doing postdoctoral research. We have a special guest today, Eleni. Eleni is a postdoctoral researcher at the DIMA Group at TU @@ -382,7 +505,7 @@ transcript: sec: 1439 time: '23:59' who: Alexey -- header: 'System‑Driven Research: From Apache Flink legacy to new Nebula stream systems' +- header: 'System-Driven Research: From Apache Flink legacy to new Nebula stream systems' - line: Not really. Basically, in a sense, this ‘nebula stream’ system is kind of the next Flick, you could say. This is representative of how the DIMA group works. Before there was Flink and a lot of researchers were working on different problems @@ -633,7 +756,7 @@ transcript: sec: 2293 time: '38:13' who: Alexey -- header: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross‑domain +- header: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross-domain work' - line: Yeah. I have some experience working with people that are not in Data Management. In general, there are connections – it's easy to find connections – from data @@ -667,7 +790,7 @@ transcript: sec: 2434 time: '40:34' who: Alexey -- header: 'Facilitating Cross‑Group Collaboration: Physical spaces and informal interactions' +- header: 'Facilitating Cross-Group Collaboration: Physical spaces and informal interactions' - line: Yeah, that's a good point. Maybe now it has improved, I would say. Actually, there is one issue that I find to be a problem concerning where the groups are, which is the building itself. 
I think it's actually not so great that the DIMA @@ -862,7 +985,7 @@ transcript: sec: 3267 time: '54:27' who: Alexey -- header: 'PhD Expectations: Publication requirements and top‑conference pressure' +- header: 'PhD Expectations: Publication requirements and top-conference pressure' - line: Yeah, different groups have different requirements in terms of how many papers you are expected to publish? But yeah, in general, it's always at least one. Broadly speaking, at DIMA, it’s typically three and they have to be at top conferences. @@ -980,7 +1103,7 @@ transcript: sec: 3710 time: '1:01:50' who: Alexey -- header: 'Contact and Follow‑Up: DIMA page and email for questions' +- header: 'Contact and Follow-Up: DIMA page and email for questions' - line: This information is up to date. There is my email on my website – I guess that's the easiest way. sec: 3714 @@ -1022,137 +1145,6 @@ transcript: sec: 3770 time: '1:02:50' who: Eleni -description: 'Discover Spatial Big Data, Nebula Stream & postdoc mentoring: PhD tips, - publishing, time-management and stream-processing tactics to boost your research.' -intro: How do you master spatial big data analytics while navigating the demands of - postdoc research, systems building, and preparing for a PhD? In this episode, Eleni - Tzirita‑Zacharatou — a postdoctoral researcher at the DIMA Group, TU Berlin, with - a PhD from EPFL and award‑winning work in data management — breaks down practical - approaches to spatial big data analytics (GPS traces, trajectories, satellite imagery) - and robust stream processing for IoT. We cover systems‑driven research like the - Nebula Stream and Agora infrastructure, spotting research trends via conferences - and reviewing, and aligning academic work with industry needs. 
Eleni also outlines - the postdoc role (mentoring, teaching, reviewing, dissemination), time management - strategies, realities of publishing and top venues (VLDB, SIGMOD, ICDE), mentoring - tactics for BSc/MSc/PhD students, and advice on choosing and preparing for a PhD - or master’s thesis. Listeners will gain concrete guidance on research priorities - beyond raw performance (usability, energy, adoption), multidisciplinary collaboration, - data cleaning evaluation challenges, and steps to increase diversity in CS. Tune - in for actionable postdoc mentoring and PhD tips grounded in spatial big data and - stream processing research. -dateadded: '2021-12-05' -duration: PT01H01M37S -quotableClips: -- name: 'Guest Introduction: Eleni Tzirita‑Zacharatou, postdoctoral researcher at - DIMA, TU Berlin' - startOffset: 73 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=73 - endOffset: 169 -- name: 'Academic Journey: Athens undergrad → EPFL PhD → Berlin postdoc' - startOffset: 169 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=169 - endOffset: 230 -- name: 'Spatial Big Data Analytics: Definitions, examples (GPS, trajectories, satellite - imagery)' - startOffset: 230 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=230 - endOffset: 356 -- name: 'Postdoc Role Overview: Research, mentoring, teaching, reviewing, dissemination' - startOffset: 356 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=356 - endOffset: 449 -- name: 'Time Management for Research: Focused days vs. 
multitasking' - startOffset: 449 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=449 - endOffset: 526 -- name: 'Publishing Realities: When research yields publishable outcomes' - startOffset: 526 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=526 - endOffset: 603 -- name: 'Top Data Management Venues: VLDB, SIGMOD, ICDE' - startOffset: 603 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=603 - endOffset: 693 -- name: 'Postdoc vs PhD: Increased responsibility, mentoring, and leadership' - startOffset: 693 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=693 - endOffset: 1018 -- name: 'Mentoring Strategy: Advising BSc/MSc topics and evaluating PhD proposals' - startOffset: 1018 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1018 - endOffset: 1388 -- name: 'DIMA Research Programs: Nebula Stream (IoT/stream processing) and Agora infrastructure' - startOffset: 1388 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1388 - endOffset: 1455 -- name: 'System‑Driven Research: From Apache Flink legacy to new Nebula stream systems' - startOffset: 1455 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1455 - endOffset: 1495 -- name: 'Spotting Research Trends: Conferences, reviewing, and community roadmaps' - startOffset: 1495 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1495 - endOffset: 1710 -- name: 'Industry Engagement: Interfacing academic research with industry needs' - startOffset: 1710 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1710 - endOffset: 1827 -- name: 'Peer Reviewing: Invitations, visibility, and networking in academic service' - startOffset: 1827 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=1827 - endOffset: 2018 -- name: 'Beyond Performance Metrics: Usability, energy, adoption as research priorities' - startOffset: 2018 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2018 - endOffset: 2181 -- name: 'Data Cleaning Research: Automation challenges and evaluation difficulties' - startOffset: 2181 - url: 
https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2181 - endOffset: 2320 -- name: 'Multidisciplinary Collaboration: Remote sensing, neuroscience, and cross‑domain - work' - startOffset: 2320 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2320 - endOffset: 2470 -- name: 'Facilitating Cross‑Group Collaboration: Physical spaces and informal interactions' - startOffset: 2470 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2470 - endOffset: 2657 -- name: 'Preparing for PhD Applications: Field choice, research quality, and AI/ML - trends' - startOffset: 2657 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2657 - endOffset: 2826 -- name: 'Master''s Thesis Selection: Advertised topics, mentor fit, internships and - skill alignment' - startOffset: 2826 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=2826 - endOffset: 3127 -- name: 'Deciding on a PhD: Trial research in Master’s or internships before committing' - startOffset: 3127 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3127 - endOffset: 3299 -- name: 'PhD Expectations: Publication requirements and top‑conference pressure' - startOffset: 3299 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3299 - endOffset: 3319 -- name: 'Increasing Female Participation in CS: Early outreach, role models, institutional - support' - startOffset: 3319 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3319 - endOffset: 3651 -- name: 'Personal Reflections: Stereotypes, belonging, and career persistence' - startOffset: 3651 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3651 - endOffset: 3714 -- name: 'Contact and Follow‑Up: DIMA page and email for questions' - startOffset: 3714 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3714 - endOffset: 3722 -- name: Episode Conclusion and Thanks - startOffset: 3722 - url: https://www.youtube.com/watch?v=7jgmIQGMhGE&t=3722 - endOffset: 3697 --- Links: diff --git a/_podcast/s04e03-big-data-engineer-vs-data-scientist.md 
b/_podcast/big-data-engineer-vs-data-scientist.md similarity index 97% rename from _podcast/s04e03-big-data-engineer-vs-data-scientist.md rename to _podcast/big-data-engineer-vs-data-scientist.md index 74a9dec4..5dccb708 100644 --- a/_podcast/s04e03-big-data-engineer-vs-data-scientist.md +++ b/_podcast/big-data-engineer-vs-data-scientist.md @@ -1,11 +1,11 @@ --- -title: 'Big Data Engineer vs Data Scientist: Skills, Tools, and Career Paths' -short: Big Data Engineer vs Data Scientist -guests: -- roksolanadiachuk -image: images/podcast/s04e03-big-data-engineer-vs-data-scientist.jpg +title: "Big Data Engineer vs Data Scientist: Skills, Tools, and Career Paths" +short: "Big Data Engineer vs Data Scientist" season: 4 episode: 3 +guests: +- roksolanadiachuk +image: images/podcast/big-data-engineer-vs-data-scientist.jpg ids: youtube: yg3d1lFd7Uo anchor: Big-Data-Engineer-vs-Data-Scientist---Roksolana-Diachuk-e139sl8 @@ -14,6 +14,150 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Big-Data-Engineer-vs-Data-Scientist---Roksolana-Diachuk-e139sl8 spotify: https://open.spotify.com/episode/08Mb5JOOo6sWOFgsXILVsj apple: https://podcasts.apple.com/us/podcast/big-data-engineer-vs-data-scientist-roksolana-diachuk/id1541710331?i=1000528386609 + +description: "Discover how Big Data Engineer vs Data Scientist roles differ — skills, performance optimization, ETL pipelines and ML deployment tips to advance your career" +intro: "How do the day-to-day responsibilities and skill sets really differ between a Big Data Engineer and a Data Scientist—and what should you learn to move between those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her career transition from backend Java into big data engineering and R&D.

We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala and Parquet formats—plus performance tuning: Spark job optimization, cluster resource planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries (data cleaning and feature engineering for data scientists vs pipeline design and formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning with Delta Lake, observability, documentation, starter projects and learning resources.

Listen to learn which skills, tools and projects will help you choose or transition between careers, and what to prioritize when building scalable data pipelines, deploying models, and ensuring data quality." +topics: +- career transition +- software engineering +- data engineering +- data science +dateadded: 2021-07-10 + +duration: PT01H01M27S + +quotableClips: +- name: Episode Overview & Guest Introduction + startOffset: 112 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=112 + endOffset: 148 +- name: 'Career Path: From Backend Java to Big Data Engineering (Scala, R&D, Captify)' + startOffset: 148 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=148 + endOffset: 266 +- name: 'Core Responsibilities: Building ETL Data Pipelines, HDFS/S3, Impala' + startOffset: 266 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=266 + endOffset: 398 +- name: 'Performance Focus: Spark Job Optimization & Cluster Resource Planning' + startOffset: 398 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=398 + endOffset: 438 +- name: 'Big Data Tooling: Spark, S3/HDFS, Kubernetes, Prometheus, Grafana, Scala + libs' + startOffset: 438 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=438 + endOffset: 484 +- name: 'Storytelling in Tech Talks: "Alice" Series and Conference Presentations' + startOffset: 484 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=484 + endOffset: 552 +- name: 'Role Comparison: Big Data Engineer vs Data Engineer (formats: Avro, Parquet, + ProtoBuf)' + startOffset: 552 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=552 + endOffset: 667 +- name: 'Essential Skills: Coding, SQL, Distributed Systems & Infrastructure Awareness' + startOffset: 667 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=667 + endOffset: 836 +- name: 'Data Scientist Scope: Data Cleaning, Feature Engineering, Model Cycle & Deployment' + startOffset: 836 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=836 + endOffset: 932 +- name: 'Tool Overlap: Spark & Python vs ML 
Libraries for Modeling' + startOffset: 932 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=932 + endOffset: 986 +- name: 'Collaboration Model: File Interfaces (Parquet) and Team Structures' + startOffset: 986 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=986 + endOffset: 1134 +- name: 'Case Study: Recommendation System — Streaming and Batch Pipeline Design' + startOffset: 1134 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1134 + endOffset: 1371 +- name: 'Streaming vs Batch Choices: Flink for Streaming, Spark for Batch, Parquet + on S3' + startOffset: 1371 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1371 + endOffset: 1420 +- name: 'ML Deployment Stack: MLflow, Kubeflow, Kubernetes & ML Engineer Roles' + startOffset: 1420 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1420 + endOffset: 1489 +- name: 'Cross-Skill Expectations: What Data Scientists Should Know About Pipelines' + startOffset: 1489 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1489 + endOffset: 1650 +- name: 'Upskilling for Engineers: Data Engineers Learning ML Inputs/Outputs (not + algorithms)' + startOffset: 1650 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1650 + endOffset: 1853 +- name: 'Transition Path: Analyst/Data Scientist → Data Engineer (coding, DBs, infra)' + startOffset: 1853 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1853 + endOffset: 2093 +- name: 'Databases to Learn: PostgreSQL, MySQL, MongoDB, Neo4j (SQL vs NoSQL)' + startOffset: 2093 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2093 + endOffset: 2167 +- name: 'Infrastructure Essentials: Docker, Cloud Services, Intro to Kubernetes' + startOffset: 2167 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2167 + endOffset: 2349 +- name: 'Data Quality & Monitoring: Flow Metrics, Spikes, and Schema Change Alerts' + startOffset: 2349 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2349 + endOffset: 2617 +- name: 'Data Documentation & Governance: Schema Descriptions, 
Confluence, HypeSQL' + startOffset: 2617 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2617 + endOffset: 2774 +- name: 'Software Engineering for Data Scientists: Code Quality, Reproducibility, + DB Skills' + startOffset: 2774 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2774 + endOffset: 2906 +- name: 'Hands-on Learning Resources: Katacoda, Google Codelabs, Databricks Trainings' + startOffset: 2906 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2906 + endOffset: 2969 +- name: 'Career Advice for Graduates: Choosing Data Engineering vs Data Science' + startOffset: 2969 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2969 + endOffset: 3076 +- name: 'Starter Projects: Word Count, Twitter Streaming, Elasticsearch + Kibana' + startOffset: 3076 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3076 + endOffset: 3208 +- name: 'Datasets for Practice: Wikipedia Dumps, CommonCrawl, NASA APIs, Social Media' + startOffset: 3208 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3208 + endOffset: 3368 +- name: 'Pre-built ETL Platforms vs Custom Pipelines: Trade-offs & Scalability' + startOffset: 3368 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3368 + endOffset: 3485 +- name: 'Operational Challenges: Deduplication, Historical Reprocessing, Risk Management' + startOffset: 3485 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3485 + endOffset: 3625 +- name: 'Data Versioning & Time Travel: Delta Lake for Reprocessing and Auditing' + startOffset: 3625 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3625 + endOffset: 3640 +- name: 'Learning Recommendations: Coursera Big Data Specialization; Spark & Data + books' + startOffset: 3640 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3640 + endOffset: 3754 +- name: 'Guest Links & Talks: Twitter, LinkedIn, YouTube (Alice & Kubernetes talks)' + startOffset: 3754 + url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3754 + endOffset: 3687 + transcript: - header: Episode Overview & Guest 
Introduction - line: Today we will talk about the difference between big data engineers and data @@ -1318,157 +1462,6 @@ transcript: sec: 3799 time: '1:03:19' who: Alexey -description: Discover how Big Data Engineer vs Data Scientist roles differ — skills, - performance optimization, ETL pipelines and ML deployment tips to advance your career. -intro: 'How do the day‑to‑day responsibilities and skill sets really differ between - a Big Data Engineer and a Data Scientist—and what should you learn to move between - those roles? In this episode, Roksolana Diachuk, a Big Data Engineer at Captify, - Women Who Code Kyiv lead and speaker on Scala and Kubernetes, walks through her - career transition from backend Java into big data engineering and R&D.

- We cover core responsibilities—building ETL data pipelines, HDFS/S3 storage, Impala - and Parquet formats—plus performance tuning: Spark job optimization, cluster resource - planning and monitoring with Prometheus/Grafana. Roksolana compares role boundaries - (data cleaning and feature engineering for data scientists vs pipeline design and - formats like Avro/Parquet/ProtoBuf), explores streaming vs batch tradeoffs (Flink - vs Spark), and outlines ML deployment stacks (MLflow, Kubeflow, Kubernetes). Practical - topics include databases to learn (Postgres, MySQL, MongoDB, Neo4j), data versioning - with Delta Lake, observability, documentation, starter projects and learning resources. -

Listen to learn which skills, tools and projects will help you choose or - transition between careers, and what to prioritize when building scalable data pipelines, - deploying models, and ensuring data quality.' -dateadded: '2021-07-10' -duration: PT01H01M27S -quotableClips: -- name: Episode Overview & Guest Introduction - startOffset: 112 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=112 - endOffset: 148 -- name: 'Career Path: From Backend Java to Big Data Engineering (Scala, R&D, Captify)' - startOffset: 148 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=148 - endOffset: 266 -- name: 'Core Responsibilities: Building ETL Data Pipelines, HDFS/S3, Impala' - startOffset: 266 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=266 - endOffset: 398 -- name: 'Performance Focus: Spark Job Optimization & Cluster Resource Planning' - startOffset: 398 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=398 - endOffset: 438 -- name: 'Big Data Tooling: Spark, S3/HDFS, Kubernetes, Prometheus, Grafana, Scala - libs' - startOffset: 438 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=438 - endOffset: 484 -- name: 'Storytelling in Tech Talks: "Alice" Series and Conference Presentations' - startOffset: 484 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=484 - endOffset: 552 -- name: 'Role Comparison: Big Data Engineer vs Data Engineer (formats: Avro, Parquet, - ProtoBuf)' - startOffset: 552 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=552 - endOffset: 667 -- name: 'Essential Skills: Coding, SQL, Distributed Systems & Infrastructure Awareness' - startOffset: 667 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=667 - endOffset: 836 -- name: 'Data Scientist Scope: Data Cleaning, Feature Engineering, Model Cycle & Deployment' - startOffset: 836 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=836 - endOffset: 932 -- name: 'Tool Overlap: Spark & Python vs ML Libraries for Modeling' - startOffset: 932 - url: 
https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=932 - endOffset: 986 -- name: 'Collaboration Model: File Interfaces (Parquet) and Team Structures' - startOffset: 986 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=986 - endOffset: 1134 -- name: 'Case Study: Recommendation System — Streaming and Batch Pipeline Design' - startOffset: 1134 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1134 - endOffset: 1371 -- name: 'Streaming vs Batch Choices: Flink for Streaming, Spark for Batch, Parquet - on S3' - startOffset: 1371 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1371 - endOffset: 1420 -- name: 'ML Deployment Stack: MLflow, Kubeflow, Kubernetes & ML Engineer Roles' - startOffset: 1420 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1420 - endOffset: 1489 -- name: 'Cross-Skill Expectations: What Data Scientists Should Know About Pipelines' - startOffset: 1489 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1489 - endOffset: 1650 -- name: 'Upskilling for Engineers: Data Engineers Learning ML Inputs/Outputs (not - algorithms)' - startOffset: 1650 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1650 - endOffset: 1853 -- name: 'Transition Path: Analyst/Data Scientist → Data Engineer (coding, DBs, infra)' - startOffset: 1853 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=1853 - endOffset: 2093 -- name: 'Databases to Learn: PostgreSQL, MySQL, MongoDB, Neo4j (SQL vs NoSQL)' - startOffset: 2093 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2093 - endOffset: 2167 -- name: 'Infrastructure Essentials: Docker, Cloud Services, Intro to Kubernetes' - startOffset: 2167 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2167 - endOffset: 2349 -- name: 'Data Quality & Monitoring: Flow Metrics, Spikes, and Schema Change Alerts' - startOffset: 2349 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2349 - endOffset: 2617 -- name: 'Data Documentation & Governance: Schema Descriptions, Confluence, HypeSQL' - startOffset: 2617 - url: 
https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2617 - endOffset: 2774 -- name: 'Software Engineering for Data Scientists: Code Quality, Reproducibility, - DB Skills' - startOffset: 2774 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2774 - endOffset: 2906 -- name: 'Hands-on Learning Resources: Katacoda, Google Codelabs, Databricks Trainings' - startOffset: 2906 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2906 - endOffset: 2969 -- name: 'Career Advice for Graduates: Choosing Data Engineering vs Data Science' - startOffset: 2969 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=2969 - endOffset: 3076 -- name: 'Starter Projects: Word Count, Twitter Streaming, Elasticsearch + Kibana' - startOffset: 3076 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3076 - endOffset: 3208 -- name: 'Datasets for Practice: Wikipedia Dumps, CommonCrawl, NASA APIs, Social Media' - startOffset: 3208 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3208 - endOffset: 3368 -- name: 'Pre-built ETL Platforms vs Custom Pipelines: Trade-offs & Scalability' - startOffset: 3368 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3368 - endOffset: 3485 -- name: 'Operational Challenges: Deduplication, Historical Reprocessing, Risk Management' - startOffset: 3485 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3485 - endOffset: 3625 -- name: 'Data Versioning & Time Travel: Delta Lake for Reprocessing and Auditing' - startOffset: 3625 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3625 - endOffset: 3640 -- name: 'Learning Recommendations: Coursera Big Data Specialization; Spark & Data - books' - startOffset: 3640 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3640 - endOffset: 3754 -- name: 'Guest Links & Talks: Twitter, LinkedIn, YouTube (Alice & Kubernetes talks)' - startOffset: 3754 - url: https://www.youtube.com/watch?v=yg3d1lFd7Uo&t=3754 - endOffset: 3687 --- Links: diff --git a/_podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.md 
b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md similarity index 97% rename from _podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.md rename to _podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md index 68a0d4fa..588e027a 100644 --- a/_podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.md +++ b/_podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.md @@ -1,20 +1,127 @@ --- +title: "Actionable Biohacks to Boost Productivity: Sleep, Circadian Light, Dopamine & Habits" +short: "Biohacking for Data Scientists and ML Engineers" +season: 13 episode: 3 guests: - ruslanshchuchkin +image: images/podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.jpg ids: anchor: ow/datatalksclub/episodes/Biohacking-for-Data-Scientists-and-ML-Engineers---Ruslan-Shchuchkin-e1vpm1i youtube: uyxUBADZYpU -image: images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Biohacking-for-Data-Scientists-and-ML-Engineers---Ruslan-Shchuchkin-e1vpm1i apple: https://podcasts.apple.com/us/podcast/biohacking-for-data-scientists-and-ml-engineers/id1541710331?i=1000603633848 spotify: https://open.spotify.com/episode/6IuHKMK4CJdcVJNq9uQ9lm?si=PgXZHBCNSu21Nma1ToxGyQ youtube: https://www.youtube.com/watch?v=uyxUBADZYpU -season: 13 -short: Biohacking for Data Scientists and ML Engineers -title: 'Actionable Biohacks to Boost Productivity: Sleep, Circadian Light, Dopamine - & Habits' + +description: "Discover actionable biohacks for sleep and dopamine to boost productivity with 90-min cycles, morning light, habit tracking and energy-focused routines" +intro: "How do small, science-aligned biohacks actually move the needle on focus and productivity? 
In this episode, Ruslan Shchuchkin, a Berlin-based data scientist who transitioned from business/marketing into data science after experimenting with many techniques to stay focused, walks through practical, evidence-minded strategies for improving performance. We cover the root causes of procrastination and perfectionism, behavioral biohacking versus chemical interventions, and how dopamine-driven problem-solving fuels habits. Ruslan explains meditation and NSDR for prefrontal focus, morning sun and circadian light exposure to regulate cortisol and melatonin, and daylight lamps and wake lighting for low-daylight homes. He shares sleep planning tips based on 90-minute cycles, protein-forward nutrition for sustained focus, and caffeine timing trade-offs. You’ll also hear about habit tracking (logs and Notion dashboards), voluntary discomfort as a dopamine reset, failed experiments worth avoiding, safety considerations, and a simple prioritization framework. If you want actionable biohacks—sleep, circadian light, dopamine management, habit tracking, and meditation—to boost sustainable productivity, this episode offers concrete, practical steps grounded in real-world experience" +topics: +- biohacking +- productivity +dateadded: 2023-03-11 + +duration: PT00H57M58S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=0 + endOffset: 87 +- name: Episode Overview & Guest Introduction + startOffset: 87 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=87 + endOffset: 138 +- name: 'Career Journey: From Business/Marketing to Data Science' + startOffset: 138 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=138 + endOffset: 291 +- name: 'Procrastination & Perfectionism: Acceptance and Deadline Effects' + startOffset: 291 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=291 + endOffset: 416 +- name: 'Biohacking Defined: Behavioral Approaches vs. 
Chemical Interventions' + startOffset: 416 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=416 + endOffset: 571 +- name: 'Dopamine & Motivation: Problem-Solving Rewards and Habit Drivers' + startOffset: 571 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=571 + endOffset: 742 +- name: 'Meditation Benefits: Focus, Prefrontal Cortex, and Non-Sleep Deep Rest' + startOffset: 742 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=742 + endOffset: 1121 +- name: 'Light Exposure & Circadian Health: Morning Sun, Cortisol, Melatonin' + startOffset: 1121 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1121 + endOffset: 1336 +- name: 'Evolutionary Perspective: Simple Behavioral Biohacks for Productivity' + startOffset: 1336 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1336 + endOffset: 1574 +- name: 'Daylight Lamps & Wake Lighting: Alternatives for Low-Daylight Homes' + startOffset: 1574 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1574 + endOffset: 1670 +- name: 'Sleep Planning: 90-Minute Cycles and Alarm Timing Strategies' + startOffset: 1670 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1670 + endOffset: 1978 +- name: 'Nutrition for Focus: Protein Breakfasts, Lunch Effects, and Energy' + startOffset: 1978 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1978 + endOffset: 2287 +- name: 'Productivity Tracking: Logs, Notion Dashboards, and Self-Reflection' + startOffset: 2287 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2287 + endOffset: 2476 +- name: 'Failed Experiments: Intermittent Fasting, Cold Showers, and Limits' + startOffset: 2476 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2476 + endOffset: 2605 +- name: 'Voluntary Discomfort & Dopamine Resets: Stoic Challenges' + startOffset: 2605 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2605 + endOffset: 2747 +- name: 'Safety Considerations: Evidence-Based Biohacking and Medical Advice' + startOffset: 2747 + url: 
https://www.youtube.com/watch?v=uyxUBADZYpU&t=2747 + endOffset: 2832 +- name: 'Caffeine Strategy: Coffee, Timing, and Sleep Trade-Offs' + startOffset: 2832 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2832 + endOffset: 2901 +- name: 'Habit Tracking in Practice: Steps, Exercise, Hydration Metrics' + startOffset: 2901 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2901 + endOffset: 3051 +- name: 'Mindset Shifts: Gratitude, Prioritization, and Sustainable Goals' + startOffset: 3051 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3051 + endOffset: 3178 +- name: 'Stoicism Reading Recommendation: Meditations and Mental Models' + startOffset: 3178 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3178 + endOffset: 3225 +- name: 'Prioritization Framework: Focusing on Five Impactful Goals' + startOffset: 3225 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3225 + endOffset: 3321 +- name: 'Overcoming Perfectionism: Self-Compassion and Temporal Perspective' + startOffset: 3321 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3321 + endOffset: 3443 +- name: 'Resources for Learning Biohacking: Huberman Lab Podcast & Top Episodes' + startOffset: 3443 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3443 + endOffset: 3533 +- name: Key Takeaways, Next Steps, and Episode Close + startOffset: 3533 + url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3533 + endOffset: 3478 + transcript: - header: Podcast Introduction - header: Episode Overview & Guest Introduction @@ -1421,123 +1528,6 @@ transcript: sec: 3565 time: '59:25' who: Alexey -description: Discover actionable biohacks for sleep and dopamine to boost productivity - with 90-min cycles, morning light, habit tracking and energy-focused routines. -intro: How do small, science-aligned biohacks actually move the needle on focus and - productivity? 
In this episode, Ruslan Shchuchkin, a Berlin-based data scientist who - transitioned from business/marketing into data science after experimenting with - many techniques to stay focused, walks through practical, evidence-minded strategies - for improving performance. We cover the root causes of procrastination and perfectionism, - behavioral biohacking versus chemical interventions, and how dopamine-driven problem-solving - fuels habits. Ruslan explains meditation and NSDR for prefrontal focus, morning - sun and circadian light exposure to regulate cortisol and melatonin, and daylight - lamps and wake lighting for low-daylight homes. He shares sleep planning tips based - on 90-minute cycles, protein-forward nutrition for sustained focus, and caffeine - timing trade-offs. You’ll also hear about habit tracking (logs and Notion dashboards), - voluntary discomfort as a dopamine reset, failed experiments worth avoiding, safety - considerations, and a simple prioritization framework. If you want actionable biohacks—sleep, - circadian light, dopamine management, habit tracking, and meditation—to boost sustainable - productivity, this episode offers concrete, practical steps grounded in real-world - experience. -dateadded: '2023-03-11' -duration: PT00H57M58S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=0 - endOffset: 87 -- name: Episode Overview & Guest Introduction - startOffset: 87 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=87 - endOffset: 138 -- name: 'Career Journey: From Business/Marketing to Data Science' - startOffset: 138 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=138 - endOffset: 291 -- name: 'Procrastination & Perfectionism: Acceptance and Deadline Effects' - startOffset: 291 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=291 - endOffset: 416 -- name: 'Biohacking Defined: Behavioral Approaches vs. 
Chemical Interventions' - startOffset: 416 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=416 - endOffset: 571 -- name: 'Dopamine & Motivation: Problem-Solving Rewards and Habit Drivers' - startOffset: 571 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=571 - endOffset: 742 -- name: 'Meditation Benefits: Focus, Prefrontal Cortex, and Non-Sleep Deep Rest' - startOffset: 742 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=742 - endOffset: 1121 -- name: 'Light Exposure & Circadian Health: Morning Sun, Cortisol, Melatonin' - startOffset: 1121 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1121 - endOffset: 1336 -- name: 'Evolutionary Perspective: Simple Behavioral Biohacks for Productivity' - startOffset: 1336 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1336 - endOffset: 1574 -- name: 'Daylight Lamps & Wake Lighting: Alternatives for Low-Daylight Homes' - startOffset: 1574 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1574 - endOffset: 1670 -- name: 'Sleep Planning: 90-Minute Cycles and Alarm Timing Strategies' - startOffset: 1670 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1670 - endOffset: 1978 -- name: 'Nutrition for Focus: Protein Breakfasts, Lunch Effects, and Energy' - startOffset: 1978 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=1978 - endOffset: 2287 -- name: 'Productivity Tracking: Logs, Notion Dashboards, and Self-Reflection' - startOffset: 2287 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2287 - endOffset: 2476 -- name: 'Failed Experiments: Intermittent Fasting, Cold Showers, and Limits' - startOffset: 2476 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2476 - endOffset: 2605 -- name: 'Voluntary Discomfort & Dopamine Resets: Stoic Challenges' - startOffset: 2605 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2605 - endOffset: 2747 -- name: 'Safety Considerations: Evidence-Based Biohacking and Medical Advice' - startOffset: 2747 - url: 
https://www.youtube.com/watch?v=uyxUBADZYpU&t=2747 - endOffset: 2832 -- name: 'Caffeine Strategy: Coffee, Timing, and Sleep Trade-Offs' - startOffset: 2832 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2832 - endOffset: 2901 -- name: 'Habit Tracking in Practice: Steps, Exercise, Hydration Metrics' - startOffset: 2901 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=2901 - endOffset: 3051 -- name: 'Mindset Shifts: Gratitude, Prioritization, and Sustainable Goals' - startOffset: 3051 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3051 - endOffset: 3178 -- name: 'Stoicism Reading Recommendation: Meditations and Mental Models' - startOffset: 3178 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3178 - endOffset: 3225 -- name: 'Prioritization Framework: Focusing on Five Impactful Goals' - startOffset: 3225 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3225 - endOffset: 3321 -- name: 'Overcoming Perfectionism: Self-Compassion and Temporal Perspective' - startOffset: 3321 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3321 - endOffset: 3443 -- name: 'Resources for Learning Biohacking: Huberman Lab Podcast & Top Episodes' - startOffset: 3443 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3443 - endOffset: 3533 -- name: Key Takeaways, Next Steps, and Episode Close - startOffset: 3533 - url: https://www.youtube.com/watch?v=uyxUBADZYpU&t=3533 - endOffset: 3478 --- Links: diff --git a/_podcast/s22e03-from-biotechnology-to-bioinformatics-software.md b/_podcast/bioinformatics-worflows-tools-and-data-science.md similarity index 93% rename from _podcast/s22e03-from-biotechnology-to-bioinformatics-software.md rename to _podcast/bioinformatics-worflows-tools-and-data-science.md index d239051e..62777571 100644 --- a/_podcast/s22e03-from-biotechnology-to-bioinformatics-software.md +++ b/_podcast/bioinformatics-worflows-tools-and-data-science.md @@ -1,20 +1,126 @@ --- +title: "Bioinformatics Workflows in Practice: Sequencing, Metagenomics, and 
Open-Source Tools" +short: "Applying Data Science Concepts, Tools, and Workflows to Accelerate Biological Research" +season: 22 episode: 3 guests: - sebastianayalaruano +image: images/podcast/bioinformatics-worflows-tools-and-data-science.jpg ids: anchor: datatalksclub/episodes/From-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-RuanoFrom-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-Ruano-e39vsv6 youtube: ZFrcrTtnB1Q -image: images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-RuanoFrom-Biotechnology-to-Bioinformatics-Software---Sebastian-Ayala-Ruano-e39vsv6 apple: https://podcasts.apple.com/us/podcast/from-biotechnology-to-bioinformatics-software-sebastian/id1541710331?i=1000733347636 spotify: https://open.spotify.com/episode/3CohNIXZdooLYoIyIbr6EF youtube: https://www.youtube.com/watch?v=ZFrcrTtnB1Q -season: 22 -short: From Biotechnology to Bioinformatics Software -title: 'Wastewater Metagenomics & Knowledge Graphs: Network Inference, AlphaFold & - Open-Source Tools' +description: "Master bioinformatics workflows for sequencing & metagenomics with open-source tools, streamline pipelines, boost reproducibility, and speed analyses." +topics: +- bioinformatics +- LLMs +- MLOps +- open-source +- tools +intro: "How do you build reproducible, scalable bioinformatics workflows for sequencing and metagenomics using open-source tools? In this episode we explore practical answers with Sebastian Ayala Ruano, a bioinformatics software developer and Master's student in Systems Biology at Maastricht University. Sebastian has contributed to open-source projects such as MicW2Graph, VueGen, and VueCore to simplify multi-omics data analysis and has a background in cheminformatics, peptide discovery, and network-based analysis.

We discuss real-world sequencing and metagenomics workflows, trade-offs in pipeline design, and how open-source tools and educational software can accelerate reproducible research. Sebastian also outlines how machine learning and network science concepts inform analysis strategies for complex biological data. Key topics include sequencing data processing, metagenomic analysis approaches, workflow automation, and practical considerations for integrating multi-omics datasets.

Listeners will gain concrete guidance for designing bioinformatics pipelines, selecting open-source tools, and applying network- and ML-driven methods to improve interpretation. This episode is useful for researchers and developers wanting actionable perspectives on sequencing, metagenomics, and building reliable workflows backed by community tools and resources." +dateadded: 2025-10-27 +duration: PT00H55M13S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=0 + endOffset: 69 +- name: 'Career Transition: Biotechnology to Bioinformatics Software' + startOffset: 69 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=69 + endOffset: 221 +- name: 'Master’s Thesis Overview: Wastewater Microbiome Knowledge Graph' + startOffset: 221 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=221 + endOffset: 387 +- name: 'Bioinformatics Role: Reducing Lab Experiments with Computational Analysis' + startOffset: 387 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=387 + endOffset: 503 +- name: 'Wet Lab vs Dry Lab: Experimental Work vs Computational Pipelines' + startOffset: 503 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=503 + endOffset: 681 +- name: 'Bioinformatics as Data Science: From Sequencing to Analysis' + startOffset: 681 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=681 + endOffset: 755 +- name: 'Genomic Data Basics: Nucleotides and DNA Sequences' + startOffset: 755 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=755 + endOffset: 930 +- name: DNA Sequencing Workflow and Reference Genomes + startOffset: 930 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=930 + endOffset: 1076 +- name: 'Metagenomics: Environmental Sampling and Abundance Tables' + startOffset: 1076 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1076 + endOffset: 1181 +- name: 'Building Microbial Networks: Co-abundance and Association Inference' + startOffset: 1181 + url: 
https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1181 + endOffset: 1471 +- name: 'Network Inference Methodology: CC Lasso, Correlations, and Thresholding' + startOffset: 1471 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1471 + endOffset: 1626 +- name: 'Molecular Simulations: Protein–Ligand Dynamics and Water Boxes' + startOffset: 1626 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1626 + endOffset: 1798 +- name: 'Protein Folding Revolution: AlphaFold Impact on Structure Prediction' + startOffset: 1798 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1798 + endOffset: 2180 +- name: 'Open-Source Projects Overview: MCW2 Graph, VueGen, and VueCore' + startOffset: 2180 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2180 + endOffset: 2311 +- name: 'Knowledge Graph Exploration: Neo4j, Streamlit, and Graph Algorithms' + startOffset: 2311 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2311 + endOffset: 2400 +- name: 'Report Automation with VueGen: Quarto, Streamlit, and Export Formats' + startOffset: 2400 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2400 + endOffset: 2549 +- name: 'Package Ecosystem: Bioconda, Bioconductor, and Bioinformatics Libraries' + startOffset: 2549 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2549 + endOffset: 2636 +- name: 'Omics Visualization: VueCore for Genomics, Proteomics, and Metabolomics' + startOffset: 2636 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2636 + endOffset: 2708 +- name: 'Portfolio Advice: Beginner Bioinformatics Projects and Tools to Showcase' + startOffset: 2708 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2708 + endOffset: 2870 +- name: 'AI & LLMs in Bioinformatics: Documentation, MLOps, and Coding Assistants' + startOffset: 2870 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2870 + endOffset: 3025 +- name: 'Language Tradeoffs: R vs Python and Scaling Scientific Tools' + startOffset: 3025 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3025 + endOffset: 
3113 +- name: 'Visualization Workflows: Viewer and Supporting Plotting Libraries' + startOffset: 3113 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3113 + endOffset: 3197 +- name: 'Remote Work & Field Life: Working from Ecuador and Nature Notes' + startOffset: 3197 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3197 + endOffset: 3250 +- name: 'Episode Wrap-up: Open-Source Encouragement and Closing Remarks' + startOffset: 3250 + url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3250 + endOffset: 3313 transcript: - header: Podcast Introduction - line: Hi everyone, welcome to our event. This event is brought to you by Data Talks @@ -338,7 +444,7 @@ transcript: sec: 1169 time: '19:29' who: Alexey -- header: 'Building Microbial Networks: Co‑abundance and Association Inference' +- header: 'Building Microbial Networks: Co-abundance and Association Inference' - line: Yes. In our case, we focused on wastewater treatment plants. We analyzed data from different locations because there were many available samples. sec: 1181 @@ -626,7 +732,7 @@ transcript: sec: 2173 time: '36:13' who: Alexey -- header: 'Open‑Source Projects Overview: MCW2 Graph, VueGen, and VueCore' +- header: 'Open-Source Projects Overview: MCW2 Graph, VueGen, and VueCore' - line: You worked on quite a few projects. The document mentions MCW2 Graph, VueGen, and VueCore. What are these projects and what do they do? sec: 2180 @@ -941,7 +1047,7 @@ transcript: sec: 3243 time: '54:03' who: Sebastian -- header: 'Episode Wrap‑up: Open‑Source Encouragement and Closing Remarks' +- header: 'Episode Wrap-up: Open-Source Encouragement and Closing Remarks' - line: That must be amazing. I actually have another event starting soon, so I need to go. Sebastian, thanks a lot. It was really nice talking to you. I learned many new things. 
I suspected proteins were important not just for the gym but for other @@ -962,127 +1068,15 @@ transcript: sec: 3313 time: '55:13' who: Sebastian -description: Discover wastewater metagenomics knowledge graphs & AlphaFold-driven - network inference using open-source bioinformatics tools to map microbes and cut - lab tests. -intro: How can wastewater metagenomics and knowledge graphs reveal microbial interactions - while reducing wet‑lab experiments? In this episode, Sebastian Ayala Ruano — a bioinformatics - software developer and Master’s student in Systems Biology — walks through his wastewater - microbiome knowledge graph thesis and open‑source tooling for multi‑omics analysis. -

We cover metagenomics workflows from sequencing and abundance tables to - building microbial networks with co‑abundance and association inference (CC Lasso, - correlations, thresholding), plus network inference best practices. Sebastian also - explains molecular simulations, protein–ligand dynamics and the practical impact - of AlphaFold on structure prediction. Hear about MCW2 Graph, VueGen and VueCore, - knowledge graph exploration with Neo4j and Streamlit, report automation (Quarto - exports), and the bioinformatics package ecosystem (Bioconda, Bioconductor).

- Listeners will gain actionable approaches for integrating wastewater metagenomics, - network science, and knowledge graphs, practical open‑source tools to automate analysis - and visualization, and guidance on project portfolios, language tradeoffs (R vs - Python), and applying AI/LLMs in bioinformatics workflows. Ideal for researchers - and engineers wanting to turn sequencing data into reproducible network models and - automated reports. -dateadded: '2025-10-27' -duration: PT00H55M13S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=0 - endOffset: 69 -- name: 'Career Transition: Biotechnology to Bioinformatics Software' - startOffset: 69 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=69 - endOffset: 221 -- name: 'Master’s Thesis Overview: Wastewater Microbiome Knowledge Graph' - startOffset: 221 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=221 - endOffset: 387 -- name: 'Bioinformatics Role: Reducing Lab Experiments with Computational Analysis' - startOffset: 387 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=387 - endOffset: 503 -- name: 'Wet Lab vs Dry Lab: Experimental Work vs Computational Pipelines' - startOffset: 503 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=503 - endOffset: 681 -- name: 'Bioinformatics as Data Science: From Sequencing to Analysis' - startOffset: 681 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=681 - endOffset: 755 -- name: 'Genomic Data Basics: Nucleotides and DNA Sequences' - startOffset: 755 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=755 - endOffset: 930 -- name: DNA Sequencing Workflow and Reference Genomes - startOffset: 930 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=930 - endOffset: 1076 -- name: 'Metagenomics: Environmental Sampling and Abundance Tables' - startOffset: 1076 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1076 - endOffset: 1181 -- name: 'Building Microbial Networks: Co‑abundance and 
Association Inference' - startOffset: 1181 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1181 - endOffset: 1471 -- name: 'Network Inference Methodology: CC Lasso, Correlations, and Thresholding' - startOffset: 1471 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1471 - endOffset: 1626 -- name: 'Molecular Simulations: Protein–Ligand Dynamics and Water Boxes' - startOffset: 1626 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1626 - endOffset: 1798 -- name: 'Protein Folding Revolution: AlphaFold Impact on Structure Prediction' - startOffset: 1798 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=1798 - endOffset: 2180 -- name: 'Open‑Source Projects Overview: MCW2 Graph, VueGen, and VueCore' - startOffset: 2180 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2180 - endOffset: 2311 -- name: 'Knowledge Graph Exploration: Neo4j, Streamlit, and Graph Algorithms' - startOffset: 2311 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2311 - endOffset: 2400 -- name: 'Report Automation with VueGen: Quarto, Streamlit, and Export Formats' - startOffset: 2400 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2400 - endOffset: 2549 -- name: 'Package Ecosystem: Bioconda, Bioconductor, and Bioinformatics Libraries' - startOffset: 2549 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2549 - endOffset: 2636 -- name: 'Omics Visualization: VueCore for Genomics, Proteomics, and Metabolomics' - startOffset: 2636 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2636 - endOffset: 2708 -- name: 'Portfolio Advice: Beginner Bioinformatics Projects and Tools to Showcase' - startOffset: 2708 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2708 - endOffset: 2870 -- name: 'AI & LLMs in Bioinformatics: Documentation, MLOps, and Coding Assistants' - startOffset: 2870 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=2870 - endOffset: 3025 -- name: 'Language Tradeoffs: R vs Python and Scaling Scientific Tools' - startOffset: 3025 - url: 
https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3025 - endOffset: 3113 -- name: 'Visualization Workflows: Viewer and Supporting Plotting Libraries' - startOffset: 3113 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3113 - endOffset: 3197 -- name: 'Remote Work & Field Life: Working from Ecuador and Nature Notes' - startOffset: 3197 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3197 - endOffset: 3250 -- name: 'Episode Wrap‑up: Open‑Source Encouragement and Closing Remarks' - startOffset: 3250 - url: https://www.youtube.com/watch?v=ZFrcrTtnB1Q&t=3250 - endOffset: 3313 +context: At its core this episode is about how building open, reproducible computational + infrastructure and workflows lets us translate messy biological data into scalable, + actionable insight—bridging wet lab and dry lab work so researchers can ask better + questions, run fewer experiments, and move faster. From genomics and metagenomics + pipelines to network inference, molecular simulation, knowledge graphs, visualization, + and AI assistants, the through-line is empowering scientists with accessible tools, + automation, and community-driven software that make complex biology interpretable, + shareable, and useful in the real world. 
--- - Links: * [LinkedIn](https://www.linkedin.com/in/sayalaruano/){:target="_blank"} diff --git a/_podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md similarity index 90% rename from _podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md rename to _podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md index 5ffb3858..a0844aac 100644 --- a/_podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.md +++ b/_podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.md @@ -1,20 +1,147 @@ --- +title: "Building Agentic AI Systems: Pragmatic Agent Engineering, Tooling, Retrieval & Evaluation" +short: "Building reliable AI products in the era of Gen AI and Agents" +season: 22 episode: 1 guests: - ranjithakulkarni +image: images/podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.jpg ids: anchor: datatalksclub/episodes/Building-reliable-AI-products-in-the-era-of-Gen-AI-and-Agents---Ranjitha-Kulkarni-e396m2u youtube: x2AAjqz2XmM -image: images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Building-reliable-AI-products-in-the-era-of-Gen-AI-and-Agents---Ranjitha-Kulkarni-e396m2u apple: https://podcasts.apple.com/us/podcast/building-reliable-ai-products-in-the-era-of-gen/id1541710331?i=1000731199709 spotify: https://open.spotify.com/episode/7c22vqYNuNLKKYEfYGOos8?si=NBFT2e80S6WErW_tDDrijA youtube: https://www.youtube.com/watch?v=x2AAjqz2XmM -season: 22 -short: Building reliable AI products in the era of Gen AI and Agents -title: 'Build & Evaluate Autonomous LLM Agents: RAG, Orchestration, Context Engineering - & SRE' +description: "Discover agentic AI tactics: practical agent engineering and retrieval strategies to build robust autonomous systems, boost performance and ensure 
reliability." +topics: +- LLMs +- AI +- agent engineering +- retrieval-augmented generation +- MLOps +- tools +intro: "How do you build reliable, agentic AI systems that balance practical engineering, tooling, retrieval, and robust evaluation? In this episode Ranjitha Kulkarni, Staff Machine Learning Engineer at NeuBird.ai and former engineer on LLM- and agent-powered product features at Dropbox Dash and Microsoft, explores pragmatic approaches to agent design. Drawing on her work in speech recognition, language modeling, assistant evaluation, and publications on voice query reformulation and automatic online evaluation, Ranjitha discusses key elements of agent engineering: selecting and integrating tools, designing effective retrieval pipelines, and establishing meaningful evaluation metrics for intelligent assistants.

Listeners will get a grounded look at the trade-offs of agentic AI in real products, how retrieval strategies impact reasoning and performance, and practical evaluation frameworks to measure assistant behavior. If you’re building LLM-powered agents, improving tool use, or defining evaluation for agentic systems, this episode offers actionable perspectives rooted in production experience and research. Keywords: agentic AI, agent engineering, agent tooling, retrieval, agent evaluation, LLM-powered products." +dateadded: 2025-10-21 +duration: PT00H59M23S +quotableClips: +- name: Event Introduction & Community Links + startOffset: 0 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=0 + endOffset: 192 +- name: 'Early ML Projects: Image Search with OpenCV' + startOffset: 192 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=192 + endOffset: 265 +- name: Speech Recognition & Language Modeling Experience + startOffset: 265 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=265 + endOffset: 297 +- name: Transition to Recommendation Systems at Dropbox + startOffset: 297 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=297 + endOffset: 352 +- name: Question Answering & Early Agent Experiments + startOffset: 352 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=352 + endOffset: 464 +- name: 'Joining Noird.ai: Automating On-call with Agents' + startOffset: 464 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=464 + endOffset: 660 +- name: 'Agent Definition: Autonomy, Objectives & LLMs' + startOffset: 660 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=660 + endOffset: 751 +- name: 'Agent Orchestration: Tools, Memory & Knowledge Stores' + startOffset: 751 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=751 + endOffset: 910 +- name: 'Planning Strategies: Single-step, Multi-pass & Self-reflection' + startOffset: 910 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=910 + endOffset: 1103 +- name: 'Implementation Approaches: Prompts, SDKs & Tool 
Wrappers' + startOffset: 1103 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1103 + endOffset: 1198 +- name: 'Code Agents vs Natural-Language Agents: Trade-offs' + startOffset: 1198 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1198 + endOffset: 1281 +- name: 'Context Engineering: Designing Effective LLM Inputs' + startOffset: 1281 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1281 + endOffset: 1370 +- name: 'SRE Workflows Modeled by Agents: Logs, Metrics & Remediation' + startOffset: 1370 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1370 + endOffset: 1499 +- name: 'Integration Abstractions: Handling Diverse Tooling' + startOffset: 1499 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1499 + endOffset: 1770 +- name: 'RAG Reality Check: Latency, Cost & Garbage-In/Garbage-Out' + startOffset: 1770 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1770 + endOffset: 1898 +- name: 'Retrieval Limitations: Reworking Backends for LLM Context' + startOffset: 1898 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1898 + endOffset: 1968 +- name: 'Context Engineering Techniques: Chunking, Metadata & Wrappers' + startOffset: 1968 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1968 + endOffset: 2171 +- name: 'Agentic RAG: Using Retrieval as a Tool Within Agents' + startOffset: 2171 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2171 + endOffset: 2259 +- name: 'Use Cases: When RAG Is Enough vs When Agents Are Needed' + startOffset: 2259 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2259 + endOffset: 2430 +- name: 'Dynamic Planning Example: Calendar & Meeting Assistant' + startOffset: 2430 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2430 + endOffset: 2586 +- name: Dropbox Dash & AI Productivity Assistants for Enterprises + startOffset: 2586 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2586 + endOffset: 2648 +- name: 'Framework Choices: Build from Scratch vs Use Libraries' + startOffset: 2648 + url: 
https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2648 + endOffset: 2760 +- name: 'Framework Trade-offs: LangChain, OpenAI Agents SDK, Small Agents' + startOffset: 2760 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2760 + endOffset: 2880 +- name: Agent Marketplaces & Tool Protocols (MCP) + startOffset: 2880 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2880 + endOffset: 3077 +- name: 'Evaluation Strategy: Custom Datasets & System Benchmarks' + startOffset: 3077 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3077 + endOffset: 3200 +- name: 'Testing Agents: Mocking Tools, Integration & Regression Tests' + startOffset: 3200 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3200 + endOffset: 3362 +- name: 'Goal-based Evaluation: Outcome Assertions Over Exact Paths' + startOffset: 3362 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3362 + endOffset: 3491 +- name: 'Specialization Challenge: Why Generic Agent Solutions Lag' + startOffset: 3491 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3491 + endOffset: 3546 +- name: Closing Thoughts & Future Outlook for Agent Engineering + startOffset: 3546 + url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3546 + endOffset: 3563 transcript: - header: Event Introduction & Community Links - line: Hi everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -143,7 +270,7 @@ transcript: sec: 394 time: '6:34' who: Ranjitha -- header: 'Joining Noird.ai: Automating On‑call with Agents' +- header: 'Joining Noird.ai: Automating On-call with Agents' - line: After working on agents at Dropbox, I was drawn to Noird, where I am now. I’m fully immersed in the potential of these agents. 
We are trying to solve the problem of engineering on call, taking that away from users and letting agents @@ -279,7 +406,7 @@ transcript: sec: 904 time: '15:04' who: Ranjitha -- header: 'Planning Strategies: Single‑step, Multi‑pass & Self‑reflection' +- header: 'Planning Strategies: Single-step, Multi-pass & Self-reflection' - line: Would you agree with the definition that an agent is just an LLM with tools? sec: 910 time: '15:10' @@ -326,7 +453,7 @@ transcript: sec: 1146 time: '19:06' who: Ranjitha -- header: 'Code Agents vs Natural‑Language Agents: Trade‑offs' +- header: 'Code Agents vs Natural-Language Agents: Trade-offs' - line: Some agents plan in plain English, others in code so-called code agents. The choice depends on the task complexity. For natural language problems, natural language-based agents work. For very complex tasks with many steps and conditionals, @@ -497,7 +624,7 @@ transcript: sec: 1732 time: '28:52' who: Ranjitha -- header: 'RAG Reality Check: Latency, Cost & Garbage‑In/Garbage‑Out' +- header: 'RAG Reality Check: Latency, Cost & Garbage-In/Garbage-Out' - line: We still need to reduce the amount of noise that we put into an LLM’s context, and that’s what context engineering is. sec: 1770 @@ -734,7 +861,7 @@ transcript: sec: 2752 time: '45:52' who: Alexey -- header: 'Framework Trade‑offs: LangChain, OpenAI Agents SDK, Small Agents' +- header: 'Framework Trade-offs: LangChain, OpenAI Agents SDK, Small Agents' - line: LangChain has its uses, but I haven’t used it much for agents. Early on, it couldn’t handle ambiguity in natural language. It has improved and has new agents to experiment with. @@ -814,7 +941,7 @@ transcript: sec: 3313 time: '55:13' who: Alexey -- header: 'Goal‑based Evaluation: Outcome Assertions Over Exact Paths' +- header: 'Goal-based Evaluation: Outcome Assertions Over Exact Paths' - line: 'I wouldn’t evaluate each path too strictly because LLMs can accomplish the same goal differently. Tool calls must consult the true source. 
For example, two ways exist to find a skip level: directly or by traversing an org chart. Both @@ -851,146 +978,35 @@ transcript: sec: 3563 time: '59:23' who: Alexey -description: Build autonomous LLM agents with RAG, orchestration & context engineering - - master SRE automation, testing, evaluation metrics and latency/cost tradeoffs. -intro: 'How do you build and evaluate truly autonomous LLM agents that balance retrieval, - orchestration, and real-world SRE needs? In this episode, Ranjitha Gurunath Kulkarni - — Staff ML Engineer at NeuBird.ai with earlier LLM and assistant work at Dropbox - and Microsoft and an LTI master’s from Carnegie Mellon — walks through practical - engineering trade-offs for autonomous LLM agents and retrieval-augmented generation - (RAG).

We cover a clear agent definition (autonomy, objectives, LLMs), - agent orchestration tools and memory/knowledge stores, planning strategies from - single-step to self-reflection, and implementation choices: prompts, SDKs, tool - wrappers, and the code‑vs‑natural‑language agent trade-offs. Ranjitha digs into - context engineering techniques (chunking, metadata, wrappers), RAG realities (latency, - cost, GIGO), and when retrieval alone suffices versus when full agents are needed. - She also maps SRE workflows to agents (logs, metrics, remediation), integration - abstractions, framework trade-offs (LangChain, OpenAI Agents SDK, Small Agents), - and evaluation strategy: custom datasets, mocking tools, regression tests, and goal‑based - outcome assertions.

Listen to learn practical guidance for building, testing, - and deploying autonomous LLM agents, and which architectures and evaluation approaches - work best for production systems.' -dateadded: '2025-10-21' -duration: PT00H59M23S -quotableClips: -- name: Event Introduction & Community Links - startOffset: 0 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=0 - endOffset: 192 -- name: 'Early ML Projects: Image Search with OpenCV' - startOffset: 192 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=192 - endOffset: 265 -- name: Speech Recognition & Language Modeling Experience - startOffset: 265 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=265 - endOffset: 297 -- name: Transition to Recommendation Systems at Dropbox - startOffset: 297 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=297 - endOffset: 352 -- name: Question Answering & Early Agent Experiments - startOffset: 352 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=352 - endOffset: 464 -- name: 'Joining Noird.ai: Automating On‑call with Agents' - startOffset: 464 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=464 - endOffset: 660 -- name: 'Agent Definition: Autonomy, Objectives & LLMs' - startOffset: 660 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=660 - endOffset: 751 -- name: 'Agent Orchestration: Tools, Memory & Knowledge Stores' - startOffset: 751 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=751 - endOffset: 910 -- name: 'Planning Strategies: Single‑step, Multi‑pass & Self‑reflection' - startOffset: 910 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=910 - endOffset: 1103 -- name: 'Implementation Approaches: Prompts, SDKs & Tool Wrappers' - startOffset: 1103 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1103 - endOffset: 1198 -- name: 'Code Agents vs Natural‑Language Agents: Trade‑offs' - startOffset: 1198 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1198 - endOffset: 1281 -- name: 'Context Engineering: Designing Effective LLM Inputs' - 
startOffset: 1281 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1281 - endOffset: 1370 -- name: 'SRE Workflows Modeled by Agents: Logs, Metrics & Remediation' - startOffset: 1370 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1370 - endOffset: 1499 -- name: 'Integration Abstractions: Handling Diverse Tooling' - startOffset: 1499 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1499 - endOffset: 1770 -- name: 'RAG Reality Check: Latency, Cost & Garbage‑In/Garbage‑Out' - startOffset: 1770 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1770 - endOffset: 1898 -- name: 'Retrieval Limitations: Reworking Backends for LLM Context' - startOffset: 1898 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1898 - endOffset: 1968 -- name: 'Context Engineering Techniques: Chunking, Metadata & Wrappers' - startOffset: 1968 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=1968 - endOffset: 2171 -- name: 'Agentic RAG: Using Retrieval as a Tool Within Agents' - startOffset: 2171 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2171 - endOffset: 2259 -- name: 'Use Cases: When RAG Is Enough vs When Agents Are Needed' - startOffset: 2259 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2259 - endOffset: 2430 -- name: 'Dynamic Planning Example: Calendar & Meeting Assistant' - startOffset: 2430 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2430 - endOffset: 2586 -- name: Dropbox Dash & AI Productivity Assistants for Enterprises - startOffset: 2586 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2586 - endOffset: 2648 -- name: 'Framework Choices: Build from Scratch vs Use Libraries' - startOffset: 2648 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2648 - endOffset: 2760 -- name: 'Framework Trade‑offs: LangChain, OpenAI Agents SDK, Small Agents' - startOffset: 2760 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2760 - endOffset: 2880 -- name: Agent Marketplaces & Tool Protocols (MCP) - startOffset: 2880 - url: 
https://www.youtube.com/watch?v=x2AAjqz2XmM&t=2880 - endOffset: 3077 -- name: 'Evaluation Strategy: Custom Datasets & System Benchmarks' - startOffset: 3077 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3077 - endOffset: 3200 -- name: 'Testing Agents: Mocking Tools, Integration & Regression Tests' - startOffset: 3200 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3200 - endOffset: 3362 -- name: 'Goal‑based Evaluation: Outcome Assertions Over Exact Paths' - startOffset: 3362 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3362 - endOffset: 3491 -- name: 'Specialization Challenge: Why Generic Agent Solutions Lag' - startOffset: 3491 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3491 - endOffset: 3546 -- name: Closing Thoughts & Future Outlook for Agent Engineering - startOffset: 3546 - url: https://www.youtube.com/watch?v=x2AAjqz2XmM&t=3546 - endOffset: 3563 ---- +context: 'Context: The episode traces a practitioner’s journey from early ML and language + work into building agentic systems for real-world SRE and productivity problems—covering + definitions, architectures, planning, retrieval, tooling, frameworks, testing, and + evaluation. + Core (single unifying idea): Pragmatic agent engineering: turning LLMs into reliable, + task-oriented autonomous systems by engineering around their capabilities and limits—designing + objectives, orchestration, context/retrieval, tooling integrations, planning strategies, + and rigorous evaluation so agents can safely, efficiently, and predictably perform + real operational and enterprise tasks. + + Why this unifies the episode: - Defines what an “agent” means in practice (autonomy + + objectives + LLMs) and why design choices matter. - Shows orchestration needs + (tools, memory, knowledge stores) to ground LLM reasoning in real data and actions. 
+ - Contrasts planning styles (single-step, multi-pass, self-reflection) and implementation + tradeoffs (prompts vs SDKs, code vs natural-language agents) as engineering choices, + not academic ones. - Treats retrieval/RAG as an engineering component with latency/cost/GIGO + constraints and explores agentic RAG when RAG alone falls short. - Emphasizes integration + abstractions and framework tradeoffs for production deployment (from bespoke stacks + to marketplaces and SDKs). - Centers testing and evaluation—mocking tools, regression + tests, goal-based benchmarks—to ensure outcomes over narrative plausibility. - Highlights + specialization and domain constraints: generic agents struggle; practical value + comes from adapting agents to workflows, data, and operational requirements. + + Bottom line: The episode’s through-line is that successful agent projects are not + just about large models: they are systems engineering problems requiring explicit + choices about autonomy, grounding, tooling, planning, and measurement to deliver + dependable, useful automation.' 
+--- Links: * [Linkedin](https://www.linkedin.com/in/ranjitha-gurunath-kulkarni){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s16e08-ai-for-digital-health.md b/_podcast/building-ai-digital-health-startups.md similarity index 94% rename from _podcast/s16e08-ai-for-digital-health.md rename to _podcast/building-ai-digital-health-startups.md index 564dbd07..73f3ea3e 100644 --- a/_podcast/s16e08-ai-for-digital-health.md +++ b/_podcast/building-ai-digital-health-startups.md @@ -1,19 +1,143 @@ --- +title: "Building Digital Health Startups: MVP Strategy, AI Diagnosis and Telemedicine" +short: "AI for Digital Health" +season: 16 episode: 8 guests: - mariabruckert +image: images/podcast/building-ai-digital-health-startups.jpg ids: - anchor: atatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc + anchor: datatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc youtube: whpkDmVVGUE -image: images/podcast/s16e08-ai-for-digital-health.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/AI-for-Digital-Health---Maria-Bruckert-e2cejoc apple: https://podcasts.apple.com/us/podcast/ai-for-digital-health-maria-bruckert/id1541710331?i=1000637212773 spotify: https://open.spotify.com/episode/2NE0vbiYwXxOuqychHIqBR?si=QdRyuJvSRE2V3bLwHaEv-Q youtube: https://www.youtube.com/watch?v=whpkDmVVGUE -season: 16 -short: AI for Digital Health -title: 'Build & Scale a Digital Clinic: AI Skin Health, Telemedicine & AR MVP' +description: "Discover actionable digital health MVP strategy, telemedicine tactics to build, validate and monetize a scalable healthcare startup with faster remote care." +topics: +- AI +- computer vision +- data strategy +- product management +- startups +- healthcare +intro: "How do you build a digital health startup that ships a focused MVP, uses AI for diagnosis, and delivers care via telemedicine while overcoming data gaps and legacy workflows? 
In this episode Maria-Liisa Bruckert, Co-Founder and Co-CEO of SQIN and recipient of the Google Play Best of 2020 award and Google Female Founder Immersion 2020, walks through her transition from electrical engineering to health tech and the practical playbook she uses to de-risk product development.

We cover MVP strategy and market research tactics—cold outreach, accelerators, clinical meetings—and unconventional experiments like an AR “lipstick try-on” to collect engagement data. Maria explains how SQIN aligns AI diagnosis with concrete business cases, builds a digital clinic flow from diagnosis to prescription, and uses telemedicine for remote follow-up and prescriptions. You’ll also hear about data strategy and community bootstrapping, ethics and UX for sensitive AI messaging, go-to-market choices for regional rollout, and monetization through SaaS integrations and partnerships.

Listen for actionable insights on product-market fit, hiring priorities for AI and full-stack roles, and practical steps to launch a digital health startup that balances technical credibility with patient access." +dateadded: 2023-12-03 +duration: PT00H52M27S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=0 + endOffset: 49 +- name: 'Career Journey: From Electrical Engineering to Founding SQIN' + startOffset: 49 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=49 + endOffset: 125 +- name: 'Founder Approach: Industry Immersion, MVP Development' + startOffset: 125 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=125 + endOffset: 250 +- name: 'Why Healthcare: Digitization Opportunity in Medical Systems' + startOffset: 250 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=250 + endOffset: 307 +- name: 'Healthcare Challenges: Data Gaps, Rural Access, and Legacy Workflows' + startOffset: 307 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=307 + endOffset: 371 +- name: 'Legacy Infrastructure: Fax, Fragmentation, and Slow Adoption' + startOffset: 371 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=371 + endOffset: 493 +- name: 'Regional Perspective: Access Issues in Southern Brandenburg' + startOffset: 493 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=493 + endOffset: 740 +- name: 'Market Research Tactics: Cold Outreach, Accelerators, Clinical Meetings' + startOffset: 740 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=740 + endOffset: 775 +- name: 'AR MVP: Lipstick Try-On as a Data Collection & Engagement Tool' + startOffset: 775 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=775 + endOffset: 949 +- name: 'Discovery: Skin Health Signals Hidden in Lifestyle Interactions' + startOffset: 949 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=949 + endOffset: 1107 +- name: 'Founder Lessons: Experimentation, Pivoting, and Plan B Flexibility' + startOffset: 1107 + url: 
https://www.youtube.com/watch?v=whpkDmVVGUE&t=1107 + endOffset: 1292 +- name: 'Product-Market Fit: Aligning AI Capabilities with Business Cases' + startOffset: 1292 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1292 + endOffset: 1420 +- name: 'SQIN Product: Digital Clinic Flow from Diagnosis to Prescription' + startOffset: 1420 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1420 + endOffset: 1448 +- name: 'Ethics & UX: Sensitive AI Messaging and Inclusive Design' + startOffset: 1448 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1448 + endOffset: 1665 +- name: 'Go-to-Market Strategy: Regional Focus, Limitations, and Fallbacks' + startOffset: 1665 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1665 + endOffset: 1783 +- name: 'Data Strategy: Leveraging Community Reach to Bootstrap Datasets' + startOffset: 1783 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1783 + endOffset: 1844 +- name: 'Community Productization: Daily Lifestyle Integration & Retention' + startOffset: 1844 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1844 + endOffset: 1960 +- name: 'Audience Expansion: Reaching Multiple Genders and Demographics' + startOffset: 1960 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1960 + endOffset: 2157 +- name: 'Telemedicine Impact: Remote Follow-Up, Prescriptions, and Efficiency' + startOffset: 2157 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2157 + endOffset: 2285 +- name: 'Feedback Loops: Support Channels and User Bug Reporting' + startOffset: 2285 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2285 + endOffset: 2370 +- name: 'Personalization: Archetypes, Gamification, and Educational Content' + startOffset: 2370 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2370 + endOffset: 2492 +- name: 'Company Growth: Team Size, Hiring Needs (AI, Full-Stack, Backend)' + startOffset: 2492 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2492 + endOffset: 2624 +- name: 'Fundraising: Proving Profitability and 
Technical Credibility to Investors' + startOffset: 2624 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2624 + endOffset: 2768 +- name: 'Monetization: SaaS Integrations, Partnerships, and E-commerce Cuts' + startOffset: 2768 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2768 + endOffset: 2879 +- name: 'Leadership Structure: First Hires and Product vs. CEO Roles' + startOffset: 2879 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2879 + endOffset: 2914 +- name: 'Work-Life Integration: Parenting While Building a Startup' + startOffset: 2914 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2914 + endOffset: 3067 +- name: 'Cultural Upside: Entrepreneurial Mindset Passed to Children' + startOffset: 3067 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3067 + endOffset: 3138 +- name: Closing Remarks and Next Steps + startOffset: 3138 + url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3138 + endOffset: 3147 transcript: - header: Podcast Introduction - line: This week, we will talk about AI for digital healthcare. We have a special @@ -888,7 +1012,7 @@ transcript: sec: 2760 time: '46:00' who: Alexey -- header: 'Monetization: SaaS Integrations, Partnerships, and E‑commerce Cuts' +- header: 'Monetization: SaaS Integrations, Partnerships, and E-commerce Cuts' - line: We integrate our AI to different partners. We do the health checks on different points of sale. First of all, of course, in our own application, SQIN, where it’s a digital clinic – but we also have other applications and other points of sale, @@ -1045,141 +1169,23 @@ transcript: sec: 3147 time: '52:27' who: Maria -description: Discover how to build a digital clinic with AI skin health and telemedicine—learn - go-to-market, data strategy, monetization, and hiring wins. -intro: How do you build and scale a digital clinic that blends AI-driven skin health, - telemedicine, and an AR MVP? 
In this episode, Maria-Liisa Bruckert, Co‑Founder and - Co‑CEO of SQIN and recipient of the Google Female Founder Immersion 2020 and Google - Play Best of 2020, walks through the practical steps she took to turn an engineering - mindset into a digital health business.

We cover industry immersion and - MVP development, why healthcare digitization matters, and real operational challenges - like data gaps, rural access, and legacy workflows. Maria explains the AR lipstick - try-on as a data collection and engagement tactic, how to surface skin health signals - from everyday interactions, and aligning AI capabilities with clear business cases. - You’ll also hear about building a digital clinic flow from diagnosis to prescription, - telemedicine’s role in remote follow-up and efficiency, ethics and inclusive UX, - regional go-to-market tactics, data strategy for bootstrapping datasets, and early - hiring, fundraising, and monetization approaches.

Listeners interested - in digital clinic design, AI skin health, telemedicine implementation, or launching - an AR MVP will find actionable tactics and lessons to apply to product-market fit, - data strategy, and go-to-market execution. -dateadded: '2023-12-03' -duration: PT00H52M27S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=0 - endOffset: 49 -- name: 'Career Journey: From Electrical Engineering to Founding SQIN' - startOffset: 49 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=49 - endOffset: 125 -- name: 'Founder Approach: Industry Immersion, MVP Development' - startOffset: 125 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=125 - endOffset: 250 -- name: 'Why Healthcare: Digitization Opportunity in Medical Systems' - startOffset: 250 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=250 - endOffset: 307 -- name: 'Healthcare Challenges: Data Gaps, Rural Access, and Legacy Workflows' - startOffset: 307 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=307 - endOffset: 371 -- name: 'Legacy Infrastructure: Fax, Fragmentation, and Slow Adoption' - startOffset: 371 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=371 - endOffset: 493 -- name: 'Regional Perspective: Access Issues in Southern Brandenburg' - startOffset: 493 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=493 - endOffset: 740 -- name: 'Market Research Tactics: Cold Outreach, Accelerators, Clinical Meetings' - startOffset: 740 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=740 - endOffset: 775 -- name: 'AR MVP: Lipstick Try-On as a Data Collection & Engagement Tool' - startOffset: 775 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=775 - endOffset: 949 -- name: 'Discovery: Skin Health Signals Hidden in Lifestyle Interactions' - startOffset: 949 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=949 - endOffset: 1107 -- name: 'Founder Lessons: Experimentation, Pivoting, and Plan B Flexibility' - 
startOffset: 1107 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1107 - endOffset: 1292 -- name: 'Product-Market Fit: Aligning AI Capabilities with Business Cases' - startOffset: 1292 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1292 - endOffset: 1420 -- name: 'SQIN Product: Digital Clinic Flow from Diagnosis to Prescription' - startOffset: 1420 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1420 - endOffset: 1448 -- name: 'Ethics & UX: Sensitive AI Messaging and Inclusive Design' - startOffset: 1448 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1448 - endOffset: 1665 -- name: 'Go-to-Market Strategy: Regional Focus, Limitations, and Fallbacks' - startOffset: 1665 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1665 - endOffset: 1783 -- name: 'Data Strategy: Leveraging Community Reach to Bootstrap Datasets' - startOffset: 1783 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1783 - endOffset: 1844 -- name: 'Community Productization: Daily Lifestyle Integration & Retention' - startOffset: 1844 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1844 - endOffset: 1960 -- name: 'Audience Expansion: Reaching Multiple Genders and Demographics' - startOffset: 1960 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=1960 - endOffset: 2157 -- name: 'Telemedicine Impact: Remote Follow-Up, Prescriptions, and Efficiency' - startOffset: 2157 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2157 - endOffset: 2285 -- name: 'Feedback Loops: Support Channels and User Bug Reporting' - startOffset: 2285 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2285 - endOffset: 2370 -- name: 'Personalization: Archetypes, Gamification, and Educational Content' - startOffset: 2370 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2370 - endOffset: 2492 -- name: 'Company Growth: Team Size, Hiring Needs (AI, Full-Stack, Backend)' - startOffset: 2492 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2492 - endOffset: 2624 -- name: 'Fundraising: 
Proving Profitability and Technical Credibility to Investors' - startOffset: 2624 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2624 - endOffset: 2768 -- name: 'Monetization: SaaS Integrations, Partnerships, and E‑commerce Cuts' - startOffset: 2768 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2768 - endOffset: 2879 -- name: 'Leadership Structure: First Hires and Product vs. CEO Roles' - startOffset: 2879 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2879 - endOffset: 2914 -- name: 'Work-Life Integration: Parenting While Building a Startup' - startOffset: 2914 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=2914 - endOffset: 3067 -- name: 'Cultural Upside: Entrepreneurial Mindset Passed to Children' - startOffset: 3067 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3067 - endOffset: 3138 -- name: Closing Remarks and Next Steps - startOffset: 3138 - url: https://www.youtube.com/watch?v=whpkDmVVGUE&t=3138 - endOffset: 3147 ---- +context: 'Context: The episode traces a founder’s shift from engineering to healthcare + entrepreneurship, driven by the opportunity to digitize fragmented medical systems. + It covers pragmatic founder tactics (immersion, rapid MVPs, cold outreach), an unusual + AR MVP to collect engagement data, and a discovery that everyday lifestyle interactions + reveal skin-health signals. The conversation ties product experimentation and iterative + pivots to building a digital clinic flow (diagnosis → prescription → telemedicine), + while confronting legacy infrastructure, rural access gaps, ethical UX, and regional + go-to-market limits. Growth topics — community-first data strategies, personalization, + retention, hiring, fundraising, and monetization via SaaS/partnerships — are framed + alongside human considerations like leadership choices and work-life integration. 
+ Core theme: Building an ethical, product-first digital healthcare startup by using + rapid experimentation and community-driven engagement to bootstrap meaningful clinical + data and align AI capabilities with real patient workflows and viable business models—solving + legacy access and workflow problems regionally, iterating from MVP to product-market + fit, and scaling sustainably while keeping human needs and ethics central.' +--- Links: * [Maria's LinkedIn profile](https://www.linkedin.com/in/mariabruckert/){:target="_blank"} diff --git a/_podcast/s07e03-product-management-essentials.md b/_podcast/building-and-scaling-ai-data-products-with-mlops.md similarity index 95% rename from _podcast/s07e03-product-management-essentials.md rename to _podcast/building-and-scaling-ai-data-products-with-mlops.md index d48b5730..cbf4d012 100644 --- a/_podcast/s07e03-product-management-essentials.md +++ b/_podcast/building-and-scaling-ai-data-products-with-mlops.md @@ -1,11 +1,11 @@ --- -title: 'Build & Scale Data Products for AI: Roadmaps, MLOps, Customer Research & Metrics' -short: Product Management Essentials for Data Professionals -guests: -- gregcoquillo -image: images/podcast/s07e03-product-management-essentials.jpg +title: "Build & Scale Data Products for AI: Roadmaps, MLOps, Customer Research & Metrics" +short: "Product Management Essentials for Data Professionals" season: 7 episode: 3 +guests: +- gregcoquillo +image: images/podcast/building-and-scaling-ai-data-products-with-mlops.jpg ids: youtube: p4wg0Vd2uD4 anchor: Product-Management-Essentials-for-Data-Professionals---Greg-Coquillo-e1dr8g5 @@ -14,6 +14,100 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Product-Management-Essentials-for-Data-Professionals---Greg-Coquillo-e1dr8g5 spotify: https://open.spotify.com/episode/1Oh6ewUJ2c1jiVcKxWIwDU apple: https://podcasts.apple.com/us/podcast/product-management-essentials-for-data-professionals/id1541710331?i=1000550093434 + +description: "Build scalable 
data products with MLOps roadmaps, customer research and metric-driven templates - prioritize impact, reduce failures, and measure success." +topics: +- product management +- MLOps +- data engineering +- data science +intro: "How do you move from proofs-of-concept to scalable AI data products that deliver measurable business value? In this episode, Greg Coquillo, a Technology Manager at Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, walks through practical approaches for building and scaling data products, MLOps, customer research, and metrics.

We cover Greg’s transition into AI product work and the role of data product managers (internal vs. external), then dive into customer journey mapping, domain knowledge, and structured customer research—interview techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how to work backwards from business problems, contribute technical input to roadmaps with T-shirt sizing, and prioritize MLOps by spotting unscalable manual processes. Greg outlines three-year roadmap thinking (impact, effort, cost), a pragmatic Excel template (problems → solutions → metrics), and SMART and operational metrics like pipeline failures, SLAs, and data quality. He also addresses operating without a PM, aligning team mental models, and on-the-job product skill development.

Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer research, and defining success metrics for AI-driven data products." +dateadded: 2022-02-06 + +duration: PT00H59M41S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=0 + endOffset: 103 +- name: Career Background & Transition to AI Products + startOffset: 103 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=103 + endOffset: 401 +- name: Role & Responsibilities of Data Product Managers (Internal vs External) + startOffset: 401 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=401 + endOffset: 843 +- name: Customer Journey & Domain Knowledge for Data Professionals + startOffset: 843 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=843 + endOffset: 1081 +- name: 'Customer Research Techniques: Interviews & Documentation' + startOffset: 1081 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1081 + endOffset: 1228 +- name: Structuring Interviews with Business Partners & the Five Whys + startOffset: 1228 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1228 + endOffset: 1400 +- name: Hypothesis Testing & Working Backwards from Business Problems + startOffset: 1400 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1400 + endOffset: 1585 +- name: Product Sense & Product Mindset Explained + startOffset: 1585 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1585 + endOffset: 1733 +- name: 'Contributing to Roadmaps: Technical Input & T-Shirt Sizing' + startOffset: 1733 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1733 + endOffset: 1905 +- name: 'Working Backwards: Problem-First Feature Design' + startOffset: 1905 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1905 + endOffset: 2134 +- name: 'Roadmap Types: Technical Roadmaps, MLOps & Scaling Strategies' + startOffset: 2134 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2134 + endOffset: 2341 +- name: Identifying
Unscalable Manual Processes & ML Ops Prioritization + startOffset: 2341 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2341 + endOffset: 2504 +- name: 'Three-Year Roadmap: Prioritization by Impact, Effort & Cost' + startOffset: 2504 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2504 + endOffset: 2838 +- name: 'Practical Roadmap Template in Excel: Problems → Solutions → Metrics' + startOffset: 2838 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2838 + endOffset: 3071 +- name: Success Metrics & SMART Goals for Internal Data Platforms + startOffset: 3071 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3071 + endOffset: 3207 +- name: 'Operational Metrics: Pipeline Failures, SLAs & Data Quality' + startOffset: 3207 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3207 + endOffset: 3332 +- name: 'Operating Without a PM: Identify Customers & Validate Work' + startOffset: 3332 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3332 + endOffset: 3464 +- name: Team Autonomy & Aligning Mental Models for Product Success + startOffset: 3464 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3464 + endOffset: 3522 +- name: 'Career Advice: Learn Product Skills on the Job & Follow-up Resources' + startOffset: 3522 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3522 + endOffset: 3647 +- name: Episode Close & How to Connect with Guest + startOffset: 3647 + url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3647 + endOffset: 3581 + transcript: - header: Episode Introduction & Guest Overview - line: This week, we'll talk about learning product management. 
We have a special @@ -397,7 +491,7 @@ transcript: sec: 1685 time: '28:05' who: Alexey -- header: 'Contributing to Roadmaps: Technical Input & T‑Shirt Sizing' +- header: 'Contributing to Roadmaps: Technical Input & T-Shirt Sizing' - line: If you're a data professional, you're on the business team as a data analyst, a business analyst, or you're on the tech side, even an ML engineer or a data engineer, etc. Since product roadmaps are led by product managers, the best practice @@ -437,7 +531,7 @@ transcript: sec: 1881 time: '31:21' who: Greg -- header: 'Working Backwards: Problem‑First Feature Design' +- header: 'Working Backwards: Problem-First Feature Design' - line: I guess another thing – I'm not sure if we talked about this – but you mentioned it a few times that you should start with an end in mind. I guess this is something that we can also do. Let's say, when we discuss any feature, we can say “Let's @@ -576,7 +670,7 @@ transcript: sec: 2424 time: '40:24' who: Greg -- header: 'Three‑Year Roadmap: Prioritization by Impact, Effort & Cost' +- header: 'Three-Year Roadmap: Prioritization by Impact, Effort & Cost' - line: I think you said “driving the roadmap” multiple times. Let's say I work in a team that has a product manager, and a bunch of other people. So then “driving the roadmap” of this team means taking active part in discussions when we talk @@ -833,7 +927,7 @@ transcript: sec: 3495 time: '58:15' who: Greg -- header: 'Career Advice: Learn Product Skills on the Job & Follow‑up Resources' +- header: 'Career Advice: Learn Product Skills on the Job & Follow-up Resources' - line: Yeah. Okay. I think we should be wrapping up. Do you want to say anything before we finish? sec: 3522 @@ -867,107 +961,6 @@ transcript: sec: 3647 time: '1:00:47' who: Alexey -description: Build scalable data products with MLOps roadmaps, customer research and - metric-driven templates - prioritize impact, reduce failures, and measure success. 
-intro: How do you move from proofs-of-concept to scalable AI data products that deliver - measurable business value? In this episode, Greg Coquillo, a Technology Manager at - Amazon who builds AI roadmaps for Private Brands’ product safety and compliance, - walks through practical approaches for building and scaling data products, MLOps, - customer research, and metrics.

We cover Greg’s transition into AI product - work and the role of data product managers (internal vs. external), then dive into - customer journey mapping, domain knowledge, and structured customer research—interview - techniques, documentation, the Five Whys, and hypothesis testing. You’ll hear how - to work backwards from business problems, contribute technical input to roadmaps - with T‑shirt sizing, and prioritize MLOps by spotting unscalable manual processes. - Greg outlines three‑year roadmap thinking (impact, effort, cost), a pragmatic Excel - template (problems → solutions → metrics), and SMART and operational metrics like - pipeline failures, SLAs, and data quality. He also addresses operating without a - PM, aligning team mental models, and on‑the‑job product skill development.

- Listen to learn actionable methods for roadmap planning, MLOps prioritization, customer - research, and defining success metrics for AI-driven data products. -dateadded: '2022-02-06' -duration: PT00H59M41S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=0 - endOffset: 103 -- name: Career Background & Transition to AI Products - startOffset: 103 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=103 - endOffset: 401 -- name: Role & Responsibilities of Data Product Managers (Internal vs External) - startOffset: 401 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=401 - endOffset: 843 -- name: Customer Journey & Domain Knowledge for Data Professionals - startOffset: 843 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=843 - endOffset: 1081 -- name: 'Customer Research Techniques: Interviews & Documentation' - startOffset: 1081 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1081 - endOffset: 1228 -- name: Structuring Interviews with Business Partners & the Five Whys - startOffset: 1228 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1228 - endOffset: 1400 -- name: Hypothesis Testing & Working Backwards from Business Problems - startOffset: 1400 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1400 - endOffset: 1585 -- name: Product Sense & Product Mindset Explained - startOffset: 1585 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1585 - endOffset: 1733 -- name: 'Contributing to Roadmaps: Technical Input & T‑Shirt Sizing' - startOffset: 1733 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1733 - endOffset: 1905 -- name: 'Working Backwards: Problem‑First Feature Design' - startOffset: 1905 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=1905 - endOffset: 2134 -- name: 'Roadmap Types: Technical Roadmaps, MLOps & Scaling Strategies' - startOffset: 2134 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2134 - endOffset: 2341 -- name: Identifying 
Unscalable Manual Processes & ML Ops Prioritization - startOffset: 2341 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2341 - endOffset: 2504 -- name: 'Three‑Year Roadmap: Prioritization by Impact, Effort & Cost' - startOffset: 2504 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2504 - endOffset: 2838 -- name: 'Practical Roadmap Template in Excel: Problems → Solutions → Metrics' - startOffset: 2838 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=2838 - endOffset: 3071 -- name: Success Metrics & SMART Goals for Internal Data Platforms - startOffset: 3071 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3071 - endOffset: 3207 -- name: 'Operational Metrics: Pipeline Failures, SLAs & Data Quality' - startOffset: 3207 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3207 - endOffset: 3332 -- name: 'Operating Without a PM: Identify Customers & Validate Work' - startOffset: 3332 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3332 - endOffset: 3464 -- name: Team Autonomy & Aligning Mental Models for Product Success - startOffset: 3464 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3464 - endOffset: 3522 -- name: 'Career Advice: Learn Product Skills on the Job & Follow‑up Resources' - startOffset: 3522 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3522 - endOffset: 3647 -- name: Episode Close & How to Connect with Guest - startOffset: 3647 - url: https://www.youtube.com/watch?v=p4wg0Vd2uD4&t=3647 - endOffset: 3581 --- Links: diff --git a/_podcast/s15e09-data-engineering-for-fraud-prevention.md b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md similarity index 97% rename from _podcast/s15e09-data-engineering-for-fraud-prevention.md rename to _podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md index 9c00ddce..f645c285 100644 --- a/_podcast/s15e09-data-engineering-for-fraud-prevention.md +++ b/_podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.md @@ -1,20 
+1,154 @@ --- +title: "Build and Scale Data Engineering Systems for Fraud Detection: Feature Pipelines, Real-Time Inference, Graph Databases & Production Debugging" +short: "Data Engineering for Fraud Prevention" +season: 15 episode: 9 guests: - angelaramirez +image: images/podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.jpg ids: - anchor: atatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab + anchor: datatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab youtube: ZXNKjrrKU_I -image: images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Data-Engineering-for-Fraud-Prevention---Angela-Ramirez-e29rkab apple: https://podcasts.apple.com/us/podcast/data-engineering-for-fraud-prevention-angela-ramirez/id1541710331?i=1000630468398 spotify: https://open.spotify.com/episode/4wpYwS8XTlNdws39Zynakf?si=OFAHIkVsQlKvdTnlFNaLGg youtube: https://www.youtube.com/watch?v=ZXNKjrrKU_I -season: 15 -short: Data Engineering for Fraud Prevention -title: 'Retail Fraud Detection with Data Engineering: Real-Time Scoring, Graphs & - MLOps' + +description: "Learn retail fraud detection with real-time scoring and MLOps: build data pipelines, graph investigations, and instant cashier decisions to cut losses." +topics: +- data engineering +- MLOps +- fraud detection +- graph databases +- software engineering +intro: "How do you build data infrastructure that stops stolen-card transactions and return abuse in real time? In this episode, Angela Ramirez, a Sam’s Club data engineer who moved from Sephora and specializes in machine learning for fraud prevention, walks through the engineering behind retail fraud detection. 
Drawing on her background in NLP and four years as a data engineer, Angela explains pipelines, feature engineering workflows that combine daily batches with real-time scoring, and the MLOps responsibilities for model metrics, deployment, and monitoring.

We cover system design best practices—stakeholder alignment, timing, documentation—and data modeling tradeoffs across relational, document (Elasticsearch), and graph databases (SPARQL, Neo4j) to support network features connecting members, transactions, and products. Angela also discusses hybrid architectures for instant inference, tooling like PySpark, Pandas/PyArrow, Cassandra, GCP/Dataproc, and data quality practices (Great Expectations), plus operational debugging and scaling patterns. Listen to learn practical approaches to real-time scoring, graph-powered investigations, and the engineering decisions that make retail fraud detection reliable and actionable." +dateadded: 2023-10-07 + +duration: PT00H59M19S + +quotableClips: +- name: Podcast Introduction & Guest Overview (Angela Ramirez) + startOffset: 0 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=0 + endOffset: 161 +- name: 'Career Journey: Sephora to Sam''s Club' + startOffset: 161 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=161 + endOffset: 225 +- name: 'Fraud Detection in Retail: Stolen Cards & Return Abuse' + startOffset: 225 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=225 + endOffset: 382 +- name: 'Data Engineering for Fraud: Pipelines, Features, Dashboards' + startOffset: 382 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=382 + endOffset: 504 +- name: 'Feature Engineering Workflow: Daily Batches + Real-Time Scoring' + startOffset: 504 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=504 + endOffset: 588 +- name: 'MLOps Responsibilities: Model Metrics, Deployment, Monitoring' + startOffset: 588 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=588 + endOffset: 679 +- name: 'Team Structure: Data Engineers, ML Engineers, Data Scientists' + startOffset: 679 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=679 + endOffset: 768 +- name: 'Academic Background: Cognitive Science, NLP, HCI' + startOffset: 768 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=768 +
endOffset: 854 +- name: 'Data-Centric Mindset: Why Data Engineering Powers ML' + startOffset: 854 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=854 + endOffset: 962 +- name: 'Career Transition: Process Improvement → Data Analyst → Data Engineer' + startOffset: 962 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=962 + endOffset: 1155 +- name: 'System Design Best Practices: Stakeholders, Timing, Documentation' + startOffset: 1155 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1155 + endOffset: 1230 +- name: 'Data Modeling Decisions: Relational vs Document vs Graph' + startOffset: 1230 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1230 + endOffset: 1290 +- name: Elasticsearch & Document Indexing for Entity Data + startOffset: 1290 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1290 + endOffset: 1384 +- name: 'Graph Databases & SPARQL: Wikidata and Entity Relationships' + startOffset: 1384 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1384 + endOffset: 1755 +- name: 'Network Features for Fraud: Members, Transactions, Products' + startOffset: 1755 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1755 + endOffset: 2014 +- name: 'Real-Time Decisioning: Front-End Signals for Cashiers & Security' + startOffset: 2014 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2014 + endOffset: 2086 +- name: 'Hybrid Architecture: Batch Computation with Instant Inference' + startOffset: 2086 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2086 + endOffset: 2133 +- name: 'Database Selection Criteria: Static Schema vs Dynamic Data' + startOffset: 2133 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2133 + endOffset: 2291 +- name: 'Graph Visualization for Investigations: Neo4j Use Cases' + startOffset: 2291 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2291 + endOffset: 2450 +- name: 'Software Engineering for Data Engineers: Testing & Code Quality (PySpark)' + startOffset: 2450 + url: 
https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2450 + endOffset: 2608 +- name: 'Data Quality Tooling: Great Expectations and Cloud Monitoring' + startOffset: 2608 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2608 + endOffset: 2681 +- name: 'Operational Challenges: Job Failures, Schema Changes, Scaling' + startOffset: 2681 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2681 + endOffset: 2901 +- name: 'Debugging Playbook: Logs, Runbooks, and Error Documentation' + startOffset: 2901 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2901 + endOffset: 3023 +- name: 'Tech Stack Overview: GCP, Dataproc/Databricks, PySpark, Cassandra' + startOffset: 3023 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3023 + endOffset: 3083 +- name: 'Managed vs Serverless Spark: Dataproc, EMR, Serverless Execution' + startOffset: 3083 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3083 + endOffset: 3198 +- name: 'Pandas & PyArrow: Performance Improvements for Big Data' + startOffset: 3198 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3198 + endOffset: 3297 +- name: 'Cassandra Use Cases: Scalability, Fault Tolerance, Clusters' + startOffset: 3297 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3297 + endOffset: 3379 +- name: 'External Data Integration: APIs, Data Contracts, Stability' + startOffset: 3379 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3379 + endOffset: 3600 +- name: 'Recommended Resources: Designing Data-Intensive Applications, PySpark, SQL' + startOffset: 3600 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3600 + endOffset: 3675 +- name: Episode Wrap-Up & Contact Links + startOffset: 3675 + url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3675 + endOffset: 3559 + transcript: - header: Podcast Introduction & Guest Overview (Angela Ramirez) - line: This week, we will talk about data engineering and fraud detection. 
We have @@ -1151,146 +1285,6 @@ transcript: sec: 3675 time: '1:01:15' who: Alexey -description: 'Learn retail fraud detection with real-time scoring and MLOps: build - data pipelines, graph investigations, and instant cashier decisions to cut losses.' -intro: How do you build data infrastructure that stops stolen-card transactions and - return abuse in real time? In this episode, Angela Ramirez, a Sam’s Club data engineer - who moved from Sephora and specializes in machine learning for fraud prevention, - walks through the engineering behind retail fraud detection. Drawing on her background - in NLP and four years as a data engineer, Angela explains pipelines, feature engineering - workflows that combine daily batches with real-time scoring, and the MLOps responsibilities - for model metrics, deployment, and monitoring.

We cover system design best - practices—stakeholder alignment, timing, documentation—and data modeling tradeoffs - across relational, document (Elasticsearch), and graph databases (SPARQL, Neo4j) - to support network features connecting members, transactions, and products. Angela - also discusses hybrid architectures for instant inference, tooling like PySpark, - Pandas/PyArrow, Cassandra, GCP/Dataproc, and data quality practices (Great Expectations), - plus operational debugging and scaling patterns. Listen to learn practical approaches - to real-time scoring, graph-powered investigations, and the engineering decisions - that make retail fraud detection reliable and actionable. -dateadded: '2023-10-07' -duration: PT00H59M19S -quotableClips: -- name: Podcast Introduction & Guest Overview (Angela Ramirez) - startOffset: 0 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=0 - endOffset: 161 -- name: 'Career Journey: Sephora to Sam''s Club' - startOffset: 161 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=161 - endOffset: 225 -- name: 'Fraud Detection in Retail: Stolen Cards & Return Abuse' - startOffset: 225 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=225 - endOffset: 382 -- name: 'Data Engineering for Fraud: Pipelines, Features, Dashboards' - startOffset: 382 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=382 - endOffset: 504 -- name: 'Feature Engineering Workflow: Daily Batches + Real-Time Scoring' - startOffset: 504 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=504 - endOffset: 588 -- name: 'MLOps Responsibilities: Model Metrics, Deployment, Monitoring' - startOffset: 588 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=588 - endOffset: 679 -- name: 'Team Structure: Data Engineers, ML Engineers, Data Scientists' - startOffset: 679 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=679 - endOffset: 768 -- name: 'Academic Background: Cognitive Science, NLP, HCI' - startOffset: 768 - url: 
https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=768 - endOffset: 854 -- name: 'Data-Centric Mindset: Why Data Engineering Powers ML' - startOffset: 854 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=854 - endOffset: 962 -- name: 'Career Transition: Process Improvement → Data Analyst → Data Engineer' - startOffset: 962 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=962 - endOffset: 1155 -- name: 'System Design Best Practices: Stakeholders, Timing, Documentation' - startOffset: 1155 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1155 - endOffset: 1230 -- name: 'Data Modeling Decisions: Relational vs Document vs Graph' - startOffset: 1230 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1230 - endOffset: 1290 -- name: Elasticsearch & Document Indexing for Entity Data - startOffset: 1290 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1290 - endOffset: 1384 -- name: 'Graph Databases & SPARQL: Wikidata and Entity Relationships' - startOffset: 1384 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1384 - endOffset: 1755 -- name: 'Network Features for Fraud: Members, Transactions, Products' - startOffset: 1755 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=1755 - endOffset: 2014 -- name: 'Real-Time Decisioning: Front-End Signals for Cashiers & Security' - startOffset: 2014 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2014 - endOffset: 2086 -- name: 'Hybrid Architecture: Batch Computation with Instant Inference' - startOffset: 2086 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2086 - endOffset: 2133 -- name: 'Database Selection Criteria: Static Schema vs Dynamic Data' - startOffset: 2133 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2133 - endOffset: 2291 -- name: 'Graph Visualization for Investigations: Neo4j Use Cases' - startOffset: 2291 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2291 - endOffset: 2450 -- name: 'Software Engineering for Data Engineers: Testing & Code Quality (PySpark)' - startOffset: 2450 
- url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2450 - endOffset: 2608 -- name: 'Data Quality Tooling: Great Expectations and Cloud Monitoring' - startOffset: 2608 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2608 - endOffset: 2681 -- name: 'Operational Challenges: Job Failures, Schema Changes, Scaling' - startOffset: 2681 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2681 - endOffset: 2901 -- name: 'Debugging Playbook: Logs, Runbooks, and Error Documentation' - startOffset: 2901 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=2901 - endOffset: 3023 -- name: 'Tech Stack Overview: GCP, Dataproc/Databricks, PySpark, Cassandra' - startOffset: 3023 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3023 - endOffset: 3083 -- name: 'Managed vs Serverless Spark: Dataproc, EMR, Serverless Execution' - startOffset: 3083 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3083 - endOffset: 3198 -- name: 'Pandas & PyArrow: Performance Improvements for Big Data' - startOffset: 3198 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3198 - endOffset: 3297 -- name: 'Cassandra Use Cases: Scalability, Fault Tolerance, Clusters' - startOffset: 3297 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3297 - endOffset: 3379 -- name: 'External Data Integration: APIs, Data Contracts, Stability' - startOffset: 3379 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3379 - endOffset: 3600 -- name: 'Recommended Resources: Designing Data-Intensive Applications, PySpark, SQL' - startOffset: 3600 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3600 - endOffset: 3675 -- name: Episode Wrap-Up & Contact Links - startOffset: 3675 - url: https://www.youtube.com/watch?v=ZXNKjrrKU_I&t=3675 - endOffset: 3559 --- Links: diff --git a/_podcast/s11e05-building-data-science-practice.md b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md similarity index 96% rename from _podcast/s11e05-building-data-science-practice.md rename to 
_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md index 14edcd69..2041abfd 100644 --- a/_podcast/s11e05-building-data-science-practice.md +++ b/_podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.md @@ -1,20 +1,118 @@ --- +title: "Building and Scaling Data Science Practice in Industrial Enterprises: AI Adoption, MLOps Maturity & Career Growth" +short: "Building and Scaling Data Science Practice in Industrial Enterprises" +season: 11 episode: 5 guests: - andreyshtylenko +image: images/podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.jpg ids: anchor: Building-Data-Science-Practice---Andrey-Shtylenko-e1q2ka6 youtube: XbDQv8FTA4U -image: images/podcast/s11e05-building-data-science-practice.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Building-Data-Science-Practice---Andrey-Shtylenko-e1q2ka6 apple: https://podcasts.apple.com/us/podcast/building-data-science-practice-andrey-shtylenko/id1541710331?i=1000585100407 spotify: https://open.spotify.com/episode/0M7Y77MFToxtKuyfdF5W22?si=jgWR6EchQnWe6nYWW44ZxQ youtube: https://www.youtube.com/watch?v=XbDQv8FTA4U -season: 11 -short: Building Data Science Practice -title: 'Scale Industrial AI: MLOps, Sensorization, POC Strategy & Hub‑and‑Spoke Data - Teams' + +description: "Discover Industrial AI tactics, MLOps & sensorization to scale projects: hub-and-spoke data teams, proving value, tooling and career steps to productionize ML." +intro: "How do industrial enterprises move from pilots to production-ready AI—and what team structures, MLOps practices, and career moves make that possible? In this episode Andrey Shtylenko, Director of Engineering at Honeywell and leader of its Advanced Technology Group and AI practice, walks through practical approaches for building and scaling data science teams in industrial enterprises. 
Drawing on Honeywell use cases—smart sensors, computer vision, and robotics—Andrey explains the data and machine learning practices that enable AI adoption, the role of sensorization and cloud processing, and the common challenges traditional industrial companies face.

You’ll hear a concrete data practice maturity model (crawl → walk → run), POC strategy recommendations for proving value with end-to-end projects, and trade-offs between centralized, embedded, and hybrid hub-and-spoke team models. We cover MLOps standardization, shared services (experiment tracking, annotation, procurement), reporting-line impacts (CTO vs CIO vs CEO), and career guidance for engineers pivoting into data science or production ML roles. Listen to gain frameworks and actionable insights to structure teams, mature MLOps, and grow careers within industrial AI initiatives." +topics: +- data science +- industrial AI +- ai adoption +- ai +- MLOps +dateadded: 2022-11-05 + +duration: PT01H49S + +quotableClips: +- name: Introduction & Live Chat Poll Results + startOffset: 0 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=0 + endOffset: 149 +- name: 'Guest Introduction: Andrey Shtylenko, Honeywell' + startOffset: 149 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=149 + endOffset: 196 +- name: 'Career Journey: Startups, Organizational Development, and Honeywell' + startOffset: 196 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=196 + endOffset: 534 +- name: 'Honeywell Use Cases: Smart Sensors, Computer Vision, and Robotics' + startOffset: 534 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=534 + endOffset: 682 +- name: Defining Organizational Data and Machine Learning Practices + startOffset: 682 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=682 + endOffset: 826 +- name: Challenges of AI Adoption in Traditional Industrial Companies + startOffset: 826 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=826 + endOffset: 942 +- name: Sensorization and Cloud Processing to Enable Advanced Models + startOffset: 942 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=942 + endOffset: 1146 +- name: 'Reporting Line Impact: CTO vs CIO vs CMO vs CEO' + startOffset: 1146 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1146 + endOffset: 
1466 +- name: 'Data Practice Maturity Model: Crawl → Walk → Run' + startOffset: 1466 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1466 + endOffset: 1920 +- name: 'POC Strategy: Single End-to-End Project to Prove Value' + startOffset: 1920 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1920 + endOffset: 2306 +- name: 'Centralized Team: Roles, Tooling, and MLOps Standardization' + startOffset: 2306 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2306 + endOffset: 2619 +- name: 'Transition Risks: Centralized vs Decentralized Approaches' + startOffset: 2619 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2619 + endOffset: 2764 +- name: 'Embedded Teams: Reporting Structure, Ownership, and Trust' + startOffset: 2764 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2764 + endOffset: 2893 +- name: 'Hybrid Hub-and-Spoke Model: Balancing Autonomy and Standards' + startOffset: 2893 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2893 + endOffset: 3014 +- name: 'Shared Services: Experiment Tracking, Annotation, and Procurement' + startOffset: 3014 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3014 + endOffset: 3107 +- name: Recommended Reading and Resources for Building Data Teams + startOffset: 3107 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3107 + endOffset: 3159 +- name: 'Career Pivot: From Software Engineer to Data Scientist Internally' + startOffset: 3159 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3159 + endOffset: 3307 +- name: Timing and Strategies for Internal Role Transitions + startOffset: 3307 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3307 + endOffset: 3404 +- name: 'Research vs Production: ML Engineers and Productionizing Models' + startOffset: 3404 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3404 + endOffset: 3584 +- name: 'Career Advice: Expanding Scope to Increase Organizational Impact' + startOffset: 3584 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3584 + endOffset: 3693 +- 
name: 'Connect with Andrey: LinkedIn and Follow-up Resources' + startOffset: 3693 + url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3693 + endOffset: 3649 + transcript: - header: Introduction & Live Chat Poll Results - line: You might notice that there is a poll right now in the live chat, which asks @@ -955,111 +1053,6 @@ transcript: sec: 3702 time: '1:01:42' who: Alexey -description: 'Discover Industrial AI tactics, MLOps & sensorization to scale projects: - hub-and-spoke data teams, proving value, tooling and career steps to productionize - ML.' -intro: How do you move industrial AI from pilots to production at scale while keeping - MLOps, sensorization, and organizational design aligned? In this episode, Andrey - Shtylenko, Director of Engineering at Honeywell and head of the Advanced Technology - Group, walks through practical approaches to scaling industrial AI across healthcare, - industrial, and logistics verticals. Drawing on Honeywell use cases in smart sensors, - computer vision, and robotics, Andrey outlines the core challenges of AI adoption - in traditional industrial companies and the technical levers—sensorization, cloud - processing, and model productionization—that enable advanced models.

We - cover a data practice maturity model (crawl → walk → run), a focused POC strategy - using a single end‑to‑end project to prove value, and how to standardize MLOps through - centralized tooling and shared services like experiment tracking, annotation, and - procurement. Andrey also discusses organizational tradeoffs—centralized, embedded, - and hybrid hub‑and‑spoke data teams—and the reporting-line impacts on velocity and - trust. Listeners will get concrete guidance on building data teams, deploying MLOps, - and transitioning research into production-ready machine learning systems. -dateadded: '2022-11-05' -duration: PT01H49S -quotableClips: -- name: Introduction & Live Chat Poll Results - startOffset: 0 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=0 - endOffset: 149 -- name: 'Guest Introduction: Andrey Shtylenko, Honeywell' - startOffset: 149 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=149 - endOffset: 196 -- name: 'Career Journey: Startups, Organizational Development, and Honeywell' - startOffset: 196 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=196 - endOffset: 534 -- name: 'Honeywell Use Cases: Smart Sensors, Computer Vision, and Robotics' - startOffset: 534 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=534 - endOffset: 682 -- name: Defining Organizational Data and Machine Learning Practices - startOffset: 682 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=682 - endOffset: 826 -- name: Challenges of AI Adoption in Traditional Industrial Companies - startOffset: 826 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=826 - endOffset: 942 -- name: Sensorization and Cloud Processing to Enable Advanced Models - startOffset: 942 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=942 - endOffset: 1146 -- name: 'Reporting Line Impact: CTO vs CIO vs CMO vs CEO' - startOffset: 1146 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1146 - endOffset: 1466 -- name: 'Data Practice Maturity Model: Crawl → Walk → Run' - 
startOffset: 1466 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1466 - endOffset: 1920 -- name: 'POC Strategy: Single End-to-End Project to Prove Value' - startOffset: 1920 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=1920 - endOffset: 2306 -- name: 'Centralized Team: Roles, Tooling, and MLOps Standardization' - startOffset: 2306 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2306 - endOffset: 2619 -- name: 'Transition Risks: Centralized vs Decentralized Approaches' - startOffset: 2619 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2619 - endOffset: 2764 -- name: 'Embedded Teams: Reporting Structure, Ownership, and Trust' - startOffset: 2764 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2764 - endOffset: 2893 -- name: 'Hybrid Hub-and-Spoke Model: Balancing Autonomy and Standards' - startOffset: 2893 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=2893 - endOffset: 3014 -- name: 'Shared Services: Experiment Tracking, Annotation, and Procurement' - startOffset: 3014 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3014 - endOffset: 3107 -- name: Recommended Reading and Resources for Building Data Teams - startOffset: 3107 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3107 - endOffset: 3159 -- name: 'Career Pivot: From Software Engineer to Data Scientist Internally' - startOffset: 3159 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3159 - endOffset: 3307 -- name: Timing and Strategies for Internal Role Transitions - startOffset: 3307 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3307 - endOffset: 3404 -- name: 'Research vs Production: ML Engineers and Productionizing Models' - startOffset: 3404 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3404 - endOffset: 3584 -- name: 'Career Advice: Expanding Scope to Increase Organizational Impact' - startOffset: 3584 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3584 - endOffset: 3693 -- name: 'Connect with Andrey: LinkedIn and Follow-up Resources' - 
startOffset: 3693 - url: https://www.youtube.com/watch?v=XbDQv8FTA4U&t=3693 - endOffset: 3649 --- Links: diff --git a/_podcast/s05e06-building-and-leading-data-teams.md b/_podcast/building-and-scaling-data-team.md similarity index 96% rename from _podcast/s05e06-building-and-leading-data-teams.md rename to _podcast/building-and-scaling-data-team.md index 7606c39b..09f2158e 100644 --- a/_podcast/s05e06-building-and-leading-data-teams.md +++ b/_podcast/building-and-scaling-data-team.md @@ -1,12 +1,11 @@ --- -title: 'How to Build & Scale a Data Team: Hiring, Production ML, Forecasting & Driving - Adoption' -short: Building and Leading Data Teams -guests: -- tammyliang -image: images/podcast/s05e06-building-and-leading-data-teams.jpg +title: "How to Build & Scale a Data Team: Hiring, Production ML, Forecasting & Driving Adoption" +short: "Building and Leading Data Teams" season: 5 episode: 6 +guests: +- tammyliang +image: images/podcast/building-and-scaling-data-team.jpg ids: youtube: kI4V2iBbaH0 anchor: Building-and-Leading-Data-Teams---Tammy-Liang-e18efdl @@ -15,6 +14,138 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Building-and-Leading-Data-Teams---Tammy-Liang-e18efdl spotify: https://open.spotify.com/episode/3hlzKwORlOsCPKrawuW4YQ apple: https://podcasts.apple.com/us/podcast/building-and-leading-data-teams-tammy-liang/id1541710331?i=1000537994433 + +description: "Learn to build a scalable data team: hiring, production ML delivery, demand forecasting and driving adoption—practical staffing, stack, and governance tips." +intro: "How do you build and scale a data team that moves beyond dashboards to production ML, reliable forecasting, and real adoption across the business? In this episode Tammy Liang, Chief of Data at Platanomelón and co-host of Data for Future, walks through her journey building data capabilities for marketing, e-commerce, and operations at a mission-driven consumer brand.

Tammy breaks down practical hiring decisions—why she hired an analyst first, then a data engineer, and why early senior hires matter—plus the tradeoffs between analyst, engineer, and business-facing roles. She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, Notion) to enable forecasting and production ML, and describes common model delivery challenges moving work out of notebooks. The conversation also covers demand forecasting, time-series and basic machine learning skills, data accuracy and governance, dbt tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building trust.

Listen to learn concrete steps for hiring a data team, setting up a data warehouse for forecasting, delivering models to production, and creating data products that stakeholders actually use" +topics: +- team building +- data teams +- data engineering +- data analytics +- leadership +dateadded: 2021-10-09 + +duration: PT00H59M10S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=0 + endOffset: 74 +- name: 'Guest Background: Tammy Liang’s career path into data' + startOffset: 74 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=74 + endOffset: 247 +- name: 'Chief of Data Responsibilities: Marketing, e-commerce, and operations' + startOffset: 247 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=247 + endOffset: 404 +- name: 'Data Challenges for Sensitive Products: Social media restrictions & creative + tracking' + startOffset: 404 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=404 + endOffset: 442 +- name: 'First Project: Business health monitoring and dashboards' + startOffset: 442 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=442 + endOffset: 531 +- name: 'Cross-team Collaboration: Streamlining reporting and building trust' + startOffset: 531 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=531 + endOffset: 606 +- name: 'Handling Resistance: Spreadsheet culture and adoption hurdles' + startOffset: 606 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=606 + endOffset: 720 +- name: Scaling from Dashboards to Predictive Projects + startOffset: 720 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=720 + endOffset: 883 +- name: 'Model Delivery Challenges: From notebooks to production' + startOffset: 883 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=883 + endOffset: 904 +- name: 'Hiring Progression: First analyst then data engineer' + startOffset: 904 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=904 + endOffset: 1031 +- name: Building a Data Warehouse to 
Enable Forecasting + startOffset: 1031 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1031 + endOffset: 1121 +- name: 'Business-Facing Role: Hiring for adoption and communication' + startOffset: 1121 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1121 + endOffset: 1352 +- name: 'Data Stack Overview: Stitch, GCP, dbt, Data Studio, and Notion wiki' + startOffset: 1352 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1352 + endOffset: 1391 +- name: 'Rethinking Hiring Order: Importance of senior hires early' + startOffset: 1391 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1391 + endOffset: 1586 +- name: 'Prioritizing Roles: Analyst, engineer, and business analyst tradeoffs' + startOffset: 1586 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1586 + endOffset: 1760 +- name: 'Demand Forecasting: Data provision, stakeholder input, and iteration' + startOffset: 1760 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1760 + endOffset: 1857 +- name: 'Analyst Skills: Time series and basic machine learning as advantages' + startOffset: 1857 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1857 + endOffset: 1989 +- name: 'First-Hire Qualities: Business alignment and leadership mindset' + startOffset: 1989 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1989 + endOffset: 2138 +- name: 'Data Accuracy & Governance: Errors, playbook, and rebuilding trust' + startOffset: 2138 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2138 + endOffset: 2409 +- name: 'Data Testing & Monitoring: dbt tests and regular dashboard checks' + startOffset: 2409 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2409 + endOffset: 2502 +- name: 'Timely Insights: Operational visibility and campaign monitoring' + startOffset: 2502 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2502 + endOffset: 2739 +- name: 'Offline Attribution: Surveys, community sampling, and measuring TV/banners' + startOffset: 2739 + url: 
https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2739 + endOffset: 2828 +- name: 'Useful Data Products: Product mindset and business alignment' + startOffset: 2828 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2828 + endOffset: 2940 +- name: 'Driving Adoption: Workshops, Q&A sessions, and building data culture' + startOffset: 2940 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2940 + endOffset: 3052 +- name: 'Leadership Approach: Delegation, ownership, and team empowerment' + startOffset: 3052 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3052 + endOffset: 3159 +- name: 'Resources for New Data Leaders: Communities, courses, and mentors' + startOffset: 3159 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3159 + endOffset: 3249 +- name: 'Data For Future Podcast: Data + sustainability focus' + startOffset: 3249 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3249 + endOffset: 3379 +- name: 'Supporting Stuck Team Members: Google, communities, and networks' + startOffset: 3379 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3379 + endOffset: 3537 +- name: Closing Remarks & Where to Find Tammy (LinkedIn, dataforfuture.org) + startOffset: 3537 + url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3537 + endOffset: 3550 + transcript: - header: Podcast Introduction - header: 'Guest Background: Tammy Liang’s career path into data' @@ -62,7 +193,7 @@ transcript: sec: 157 time: '2:37' who: Tammy -- header: 'Chief of Data Responsibilities: Marketing, e‑commerce, and operations' +- header: 'Chief of Data Responsibilities: Marketing, e-commerce, and operations' - line: You started as the first data person that's really cool. What do you do as chief of data? 
sec: 247 @@ -144,7 +275,7 @@ transcript: sec: 508 time: '8:28' who: Alexey -- header: 'Cross‑team Collaboration: Streamlining reporting and building trust' +- header: 'Cross-team Collaboration: Streamlining reporting and building trust' - line: I would say that it's a little bit the other way around. It’s more about the management realizing, “Okay, we do have the need. Everyone is talking about data and for the company to grow further it’s maybe something we need.” But people @@ -305,7 +436,7 @@ transcript: sec: 1031 time: '17:11' who: Tammy -- header: 'Business‑Facing Role: Hiring for adoption and communication' +- header: 'Business-Facing Role: Hiring for adoption and communication' - line: As the data team we need to work a lot, because if we do not tell the team what we are producing, the tools are developed, but they just sit there and no one uses them. So we would just be wasting our time and energy. Therefore, the @@ -626,7 +757,7 @@ transcript: sec: 1930 time: '32:10' who: Tammy -- header: 'First‑Hire Qualities: Business alignment and leadership mindset' +- header: 'First-Hire Qualities: Business alignment and leadership mindset' - line: Yeah, thanks. I have a lot of questions that I didn't send you in the list of questions that I have prepared. So the question I have is, “What kind of qualities do you need to have as the first data person in a company? Should you be more @@ -1186,144 +1317,6 @@ transcript: sec: 3624 time: '1:00:24' who: Alexey -description: 'Learn to build a scalable data team: hiring, production ML delivery, - demand forecasting and driving adoption—practical staffing, stack, and governance - tips.' -intro: How do you build and scale a data team that moves beyond dashboards to production - ML, reliable forecasting, and real adoption across the business? 
In this episode - Tammy Liang, Chief of Data at Platanomelón and co‑host of Data for Future, walks - through her journey building data capabilities for marketing, e‑commerce, and operations - at a mission‑driven consumer brand.

Tammy breaks down practical hiring - decisions—why she hired an analyst first, then a data engineer, and why early senior - hires matter—plus the tradeoffs between analyst, engineer, and business‑facing roles. - She explains the technical foundation she built (Stitch, GCP, dbt, Data Studio, - Notion) to enable forecasting and production ML, and describes common model delivery - challenges moving work out of notebooks. The conversation also covers demand forecasting, - time‑series and basic machine learning skills, data accuracy and governance, dbt - tests and monitoring, and tactics for driving adoption—workshops, Q&A, and building - trust.

Listen to learn concrete steps for hiring a data team, setting up - a data warehouse for forecasting, delivering models to production, and creating - data products that stakeholders actually use. -dateadded: '2021-10-09' -duration: PT00H59M10S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=0 - endOffset: 74 -- name: 'Guest Background: Tammy Liang’s career path into data' - startOffset: 74 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=74 - endOffset: 247 -- name: 'Chief of Data Responsibilities: Marketing, e‑commerce, and operations' - startOffset: 247 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=247 - endOffset: 404 -- name: 'Data Challenges for Sensitive Products: Social media restrictions & creative - tracking' - startOffset: 404 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=404 - endOffset: 442 -- name: 'First Project: Business health monitoring and dashboards' - startOffset: 442 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=442 - endOffset: 531 -- name: 'Cross‑team Collaboration: Streamlining reporting and building trust' - startOffset: 531 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=531 - endOffset: 606 -- name: 'Handling Resistance: Spreadsheet culture and adoption hurdles' - startOffset: 606 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=606 - endOffset: 720 -- name: Scaling from Dashboards to Predictive Projects - startOffset: 720 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=720 - endOffset: 883 -- name: 'Model Delivery Challenges: From notebooks to production' - startOffset: 883 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=883 - endOffset: 904 -- name: 'Hiring Progression: First analyst then data engineer' - startOffset: 904 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=904 - endOffset: 1031 -- name: Building a Data Warehouse to Enable Forecasting - startOffset: 1031 - url: 
https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1031 - endOffset: 1121 -- name: 'Business‑Facing Role: Hiring for adoption and communication' - startOffset: 1121 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1121 - endOffset: 1352 -- name: 'Data Stack Overview: Stitch, GCP, dbt, Data Studio, and Notion wiki' - startOffset: 1352 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1352 - endOffset: 1391 -- name: 'Rethinking Hiring Order: Importance of senior hires early' - startOffset: 1391 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1391 - endOffset: 1586 -- name: 'Prioritizing Roles: Analyst, engineer, and business analyst tradeoffs' - startOffset: 1586 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1586 - endOffset: 1760 -- name: 'Demand Forecasting: Data provision, stakeholder input, and iteration' - startOffset: 1760 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1760 - endOffset: 1857 -- name: 'Analyst Skills: Time series and basic machine learning as advantages' - startOffset: 1857 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1857 - endOffset: 1989 -- name: 'First‑Hire Qualities: Business alignment and leadership mindset' - startOffset: 1989 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=1989 - endOffset: 2138 -- name: 'Data Accuracy & Governance: Errors, playbook, and rebuilding trust' - startOffset: 2138 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2138 - endOffset: 2409 -- name: 'Data Testing & Monitoring: dbt tests and regular dashboard checks' - startOffset: 2409 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2409 - endOffset: 2502 -- name: 'Timely Insights: Operational visibility and campaign monitoring' - startOffset: 2502 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2502 - endOffset: 2739 -- name: 'Offline Attribution: Surveys, community sampling, and measuring TV/banners' - startOffset: 2739 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2739 - endOffset: 2828 -- name: 'Useful Data 
Products: Product mindset and business alignment' - startOffset: 2828 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2828 - endOffset: 2940 -- name: 'Driving Adoption: Workshops, Q&A sessions, and building data culture' - startOffset: 2940 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=2940 - endOffset: 3052 -- name: 'Leadership Approach: Delegation, ownership, and team empowerment' - startOffset: 3052 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3052 - endOffset: 3159 -- name: 'Resources for New Data Leaders: Communities, courses, and mentors' - startOffset: 3159 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3159 - endOffset: 3249 -- name: 'Data For Future Podcast: Data + sustainability focus' - startOffset: 3249 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3249 - endOffset: 3379 -- name: 'Supporting Stuck Team Members: Google, communities, and networks' - startOffset: 3379 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3379 - endOffset: 3537 -- name: Closing Remarks & Where to Find Tammy (LinkedIn, dataforfuture.org) - startOffset: 3537 - url: https://www.youtube.com/watch?v=kI4V2iBbaH0&t=3537 - endOffset: 3550 --- diff --git a/_podcast/s16e03-collaborative-data-science-in-business.md b/_podcast/building-data-products-lead-data-scientist.md similarity index 97% rename from _podcast/s16e03-collaborative-data-science-in-business.md rename to _podcast/building-data-products-lead-data-scientist.md index 0342a444..962088a0 100644 --- a/_podcast/s16e03-collaborative-data-science-in-business.md +++ b/_podcast/building-data-products-lead-data-scientist.md @@ -1,20 +1,141 @@ --- +title: "Building Data Products at Scale: Intake, A/B Testing, and MLOps in a Marketing Organization" +short: "Collaborative Data Science in Business" +season: 16 episode: 3 guests: - ioannismesionis +image: images/podcast/s16e03-collaborative-data-science-in-business.jpg ids: - anchor: 
atatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c + anchor: datatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c youtube: 1pExOVuCF8Q -image: images/podcast/s16e03-collaborative-data-science-in-business.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Collaborative-Data-Science-in-Business---Ioannis-Mesionis-e2app0c apple: https://podcasts.apple.com/us/podcast/collaborative-data-science-in-business-ioannis-mesionis/id1541710331?i=1000632860980 spotify: https://open.spotify.com/episode/46DN6rAlufvvXaqdOomoTe?si=OMPDN8m5QZWsc5kJY8IcAA youtube: https://www.youtube.com/watch?v=1pExOVuCF8Q -season: 16 -short: Collaborative Data Science in Business -title: 'MLOps & Data Product Operating Model: Prioritization, A/B Testing & Model - Monitoring' + +description: "Discover MLOps tactics to prioritize data products, run A/B testing and enable model monitoring for faster validation, reliable rollouts and stakeholder buy-in" +topics: +- MLOps +- machine learning +- data science +- tools +- product management +intro: "How do you prioritize data product work, validate models in production, and keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a practical data product operating model for tackling those challenges.

Drawing on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains a four‑phase funnel with a "single front door" intake, a Definition of Done template with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR feasibility. He breaks down when to treat work as analytics vs. research, how R&D sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies for production rollout as MLOps capabilities evolve. Technical tooling and monitoring get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection — plus pragmatic dashboarding and alerting patterns. Listeners will come away with actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder engagement, and the estimation and cadence practices that make ML teams productive" +dateadded: 2023-10-29 + +duration: PT01H14S + +quotableClips: +- name: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) + startOffset: 100 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=100 + endOffset: 154 +- name: Career origin & early projects (mathematics degree, master's, internship model) + startOffset: 154 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=154 + endOffset: 443 +- name: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' + startOffset: 443 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=443 + endOffset: 512 +- name: 'Stakeholder collaboration: weekly embedded meetings and observation' + startOffset: 512 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=512 + endOffset: 675 +- name: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' + startOffset: 675 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=675 + endOffset: 840 +- name: 'Operating model for data products: four-phase funnel and accountability' + startOffset: 840 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=840 + endOffset: 923 +- name: 
'Project intake & prioritization: "single front door" and cross-functional + kickoff' + startOffset: 923 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=923 + endOffset: 1057 +- name: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' + startOffset: 1057 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1057 + endOffset: 1254 +- name: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' + startOffset: 1254 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1254 + endOffset: 1272 +- name: 'Data science vs analytics: choosing technical approach and leads' + startOffset: 1272 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1272 + endOffset: 1368 +- name: 'Research & development: modeling work, sprint planning and Kanban usage' + startOffset: 1368 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1368 + endOffset: 1517 +- name: 'Pilot & A/B testing: validating models against baseline KPIs and feedback + loops' + startOffset: 1517 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1517 + endOffset: 1645 +- name: 'Production rollout: spectrum of production and evolving MLOps capabilities' + startOffset: 1645 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1645 + endOffset: 1698 +- name: 'Organizational structure: domain-focused lead data scientists (scheduling, + ops, pricing)' + startOffset: 1698 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1698 + endOffset: 1821 +- name: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' + startOffset: 1821 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1821 + endOffset: 2138 +- name: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' + startOffset: 2138 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2138 + endOffset: 2297 +- name: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' + startOffset: 2297 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2297 + 
endOffset: 2449 +- name: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' + startOffset: 2449 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2449 + endOffset: 2493 +- name: 'Communicating technical results: simplifying concepts for non‑technical audiences' + startOffset: 2493 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2493 + endOffset: 2710 +- name: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a helper' + startOffset: 2710 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2710 + endOffset: 2918 +- name: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' + startOffset: 2918 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2918 + endOffset: 2950 +- name: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' + startOffset: 2950 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2950 + endOffset: 3213 +- name: 'Model monitoring with Evidently: drift detection and integration plans' + startOffset: 3213 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3213 + endOffset: 3311 +- name: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' + startOffset: 3311 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3311 + endOffset: 3429 +- name: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' + startOffset: 3429 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3429 + endOffset: 3660 +- name: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' + startOffset: 3660 + url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3660 + endOffset: 3614 + transcript: - header: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) - line: This week, we'll talk about collaborative data science in business. 
We have @@ -1278,133 +1399,6 @@ transcript: sec: 3714 time: '1:01:54' who: Alexey -description: Discover MLOps tactics to prioritize data products, run A/B testing and - enable model monitoring for faster validation, reliable rollouts and stakeholder - buy-in. -intro: How do you prioritize data product work, validate models in production, and - keep them monitored without overwhelming stakeholders? In this episode, Ioannis Mesionis, - Lead Data Scientist at easyJet and head of their MLOps efforts, walks through a - practical data product operating model for tackling those challenges.

Drawing - on his cross‑functional work with Digital, Customer & Marketing, Ioannis explains - a four‑phase funnel with a "single front door" intake, a Definition of Done template - with KPIs and fail‑fast checks, and an inception process that includes EDA and GDPR - feasibility. He breaks down when to treat work as analytics vs. research, how R&D - sprints and Kanban feed into pilot and A/B testing against baseline KPIs, and strategies - for production rollout as MLOps capabilities evolve. Technical tooling and monitoring - get concrete coverage — MLflow, Prefect/Airflow, and using Evidently for drift detection - — plus pragmatic dashboarding and alerting patterns. Listeners will come away with - actionable guidance on prioritization, designing A/B tests, model monitoring, stakeholder - engagement, and the estimation and cadence practices that make ML teams productive. -dateadded: '2023-10-29' -duration: PT01H14S -quotableClips: -- name: Episode introduction & guest Ioannis Mesionis (EasyJet lead data scientist) - startOffset: 100 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=100 - endOffset: 154 -- name: Career origin & early projects (mathematics degree, master's, internship model) - startOffset: 154 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=154 - endOffset: 443 -- name: 'Lead Data Scientist role: partnering with Digital Customer & Marketing' - startOffset: 443 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=443 - endOffset: 512 -- name: 'Stakeholder collaboration: weekly embedded meetings and observation' - startOffset: 512 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=512 - endOffset: 675 -- name: 'Business domain knowledge: PPC, SEO, keywords and conversion optimization' - startOffset: 675 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=675 - endOffset: 840 -- name: 'Operating model for data products: four-phase funnel and accountability' - startOffset: 840 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=840 - endOffset: 
923 -- name: 'Project intake & prioritization: "single front door" and cross-functional - kickoff' - startOffset: 923 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=923 - endOffset: 1057 -- name: 'Definition of Done: template, KPIs, success criteria and fail‑fast checks' - startOffset: 1057 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1057 - endOffset: 1254 -- name: 'Inception & EDA: data access, GDPR considerations and feasibility assessment' - startOffset: 1254 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1254 - endOffset: 1272 -- name: 'Data science vs analytics: choosing technical approach and leads' - startOffset: 1272 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1272 - endOffset: 1368 -- name: 'Research & development: modeling work, sprint planning and Kanban usage' - startOffset: 1368 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1368 - endOffset: 1517 -- name: 'Pilot & A/B testing: validating models against baseline KPIs and feedback - loops' - startOffset: 1517 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1517 - endOffset: 1645 -- name: 'Production rollout: spectrum of production and evolving MLOps capabilities' - startOffset: 1645 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1645 - endOffset: 1698 -- name: 'Organizational structure: domain-focused lead data scientists (scheduling, - ops, pricing)' - startOffset: 1698 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1698 - endOffset: 1821 -- name: 'Handling uncertainty in ML: MVPs, estimation practices and Kanban preference' - startOffset: 1821 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=1821 - endOffset: 2138 -- name: 'Sprint cadence: planning, stand-ups, bi‑weekly demos and stakeholder demos' - startOffset: 2138 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2138 - endOffset: 2297 -- name: 'Estimation techniques: T-shirt sizing, Planning Poker and Fibonacci points' - startOffset: 2297 - url: 
https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2297 - endOffset: 2449 -- name: 'Stakeholder engagement strategy: invite to demos, not daily stand-ups' - startOffset: 2449 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2449 - endOffset: 2493 -- name: 'Communicating technical results: simplifying concepts for non‑technical audiences' - startOffset: 2493 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2493 - endOffset: 2710 -- name: 'Developing soft skills: practice, analogies, feedback and ChatGPT as a helper' - startOffset: 2710 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2710 - endOffset: 2918 -- name: 'MLOps Zoomcamp takeaways: motivation for hands‑on MLOps learning' - startOffset: 2918 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2918 - endOffset: 2950 -- name: 'MLOps tooling overview: MLflow, Prefect, Airflow and engineering exposure' - startOffset: 2950 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=2950 - endOffset: 3213 -- name: 'Model monitoring with Evidently: drift detection and integration plans' - startOffset: 3213 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3213 - endOffset: 3311 -- name: 'Monitoring dashboards & alerts: Tableau quick solutions and custom emails' - startOffset: 3311 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3311 - endOffset: 3429 -- name: 'Recommended resources: Cassie Kozyrkov (Decision Intelligence) and textbooks' - startOffset: 3429 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3429 - endOffset: 3660 -- name: 'Closing remarks & contact: LinkedIn follow‑ups and final thoughts' - startOffset: 3660 - url: https://www.youtube.com/watch?v=1pExOVuCF8Q&t=3660 - endOffset: 3614 --- Links: diff --git a/_podcast/s11e06-product-owners-in-data-science.md b/_podcast/building-data-products-product-owner-vs-product-manager.md similarity index 97% rename from _podcast/s11e06-product-owners-in-data-science.md rename to _podcast/building-data-products-product-owner-vs-product-manager.md index 
231b575c..4cc4b170 100644 --- a/_podcast/s11e06-product-owners-in-data-science.md +++ b/_podcast/building-data-products-product-owner-vs-product-manager.md @@ -1,20 +1,116 @@ --- +title: "Building Data Products at Scale: Recommenders, Domain Ownership, and Hiring for Production ML" +short: "Product Owners in Data Science" +season: 11 episode: 6 guests: - annahannemann +image: images/podcast/building-data-products-product-owner-vs-product-manager.jpg ids: anchor: Product-Owners-in-Data-Science---Anna-Hannemann-e1q0ord youtube: rTRTjB6cGng -image: images/podcast/s11e06-product-owners-in-data-science.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Product-Owners-in-Data-Science---Anna-Hannemann-e1q0ord apple: https://podcasts.apple.com/us/podcast/product-owners-in-data-science-anna-hannemann/id1541710331?i=1000585888321 spotify: https://open.spotify.com/episode/5deNrH5E6802ClwVt2Re4A?si=Xdg7qlT1TPCrH318MvS2RA youtube: https://www.youtube.com/watch?v=rTRTjB6cGng -season: 11 -short: Product Owners in Data Science -title: 'Data Product Leadership: Scaling Recommenders, Production ML Hiring & Price - Markdown Modeling' + +description: "Discover scaling recommender systems, production ML hiring strategies and price markdown modeling to cut waste, optimize discounts, and lead data product teams" +intro: "How do you scale recommender systems, hire for production ML, and model price markdowns to reduce waste—and who should own those decisions? In this episode, Anna Hannemann, Domain Owner for Data Science at Metro.digital, walks through practical answers informed by her PhD in Data Science and prior leadership of recommender and robotics/smart logistics teams.

We cover customer data completeness, API-first recommender design, and algorithm choices like collaborative filtering and Word2Vec variants, plus the trade-offs product owners must manage. Anna contrasts product owner and product manager responsibilities, describes the domain owner role for aligning data scientists across teams, and lays out hiring strategies for production ML—data scientists, ML engineers, and MLOps. You’ll also hear how to source problems from operations, evaluate new data domains with MVPs and manual fixes, and take a portfolio approach to staging data product investments.

If you work in data product leadership, product management, or machine learning operations, this episode delivers actionable frameworks for scaling recommenders, building production ML capabilities, and applying price markdown modeling to optimize discounting and reduce waste. Recommended reading: Data Science for Business." +topics: +- data products +- product owners +- product managers +- data science +- machine learning +- MLOps +dateadded: 2022-11-11 + +duration: PT00H59M17S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=0 + endOffset: 92 +- name: Guest & METRO overview and customer data completeness + startOffset: 92 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=92 + endOffset: 289 +- name: Anna's academic and career background (PhD, web science, logistics) + startOffset: 289 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=289 + endOffset: 769 +- name: Value of technical expertise for data product leads + startOffset: 769 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=769 + endOffset: 911 +- name: Core product owner responsibilities and team advocacy + startOffset: 911 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=911 + endOffset: 1200 +- name: 'Role comparison: product owner versus product manager' + startOffset: 1200 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1200 + endOffset: 1328 +- name: 'Recommender systems at METRO: API-first design and scaling' + startOffset: 1328 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1328 + endOffset: 1801 +- name: 'Hiring strategy for production ML: data scientist, ML engineer, MLOps' + startOffset: 1801 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1801 + endOffset: 2093 +- name: 'Recommender algorithms: collaborative filtering and Word2Vec variants' + startOffset: 2093 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2093 + endOffset: 2155 +- name: 'Essential skills: metrics, trade-offs, and technical 
literacy for product + owners' + startOffset: 2155 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2155 + endOffset: 2312 +- name: 'Domain owner role: aligning data scientists across product teams' + startOffset: 2312 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2312 + endOffset: 2401 +- name: 'People management at scale: directs, reviews, and cross-team enablement' + startOffset: 2401 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2401 + endOffset: 2494 +- name: 'Price markdown modeling: reducing waste and optimal discounting' + startOffset: 2494 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2494 + endOffset: 2688 +- name: 'Sourcing problems from operations: business-driven prioritization' + startOffset: 2688 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2688 + endOffset: 2757 +- name: 'Managing multiple data domains: delegation, rotations, and budget ownership' + startOffset: 2757 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2757 + endOffset: 2924 +- name: 'Evaluating new domains: MVPs, manual fixes, and business justification' + startOffset: 2924 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2924 + endOffset: 3189 +- name: 'Portfolio approach: validating and staging data product investments' + startOffset: 3189 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3189 + endOffset: 3261 +- name: 'Community leadership: organizing ProductTank meetups' + startOffset: 3261 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3261 + endOffset: 3468 +- name: 'Recommended resource: "Data Science for Business" for data product roles' + startOffset: 3468 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3468 + endOffset: 3625 +- name: Episode wrap-up and live chat highlights + startOffset: 3625 + url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3625 + endOffset: 3557 + transcript: - header: Episode Introduction - header: Guest & METRO overview and customer data completeness @@ -1061,109 +1157,6 @@ transcript: sec: 3649 
time: '1:00:49' who: Anna -description: Discover scaling recommender systems, production ML hiring strategies - and price markdown modeling to cut waste, optimize discounts, and lead data product - teams. -intro: 'How do you scale recommender systems, hire for production ML, and model price - markdowns to reduce waste—and who should own those decisions? In this episode, Anna - Hannemann, Domain Owner for Data Science at Metro.digital, walks through practical - answers informed by her PhD in Data Science and prior leadership of recommender - and robotics/smart logistics teams.

We cover customer data completeness, - API-first recommender design, and algorithm choices like collaborative filtering - and Word2Vec variants, plus the trade-offs product owners must manage. Anna contrasts - product owner and product manager responsibilities, describes the domain owner role - for aligning data scientists across teams, and lays out hiring strategies for production - ML—data scientists, ML engineers, and MLOps. You’ll also hear how to source problems - from operations, evaluate new data domains with MVPs and manual fixes, and take - a portfolio approach to staging data product investments.

If you work in - data product leadership, product management, or machine learning operations, this - episode delivers actionable frameworks for scaling recommenders, building production - ML capabilities, and applying price markdown modeling to optimize discounting and - reduce waste. Recommended reading: Data Science for Business.' -dateadded: '2022-11-11' -duration: PT00H59M17S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=0 - endOffset: 92 -- name: Guest & METRO overview and customer data completeness - startOffset: 92 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=92 - endOffset: 289 -- name: Anna's academic and career background (PhD, web science, logistics) - startOffset: 289 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=289 - endOffset: 769 -- name: Value of technical expertise for data product leads - startOffset: 769 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=769 - endOffset: 911 -- name: Core product owner responsibilities and team advocacy - startOffset: 911 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=911 - endOffset: 1200 -- name: 'Role comparison: product owner versus product manager' - startOffset: 1200 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1200 - endOffset: 1328 -- name: 'Recommender systems at METRO: API-first design and scaling' - startOffset: 1328 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1328 - endOffset: 1801 -- name: 'Hiring strategy for production ML: data scientist, ML engineer, MLOps' - startOffset: 1801 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=1801 - endOffset: 2093 -- name: 'Recommender algorithms: collaborative filtering and Word2Vec variants' - startOffset: 2093 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2093 - endOffset: 2155 -- name: 'Essential skills: metrics, trade-offs, and technical literacy for product - owners' - startOffset: 2155 - url: 
https://www.youtube.com/watch?v=rTRTjB6cGng&t=2155 - endOffset: 2312 -- name: 'Domain owner role: aligning data scientists across product teams' - startOffset: 2312 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2312 - endOffset: 2401 -- name: 'People management at scale: directs, reviews, and cross-team enablement' - startOffset: 2401 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2401 - endOffset: 2494 -- name: 'Price markdown modeling: reducing waste and optimal discounting' - startOffset: 2494 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2494 - endOffset: 2688 -- name: 'Sourcing problems from operations: business-driven prioritization' - startOffset: 2688 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2688 - endOffset: 2757 -- name: 'Managing multiple data domains: delegation, rotations, and budget ownership' - startOffset: 2757 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2757 - endOffset: 2924 -- name: 'Evaluating new domains: MVPs, manual fixes, and business justification' - startOffset: 2924 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=2924 - endOffset: 3189 -- name: 'Portfolio approach: validating and staging data product investments' - startOffset: 3189 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3189 - endOffset: 3261 -- name: 'Community leadership: organizing ProductTank meetups' - startOffset: 3261 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3261 - endOffset: 3468 -- name: 'Recommended resource: "Data Science for Business" for data product roles' - startOffset: 3468 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3468 - endOffset: 3625 -- name: Episode wrap-up and live chat highlights - startOffset: 3625 - url: https://www.youtube.com/watch?v=rTRTjB6cGng&t=3625 - endOffset: 3557 --- Links: diff --git a/_podcast/s10e08-leading-data-research.md b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md similarity index 97% rename from 
_podcast/s10e08-leading-data-research.md rename to _podcast/building-data-science-programs-and-democratizing-high-performance-computing.md index 443fd221..f459c18b 100644 --- a/_podcast/s10e08-leading-data-research.md +++ b/_podcast/building-data-science-programs-and-democratizing-high-performance-computing.md @@ -1,19 +1,141 @@ --- +title: "Build Data Science Programs, Democratize HPC & Scale Graph Analytics with Arkouda" +short: "Leading Data Research" +season: 10 episode: 8 guests: - davidbader +image: images/podcast/building-data-science-programs-and-democratizing-high-performance-computing.jpg ids: anchor: Leading-Data-Research---David-Bader-e1nmt3r youtube: vZLlpsUlchQ -image: images/podcast/s10e08-leading-data-research.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Leading-Data-Research---David-Bader-e1nmt3r apple: https://podcasts.apple.com/us/podcast/leading-data-research-david-bader/id1541710331?i=1000579710785 spotify: https://open.spotify.com/episode/7DmFWFHUwxx4Wf0X6GbKBf?si=2DW0G2EMQ7ebB9K60LfJyQ youtube: https://www.youtube.com/watch?v=vZLlpsUlchQ -season: 10 -short: Leading Data Research -title: Build Data Science Programs, Democratize HPC & Scale Graph Analytics with Arkouda + +description: "Learn to build data science programs, democratize HPC and scale graph analytics with Arkouda - practical curriculum, performance tips and recruitment tips" +intro: "How do you build effective data science programs, democratize high-performance computing, and scale graph analytics so researchers and practitioners can solve real-world problems? In this episode, David Bader — Director of the Institute for Data Science at NJIT, founder of NJIT’s Department of Data Science, and a distinguished professor with deep expertise in HPC, big data, and analytics — walks through his career, leadership in launching academic units, and practical lessons for curriculum design and regional workforce alignment.

We explore Arkouda and ARACHNE — interactive, massive-scale Python analytics and graph tools — and the Chapel-backed supercomputing techniques that aim to democratize HPC for broader use. David discusses research lab-as-startup practices (open source releases, datasets like synthetic/SNAP, and industry partnerships with NSF, Accenture, NVIDIA), building usable systems to achieve adoption (including a NASA example), and underappreciated advances such as STINGER and streaming graph analytics. He also covers mentorship models, recruiting PhD and MS students, conference strategies, and balancing teaching, research, and service.

Listen to learn concrete approaches to creating data science programs, practical steps to scale graph analytics with Arkouda, and tactics for turning research into real-world impact" +topics: +- data science +- data analytics +- tools +dateadded: 2022-09-16 + +duration: PT01H03M03S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=0 + endOffset: 107 +- name: 'Guest Intro: David Bader — NJIT Institute for Data Science, research focus' + startOffset: 107 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=107 + endOffset: 200 +- name: Career Journey & Academic Appointments + startOffset: 200 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=200 + endOffset: 293 +- name: 'Daily Responsibilities: Research, Teaching, and Institute Leadership' + startOffset: 293 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=293 + endOffset: 371 +- name: Active Projects & Industry Partnerships (NSF, Accenture, NVIDIA) + startOffset: 371 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=371 + endOffset: 510 +- name: 'Launching Academic Units: Starting Departments and Degree Programs' + startOffset: 510 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=510 + endOffset: 541 +- name: Designing Data Science Curricula & Regional Workforce Alignment + startOffset: 541 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=541 + endOffset: 835 +- name: 'Academic Ranks: Assistant, Associate, Full, and Distinguished Professor' + startOffset: 835 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=835 + endOffset: 1061 +- name: 'Career Pathways: PhD, Postdoc, and Faculty Entry' + startOffset: 1061 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1061 + endOffset: 1143 +- name: 'Academic CV vs. 
Industry Resume: Documentation and Expectations' + startOffset: 1143 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1143 + endOffset: 1450 +- name: 'Arkouda & ARACHNE: Interactive, Massive-scale Python Analytics and Graph + Tools' + startOffset: 1450 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1450 + endOffset: 1655 +- name: 'Backend Performance: Chapel, Supercomputing, and Democratizing HPC' + startOffset: 1655 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1655 + endOffset: 1772 +- name: 'Research Lab as Startup: Open Source, Code Release, and Student Output' + startOffset: 1772 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1772 + endOffset: 1830 +- name: 'Finding Datasets: Synthetic Data, SNAP, and Industry Collaboration' + startOffset: 1830 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1830 + endOffset: 1958 +- name: Lab Composition & Mentorship Model (PhD, MS, undergrads, high school) + startOffset: 1958 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1958 + endOffset: 2139 +- name: 'Time Allocation: Balancing Teaching Load, Research, and Service' + startOffset: 2139 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2139 + endOffset: 2226 +- name: 'Most Rewarding Work: Linear-time Algorithm & Pancake-flipping Variant' + startOffset: 2226 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2226 + endOffset: 2429 +- name: 'Underappreciated Impact: STINGER and Streaming Graph Analytics' + startOffset: 2429 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2429 + endOffset: 2745 +- name: Virtual Seminar Series & NJIT Data Science YouTube Channel + startOffset: 2745 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2745 + endOffset: 2812 +- name: 'Teaching-focused Careers: Universities Prioritizing Instruction over Research' + startOffset: 2812 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2812 + endOffset: 2941 +- name: 'Staying Current: Journals, Conferences, and Information Triage' + startOffset: 2941 + 
url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2941 + endOffset: 3232 +- name: 'Favorite Conferences: Supercomputing, IPDPS, HPEC' + startOffset: 3232 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3232 + endOffset: 3298 +- name: 'Selecting Research Topics: Domain-driven, Impact-first Approach' + startOffset: 3298 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3298 + endOffset: 3400 +- name: 'Building Usable Systems: From Research to Real-world Adoption (NASA example)' + startOffset: 3400 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3400 + endOffset: 3491 +- name: 'Recruiting Students: PhD vs. Industry and Collaborative Opportunities' + startOffset: 3491 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3491 + endOffset: 3811 +- name: 'Contact & Resources: davidbader.net, Arkouda, NJIT Data Science links' + startOffset: 3811 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3811 + endOffset: 3846 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3846 + url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3846 + endOffset: 3783 + transcript: - header: Podcast Introduction - header: 'Guest Intro: David Bader — NJIT Institute for Data Science, research focus' @@ -1138,138 +1260,6 @@ transcript: sec: 3890 time: '1:04:50' who: Alexey -description: Learn to build data science programs, democratize HPC and scale graph - analytics with Arkouda - practical curriculum, performance tips and recruitment - tips -intro: How do you build effective data science programs, democratize high-performance - computing, and scale graph analytics so researchers and practitioners can solve - real-world problems? 
In this episode, David Bader — Director of the Institute for - Data Science at NJIT, founder of NJIT’s Department of Data Science, and a distinguished - professor with deep expertise in HPC, big data, and analytics — walks through his - career, leadership in launching academic units, and practical lessons for curriculum - design and regional workforce alignment.

We explore Arkouda and ARACHNE - — interactive, massive-scale Python analytics and graph tools — and the Chapel-backed - supercomputing techniques that aim to democratize HPC for broader use. David discusses - research lab-as-startup practices (open source releases, datasets like synthetic/SNAP, - and industry partnerships with NSF, Accenture, NVIDIA), building usable systems - to achieve adoption (including a NASA example), and underappreciated advances such - as STINGER and streaming graph analytics. He also covers mentorship models, recruiting - PhD and MS students, conference strategies, and balancing teaching, research, and - service.

Listen to learn concrete approaches to creating data science programs, - practical steps to scale graph analytics with Arkouda, and tactics for turning research - into real-world impact. -dateadded: '2022-09-16' -duration: PT01H03M03S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=0 - endOffset: 107 -- name: 'Guest Intro: David Bader — NJIT Institute for Data Science, research focus' - startOffset: 107 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=107 - endOffset: 200 -- name: Career Journey & Academic Appointments - startOffset: 200 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=200 - endOffset: 293 -- name: 'Daily Responsibilities: Research, Teaching, and Institute Leadership' - startOffset: 293 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=293 - endOffset: 371 -- name: Active Projects & Industry Partnerships (NSF, Accenture, NVIDIA) - startOffset: 371 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=371 - endOffset: 510 -- name: 'Launching Academic Units: Starting Departments and Degree Programs' - startOffset: 510 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=510 - endOffset: 541 -- name: Designing Data Science Curricula & Regional Workforce Alignment - startOffset: 541 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=541 - endOffset: 835 -- name: 'Academic Ranks: Assistant, Associate, Full, and Distinguished Professor' - startOffset: 835 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=835 - endOffset: 1061 -- name: 'Career Pathways: PhD, Postdoc, and Faculty Entry' - startOffset: 1061 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1061 - endOffset: 1143 -- name: 'Academic CV vs. 
Industry Resume: Documentation and Expectations' - startOffset: 1143 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1143 - endOffset: 1450 -- name: 'Arkouda & ARACHNE: Interactive, Massive-scale Python Analytics and Graph - Tools' - startOffset: 1450 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1450 - endOffset: 1655 -- name: 'Backend Performance: Chapel, Supercomputing, and Democratizing HPC' - startOffset: 1655 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1655 - endOffset: 1772 -- name: 'Research Lab as Startup: Open Source, Code Release, and Student Output' - startOffset: 1772 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1772 - endOffset: 1830 -- name: 'Finding Datasets: Synthetic Data, SNAP, and Industry Collaboration' - startOffset: 1830 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1830 - endOffset: 1958 -- name: Lab Composition & Mentorship Model (PhD, MS, undergrads, high school) - startOffset: 1958 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=1958 - endOffset: 2139 -- name: 'Time Allocation: Balancing Teaching Load, Research, and Service' - startOffset: 2139 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2139 - endOffset: 2226 -- name: 'Most Rewarding Work: Linear-time Algorithm & Pancake-flipping Variant' - startOffset: 2226 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2226 - endOffset: 2429 -- name: 'Underappreciated Impact: STINGER and Streaming Graph Analytics' - startOffset: 2429 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2429 - endOffset: 2745 -- name: Virtual Seminar Series & NJIT Data Science YouTube Channel - startOffset: 2745 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2745 - endOffset: 2812 -- name: 'Teaching-focused Careers: Universities Prioritizing Instruction over Research' - startOffset: 2812 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2812 - endOffset: 2941 -- name: 'Staying Current: Journals, Conferences, and Information Triage' - startOffset: 2941 - 
url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=2941 - endOffset: 3232 -- name: 'Favorite Conferences: Supercomputing, IPDPS, HPEC' - startOffset: 3232 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3232 - endOffset: 3298 -- name: 'Selecting Research Topics: Domain-driven, Impact-first Approach' - startOffset: 3298 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3298 - endOffset: 3400 -- name: 'Building Usable Systems: From Research to Real-world Adoption (NASA example)' - startOffset: 3400 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3400 - endOffset: 3491 -- name: 'Recruiting Students: PhD vs. Industry and Collaborative Opportunities' - startOffset: 3491 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3491 - endOffset: 3811 -- name: 'Contact & Resources: davidbader.net, Arkouda, NJIT Data Science links' - startOffset: 3811 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3811 - endOffset: 3846 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3846 - url: https://www.youtube.com/watch?v=vZLlpsUlchQ&t=3846 - endOffset: 3783 --- Links: diff --git a/_podcast/s01e03-building-ds-team.md b/_podcast/building-data-team.md similarity index 94% rename from _podcast/s01e03-building-ds-team.md rename to _podcast/building-data-team.md index 07343e62..e1f82c6a 100644 --- a/_podcast/s01e03-building-ds-team.md +++ b/_podcast/building-data-team.md @@ -1,11 +1,11 @@ --- -title: 'How to Build and Scale ML Teams: Hiring, MLOps & Product-Driven AI for Startups' -short: Building a Data Science Team -guests: -- dattran -image: images/podcast/s01e03-building-ds-team.jpg +title: "How to Build and Scale ML Teams: Hiring, MLOps & Product-Driven AI for Startups" +short: "Building a Data Science Team" season: 1 episode: 3 +guests: +- dattran +image: images/podcast/building-data-team.jpg ids: youtube: ScDIB-3O77A anchor: Building-a-Data-Science-Team---Dat-Tran-enlmef @@ -14,21 +14,141 @@ links: anchor: 
https://anchor.fm/datatalksclub/episodes/Building-a-Data-Science-Team---Dat-Tran-enlmef spotify: https://open.spotify.com/episode/0daFpY1z2J4Uop1XdMNsnY apple: https://podcasts.apple.com/us/podcast/building-a-data-science-team-dat-tran/id1541710331?i=1000502061864 -intro: 'How do you build and scale an ML team that delivers product-driven AI without - getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner - & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks - through practical strategies for hiring, MLOps, and shaping data teams for startups. -

Dat draws on a decade of production ML experience to unpack the MLOps mindset - (day‑two operations, model maintenance), how to hire early (T‑shaped generalists, - take‑home assessments, key hiring signals), and when to shift to specialists as - you scale. He also explains product-centric practices: aligning hiring to prototype - vs. MVP needs, prioritizing impact over technical perfection, and building human‑centric - AI (augmenting pricing managers at Priceloop). Other topics include open research - and open source as strategic advantages, bootstrapping data capabilities, retention - through autonomy and interesting work, and educating leadership about realistic - AI expectations.

Listen for actionable guidance on building ML teams, hiring - machine learning engineers, and implementing MLOps and product-driven AI in early‑stage - startups.' + +description: "Master building ML teams: hiring playbooks, MLOps day-two ops, and product-driven AI for startups—scale with T-shaped engineers, ship robust models." +intro: "How do you build and scale an ML team that delivers product-driven AI without getting bogged down by tech debt or false promises? In this episode, Dat Tran — Partner & CTO at DATANOMIQ and former AI lead at Axel Springer, idealo, and Pivotal — walks through practical strategies for hiring, MLOps, and shaping data teams for startups.

Dat draws on a decade of production ML experience to unpack the MLOps mindset (day-two operations, model maintenance), how to hire early (T-shaped generalists, take-home assessments, key hiring signals), and when to shift to specialists as you scale. He also explains product-centric practices: aligning hiring to prototype vs. MVP needs, prioritizing impact over technical perfection, and building human-centric AI (augmenting pricing managers at Priceloop). Other topics include open research and open source as strategic advantages, bootstrapping data capabilities, retention through autonomy and interesting work, and educating leadership about realistic AI expectations.

Listen for actionable guidance on building ML teams, hiring machine learning engineers, and implementing MLOps and product-driven AI in early-stage startups." +topics: +- leadership +- team building +- machine learning +- MLOps +- startup +dateadded: 2021-02-23 + +duration: PT00H58M44S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=0 + endOffset: 126 +- name: Guest Overview & Career Snapshot + startOffset: 126 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=126 + endOffset: 192 +- name: 'Early Background: Economics, Investment Banking & Early Coding' + startOffset: 192 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=192 + endOffset: 263 +- name: From VBA Automation to Machine Learning Interest + startOffset: 263 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=263 + endOffset: 373 +- name: 'Accenture & Big Data: Spark, MPP Databases and Early ML Projects' + startOffset: 373 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=373 + endOffset: 486 +- name: 'Pivotal Experience: Production ML, DevOps Practices & Engineering Rigor' + startOffset: 486 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=486 + endOffset: 560 +- name: 'MLOps Mindset: Day-Two Operations and Model Maintenance' + startOffset: 560 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=560 + endOffset: 667 +- name: Creating a Head of Data Role at Idealo + startOffset: 667 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=667 + endOffset: 804 +- name: 'Team Building & Open Source: Sustainable Machine Learning Culture' + startOffset: 804 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=804 + endOffset: 908 +- name: 'Axel Springer: Corporate Tech Transformation, Research & Evangelism' + startOffset: 908 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=908 + endOffset: 1158 +- name: 'Career Transition: Leaving Corporate to Found a Startup' + startOffset: 1158 + url: 
https://www.youtube.com/watch?v=ScDIB-3O77A&t=1158 + endOffset: 1226 +- name: 'Founding Priceloop: Technical Co-founder and Pricing Opportunity' + startOffset: 1226 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1226 + endOffset: 1399 +- name: 'Pricing Product Vision: White-Box AI Framework for Dynamic Pricing' + startOffset: 1399 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1399 + endOffset: 1492 +- name: 'Human-Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' + startOffset: 1492 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1492 + endOffset: 1525 +- name: 'Early-Stage Hiring Plan: Building a Tactical Product Team' + startOffset: 1525 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1525 + endOffset: 1645 +- name: 'Open Research Strategy: Community, Open-Source & Competitive Advantage' + startOffset: 1645 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1645 + endOffset: 1737 +- name: 'Aligning Hiring with Vision: Prototype, MVP & Feature Uncertainty' + startOffset: 1737 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1737 + endOffset: 1780 +- name: 'Cross-Functional Roles: ML Engineers, Data Engineers, PMs & Designers' + startOffset: 1780 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1780 + endOffset: 1839 +- name: 'Generalists First: T-Shaped Engineers for Early Startups' + startOffset: 1839 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1839 + endOffset: 2015 +- name: 'Mid-Stage Hiring: Shifting Toward Specialists as Maturity Grows' + startOffset: 2015 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2015 + endOffset: 2243 +- name: 'Product-Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' + startOffset: 2243 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2243 + endOffset: 2371 +- name: 'Encouraging Open Source: Managerial Coaching and Leading by Example' + startOffset: 2371 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2371 + endOffset: 2607 +- name: 'Hiring 
Signals: CVs, Coding Skills, Math Background & Soft Skills' + startOffset: 2607 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2607 + endOffset: 2851 +- name: 'Take-Home Assessments: Code Quality, Naming, Consistency & Detail' + startOffset: 2851 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2851 + endOffset: 2991 +- name: 'Project Prioritization: Impact vs Technical Feasibility & Fail-Fast' + startOffset: 2991 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2991 + endOffset: 3152 +- name: 'Bootstrapping Data Teams: When to Hire Engineers Versus Analysts' + startOffset: 3152 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3152 + endOffset: 3215 +- name: 'Corporate IT in a Tech Transformation: From Central IT to DevOps' + startOffset: 3215 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3215 + endOffset: 3263 +- name: 'Retention Strategies: Competitive Pay, Interesting Work & Autonomy' + startOffset: 3263 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3263 + endOffset: 3400 +- name: 'Expectation Management: Educating Leadership on AI Capabilities' + startOffset: 3400 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3400 + endOffset: 3619 +- name: Episode Wrap-Up & Key Takeaways + startOffset: 3619 + url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3619 + endOffset: 3524 + transcript: - header: Podcast Introduction - header: Guest Overview & Career Snapshot @@ -150,7 +270,7 @@ transcript: sec: 517 time: '8:37' who: Dat -- header: 'MLOps Mindset: Day‑Two Operations and Model Maintenance' +- header: 'MLOps Mindset: Day-Two Operations and Model Maintenance' - line: 'I learned a lot about this. I devised my own ideas on how to make it happen. Because at the time, no one was really thinking about that. What I was thinking was: how do you create this fancy machine learning model? 
How do you do all the @@ -310,7 +430,7 @@ transcript: sec: 1198 time: '19:58' who: Dat -- header: 'Founding Priceloop: Technical Co‑founder and Pricing Opportunity' +- header: 'Founding Priceloop: Technical Co-founder and Pricing Opportunity' - line: Then I was talking to a few friends. One idea was “Okay, maybe you go back to Vietnam.” I'm not from Vietnam, I'm from Germany, but maybe go to Vietnam and go to a consultancy, because the tech is really strong there, and maybe an idea @@ -352,7 +472,7 @@ transcript: sec: 1346 time: '22:26' who: Dat -- header: 'Pricing Product Vision: White‑Box AI Framework for Dynamic Pricing' +- header: 'Pricing Product Vision: White-Box AI Framework for Dynamic Pricing' - line: As far as you know, there's many AI software systems out there, also for pricing. Most of these pricing servers are actually more closed solutions. You get the data from your client, and then you put it into your system – maybe you have a @@ -376,7 +496,7 @@ transcript: sec: 1460 time: '24:20' who: Dat -- header: 'Human‑Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' +- header: 'Human-Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' - line: We don't want to take away the pricing manager. We don't want to tell them “Hey, if you're going to use this, you don't need to hire a pricing manager or you can fire the pricing manager.” No, we want to give them a frame of a tool. @@ -386,7 +506,7 @@ transcript: sec: 1492 time: '24:52' who: Dat -- header: 'Early‑Stage Hiring Plan: Building a Tactical Product Team' +- header: 'Early-Stage Hiring Plan: Building a Tactical Product Team' - line: A long story. But very interesting. What stood out to me was, first of all, you mentioned Andrew Ng and his course on Coursera. I think so many people ended up where they are now, because of that course. Including myself. 
Yeah, it changed @@ -408,7 +528,7 @@ transcript: sec: 1598 time: '26:38' who: Dat -- header: 'Open Research Strategy: Community, Open‑Source & Competitive Advantage' +- header: 'Open Research Strategy: Community, Open-Source & Competitive Advantage' - line: Our goal is to create a strong tactical product team. Which focuses on disrupting one of the industries. We believe that the future is in open research, and contribution from outside and contributing into ideas for many, many different organizations. @@ -438,7 +558,7 @@ transcript: sec: 1737 time: '28:57' who: Dat -- header: 'Cross‑Functional Roles: ML Engineers, Data Engineers, PMs & Designers' +- header: 'Cross-Functional Roles: ML Engineers, Data Engineers, PMs & Designers' - line: But we just don't know, which features will lead to this kind of thing. We are hiring for different roles that would take us to that point to get a better understanding of our vision. We’re building like an open framework. Like a library. @@ -455,7 +575,7 @@ transcript: sec: 1812 time: '30:12' who: Dat -- header: 'Generalists First: T‑Shaped Engineers for Early Startups' +- header: 'Generalists First: T-Shaped Engineers for Early Startups' - line: There's a lot of roles that need you to think about before. In the beginning you also need to think about – do you need very experienced people or inexperienced people? Also generalists with specialists? This is the question that you really @@ -505,7 +625,7 @@ transcript: sec: 1983 time: '33:03' who: Dat -- header: 'Mid‑Stage Hiring: Shifting Toward Specialists as Maturity Grows' +- header: 'Mid-Stage Hiring: Shifting Toward Specialists as Maturity Grows' - line: If I would map it to Idealo. Idealo was not very mature, but also not completely immature. It was in the middle of this transformation. 
They had a data analyst before – they had business intelligence people – they also had data engineering @@ -572,7 +692,7 @@ transcript: sec: 2241 time: '37:21' who: Dat -- header: 'Product‑Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' +- header: 'Product-Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' - line: You mentioned a couple of things previously. And one thing that stood out to me was – you want to build a strong product team. What does that mean to you – a strong product team? @@ -741,7 +861,7 @@ transcript: sec: 2841 time: '47:21' who: Alexey -- header: 'Take‑Home Assessments: Code Quality, Naming, Consistency & Detail' +- header: 'Take-Home Assessments: Code Quality, Naming, Consistency & Detail' - line: The second interview is a homework assignment. I send out a homework, which is not very difficult. Then they send me the code, whether it is Jupyter Notebook or whatever. Then I check it. From this simple task, you could already see how @@ -783,7 +903,7 @@ transcript: sec: 2939 time: '48:59' who: Alexey -- header: 'Project Prioritization: Impact vs Technical Feasibility & Fail‑Fast' +- header: 'Project Prioritization: Impact vs Technical Feasibility & Fail-Fast' - line: This is always a very difficult question. It's risky. Let's say you have 100 projects. You have only limited resources, which means you need to pick the one that has the highest return on investment. What I do is – I have this matrix. @@ -950,7 +1070,7 @@ transcript: sec: 3608 time: '60:08' who: Alexey -- header: Episode Wrap‑Up & Key Takeaways +- header: Episode Wrap-Up & Key Takeaways - line: Yeah. Thanks a lot for taking time to come here and share your knowledge with us and your expertise. Thanks a lot and thank you everyone for attending and you questions. And we will put the video out soon. And yeah – that’s all, I think. 
@@ -967,131 +1087,6 @@ transcript: sec: 3650 time: '60:50' who: Alexey -description: 'Master building ML teams: hiring playbooks, MLOps day-two ops, and product-driven - AI for startups—scale with T-shaped engineers, ship robust models.' -dateadded: '2021-02-23' -duration: PT00H58M44S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=0 - endOffset: 126 -- name: Guest Overview & Career Snapshot - startOffset: 126 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=126 - endOffset: 192 -- name: 'Early Background: Economics, Investment Banking & Early Coding' - startOffset: 192 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=192 - endOffset: 263 -- name: From VBA Automation to Machine Learning Interest - startOffset: 263 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=263 - endOffset: 373 -- name: 'Accenture & Big Data: Spark, MPP Databases and Early ML Projects' - startOffset: 373 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=373 - endOffset: 486 -- name: 'Pivotal Experience: Production ML, DevOps Practices & Engineering Rigor' - startOffset: 486 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=486 - endOffset: 560 -- name: 'MLOps Mindset: Day‑Two Operations and Model Maintenance' - startOffset: 560 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=560 - endOffset: 667 -- name: Creating a Head of Data Role at Idealo - startOffset: 667 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=667 - endOffset: 804 -- name: 'Team Building & Open Source: Sustainable Machine Learning Culture' - startOffset: 804 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=804 - endOffset: 908 -- name: 'Axel Springer: Corporate Tech Transformation, Research & Evangelism' - startOffset: 908 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=908 - endOffset: 1158 -- name: 'Career Transition: Leaving Corporate to Found a Startup' - startOffset: 1158 - url: 
https://www.youtube.com/watch?v=ScDIB-3O77A&t=1158 - endOffset: 1226 -- name: 'Founding Priceloop: Technical Co‑founder and Pricing Opportunity' - startOffset: 1226 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1226 - endOffset: 1399 -- name: 'Pricing Product Vision: White‑Box AI Framework for Dynamic Pricing' - startOffset: 1399 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1399 - endOffset: 1492 -- name: 'Human‑Centric Pricing: Augmenting Pricing Managers, Not Replacing Them' - startOffset: 1492 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1492 - endOffset: 1525 -- name: 'Early‑Stage Hiring Plan: Building a Tactical Product Team' - startOffset: 1525 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1525 - endOffset: 1645 -- name: 'Open Research Strategy: Community, Open‑Source & Competitive Advantage' - startOffset: 1645 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1645 - endOffset: 1737 -- name: 'Aligning Hiring with Vision: Prototype, MVP & Feature Uncertainty' - startOffset: 1737 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1737 - endOffset: 1780 -- name: 'Cross‑Functional Roles: ML Engineers, Data Engineers, PMs & Designers' - startOffset: 1780 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1780 - endOffset: 1839 -- name: 'Generalists First: T‑Shaped Engineers for Early Startups' - startOffset: 1839 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=1839 - endOffset: 2015 -- name: 'Mid‑Stage Hiring: Shifting Toward Specialists as Maturity Grows' - startOffset: 2015 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2015 - endOffset: 2243 -- name: 'Product‑Centric Culture: Customer Focus, Fast Iteration & Feedback Loops' - startOffset: 2243 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2243 - endOffset: 2371 -- name: 'Encouraging Open Source: Managerial Coaching and Leading by Example' - startOffset: 2371 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2371 - endOffset: 2607 -- name: 'Hiring 
Signals: CVs, Coding Skills, Math Background & Soft Skills' - startOffset: 2607 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2607 - endOffset: 2851 -- name: 'Take‑Home Assessments: Code Quality, Naming, Consistency & Detail' - startOffset: 2851 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2851 - endOffset: 2991 -- name: 'Project Prioritization: Impact vs Technical Feasibility & Fail‑Fast' - startOffset: 2991 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=2991 - endOffset: 3152 -- name: 'Bootstrapping Data Teams: When to Hire Engineers Versus Analysts' - startOffset: 3152 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3152 - endOffset: 3215 -- name: 'Corporate IT in a Tech Transformation: From Central IT to DevOps' - startOffset: 3215 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3215 - endOffset: 3263 -- name: 'Retention Strategies: Competitive Pay, Interesting Work & Autonomy' - startOffset: 3263 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3263 - endOffset: 3400 -- name: 'Expectation Management: Educating Leadership on AI Capabilities' - startOffset: 3400 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3400 - endOffset: 3619 -- name: Episode Wrap‑Up & Key Takeaways - startOffset: 3619 - url: https://www.youtube.com/watch?v=ScDIB-3O77A&t=3619 - endOffset: 3524 --- ## Books diff --git a/_podcast/s18e07-building-domestic-risk-assessment-tool.md b/_podcast/building-domestic-risk-assessment-tool.md similarity index 61% rename from _podcast/s18e07-building-domestic-risk-assessment-tool.md rename to _podcast/building-domestic-risk-assessment-tool.md index 21e353e6..0a86b2c6 100644 --- a/_podcast/s18e07-building-domestic-risk-assessment-tool.md +++ b/_podcast/building-domestic-risk-assessment-tool.md @@ -1,40 +1,38 @@ --- +title: "Building a Domestic Risk Assessment Tool: Data Cleaning, Risk Scoring Models and Privacy Compliance" +short: "Building a Domestic Risk Assessment Tool" +season: 18 episode: 7 guests: - 
sabinafirtala +image: images/podcast/building-domestic-risk-assessment-tool.jpg ids: - anchor: lub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i + anchor: datatalksclub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i youtube: CpWlBAmD9ok -image: images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Building-a-Domestic-Risk-Assessment-Tool---Sabina-Firtala-e2lr92i apple: https://podcasts.apple.com/us/podcast/building-a-domestic-risk-assessment-tool-sabina-firtala/id1541710331?i=1000662124309 spotify: https://open.spotify.com/episode/7bjORhGzTQoxtbv60mMtzW?si=p6UaBdZJTnGvlwbGb6AsFQ youtube: https://www.youtube.com/watch?v=CpWlBAmD9ok -season: 18 -short: Building a Domestic Risk Assessment Tool -title: 'Build a Domestic Risk Assessment Tool for Triage: Data, Models, Privacy & - Deployment' -description: 'Learn to build a domestic risk assessment tool: data cleaning, risk - scoring models, privacy and deployment strategies to improve triage and resource - allocation.' -intro: 'How do you build a domestic risk assessment tool that meaningfully improves - triage while protecting people’s privacy and avoiding bias? In this episode, Sabina - Firtala from Frontline’s AI product development walks through the end-to-end process - of building a domestic risk assessment tool for triage. Sabina brings hands-on experience - across data wrangling, visualization, statistical testing, model training and validation, - with a background in Natural Sciences and prior analyst roles in finance and SaaS, - plus freelance work for mission-driven projects.

We cover problem framing - and project scope, data sources (case management systems, public records, surveys), - and data preparation: cleaning, linking and feature engineering. Sabina explains - risk scoring and model architecture, evaluation metrics and bias assessment, and - practical privacy, ethical and legal compliance measures. Deployment topics include - integrating risk tools into frontline workflows, user interface and decision-support - design, stakeholder training and trust, plus monitoring for model drift and alerts. - The episode also addresses operational constraints, partnerships, funding and open - resources. Listen for concrete guidance on building, evaluating and deploying a - domestic risk assessment tool—focused on impact, fairness, privacy and sustainability.' -dateadded: '2024-07-15' +description: "Discover building a domestic risk assessment: data cleaning, risk scoring models, and privacy compliance to improve triage, reduce bias, and ensure compliance." +topics: +- data science +- machine learning +- data engineering +- data governance +- MLOps +intro: "How do you build an accurate, privacy-compliant domestic risk assessment tool that frontline teams can actually use? In this episode Sabina Firtala — who leads Frontline’s AI product development and brings experience in data wrangling, model validation, and applied analytics from finance, SaaS, and mission-driven projects — walks through a practical roadmap.

We cover problem framing and project scope; sources like case management, public records, and surveys; and hands-on data + work: cleaning, linking, and feature engineering. Sabina explains risk scoring approaches + and model architecture, evaluation metrics and bias assessment, plus privacy, ethical + considerations, and legal data governance. You’ll also hear about deployment into + frontline workflows, user interface and decision-support design, training and stakeholder + trust, ongoing monitoring and drift detection, and examples of impact on triage + and resource allocation. The conversation closes with collaboration strategies, + funding and scaling, open documentation for reproducibility, and concrete lessons + learned.

Listen for actionable guidance on data cleaning, building and + validating risk scoring models, and ensuring privacy compliance so you can design + responsible, usable domestic risk assessment tools." +dateadded: 2024-07-15 quotableClips: - name: Podcast Introduction startOffset: 0 @@ -124,8 +122,18 @@ quotableClips: startOffset: 3840 url: https://www.youtube.com/watch?v=CpWlBAmD9ok&t=3840 endOffset: 3840 ---- +context: 'Context: The episode follows the end-to-end effort to create a domestic + risk assessment tool—framing the problem, assembling and preparing data, designing + and evaluating models, addressing privacy, ethics and legal needs, integrating with + frontline workflows and interfaces, engaging stakeholders, monitoring performance, + and planning for scale, funding, and reproducibility. + Core theme: Designing and operationalizing a people-centered, data-driven domestic + risk assessment that translates technical rigor into trustworthy, ethical, legally + compliant, and user-friendly decision support—balancing accuracy, fairness, privacy, + and sustainability so models meaningfully improve frontline triage and resource + allocation in the real world.' 
+--- Links: * [LinkedI](https://www.linkedin.com/company/frontline100/){:target="_blank"} diff --git a/_podcast/s14e09-interpretable-ai-and-ml.md b/_podcast/building-explainable-and-actionable-ai-ml-systems.md similarity index 95% rename from _podcast/s14e09-interpretable-ai-and-ml.md rename to _podcast/building-explainable-and-actionable-ai-ml-systems.md index eaf25149..59f39f3f 100644 --- a/_podcast/s14e09-interpretable-ai-and-ml.md +++ b/_podcast/building-explainable-and-actionable-ai-ml-systems.md @@ -1,20 +1,148 @@ --- +title: "Build Explainable and Actionable AI/ML Systems: Industrial PhD, Trust Theory & Production Deployment" +short: "Build Explainable and Actionable AI/ML Systems" +season: 14 episode: 9 guests: - polinamosolova +image: images/podcast/building-explainable-and-actionable-ai-ml-systems.jpg ids: - anchor: atatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq + anchor: datatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq youtube: EQcY83VA0Us -image: images/podcast/s14e09-interpretable-ai-and-ml.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Interpretable-AI-and-ML---Polina-Mosolova-e26hffq apple: https://podcasts.apple.com/us/podcast/interpretable-ai-and-ml-polina-mosolova/id1541710331?i=1000619926085 spotify: https://open.spotify.com/episode/0p84r6bZmgKO514oC1HE2L?si=30L5gJoSS6Wtrghtdr3jYA youtube: https://www.youtube.com/watch?v=EQcY83VA0Us -season: 14 -short: Interpretable AI and ML -title: 'Actionable Churn Prediction: Explainable AI, Organizational Trust (ABI) & - MLOps' + +description: "Build trustworthy ML systems that drive business decisions through explainable AI, organizational trust theory, and actionable model deployment." +intro: "How do you build ML systems that business teams trust and can act on? 
In this episode, Polina Mosolova — a data scientist at SAP who completed an industrial PhD building end-to-end ML pipelines — demonstrates how to bridge research and production through explainable AI grounded in organizational trust theory. Drawing from her churn prediction research, Polina shows how the ABI framework (Ability, Benevolence, Integrity) transforms model explanations into actionable business interventions.

We explore the industrial PhD path as a vehicle for building trustworthy ML systems, covering the practical tensions of research and production deliverables, supervision dynamics, and how academic rigor enhances deployable models. Technical deep-dives include interpretability versus explainability versus actionable ML, model architecture choices (glass-box models, GAMs, Neural Additive Models), explainability tooling (random forest + SHAP), computer vision activation maps, and why LLM explainability faces unique challenges compared to tabular models. The conversation ties together trust proxies, KPIs, and MLOps practices that make explanations business-relevant.

Listen to learn a systematic approach for building ML systems where explanations drive decisions — essential for data scientists who need to deploy models that stakeholders understand, trust, and can act upon to achieve measurable business outcomes." +topics: +- machine learning +- AI +- MLOps +- explainable AI +- interpretability +dateadded: 2023-07-08 + +duration: PT01H01M48S + +quotableClips: +- name: Episode Introduction & Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=0 + endOffset: 74 +- name: 'Guest Introduction: Polina Mosolova — Industrial PhD and Churn Prediction' + startOffset: 74 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=74 + endOffset: 125 +- name: 'Career Journey: Industrial PhD to Full-Stack Data Scientist at SAP' + startOffset: 125 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=125 + endOffset: 439 +- name: 'Role Evolution: From Full-Stack Data Scientist to MLOps Specialization' + startOffset: 439 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=439 + endOffset: 559 +- name: 'PhD Practice: Building End-to-End ML Pipelines During Doctoral Research' + startOffset: 559 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=559 + endOffset: 634 +- name: 'Dual Goals: Balancing Academic Research and Production Deliverables' + startOffset: 634 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=634 + endOffset: 753 +- name: 'Dissertation Focus: Churn Prediction Informed by Organizational Trust Theory' + startOffset: 753 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=753 + endOffset: 842 +- name: 'Production Challenges: Deploying Research Models in Industry' + startOffset: 842 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=842 + endOffset: 1077 +- name: 'Supervision & Stakeholders: Academic and Company Support Structures' + startOffset: 1077 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1077 + endOffset: 1145 +- name: 'Research-Industry Bridge: Academic Conferences and Summer Schools' + 
startOffset: 1145 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1145 + endOffset: 1237 +- name: 'Time Management: Balancing PhD Writing with Industrial Responsibilities' + startOffset: 1237 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1237 + endOffset: 1478 +- name: 'Finding Industrial PhDs: Prevalence, Companies, and How to Search' + startOffset: 1478 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1478 + endOffset: 1661 +- name: 'Practical Tips: Job Postings, Language Requirements, and Application Search' + startOffset: 1661 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1661 + endOffset: 1792 +- name: 'Organizational Trust Theory: ABI Framework — Ability, Benevolence, Integrity' + startOffset: 1792 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1792 + endOffset: 2076 +- name: Pricing, Contracts, and Trust Dynamics in Subscription Services + startOffset: 2076 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2076 + endOffset: 2299 +- name: Linking Organizational Trust to Explainable AI and Feature Design + startOffset: 2299 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2299 + endOffset: 2514 +- name: 'Actionability: Turning Explanations into Usable Business Interventions' + startOffset: 2514 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2514 + endOffset: 2643 +- name: 'Definitions: Interpretability vs Explainability vs Actionable ML' + startOffset: 2643 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2643 + endOffset: 2842 +- name: 'Model Choices: Glass-Box Models, Generalized Additive Models, Neural Additive + Models' + startOffset: 2842 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2842 + endOffset: 2940 +- name: 'Explainability Tools: Random Forest + SHAP — Explainable vs Interpretable' + startOffset: 2940 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2940 + endOffset: 3047 +- name: 'Computer Vision Explainability: Activation Maps and Human Interpretability' + startOffset: 3047 + url: 
https://www.youtube.com/watch?v=EQcY83VA0Us&t=3047 + endOffset: 3108 +- name: 'Summary: Interpretable Models, Explainable Outputs, and Actionable Decisions' + startOffset: 3108 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3108 + endOffset: 3159 +- name: 'Audience Matters: Explainable Feature Spaces and Tailoring Explanations' + startOffset: 3159 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3159 + endOffset: 3323 +- name: 'Explainable AI and Trust: User Confidence, Provenance, and Transparency' + startOffset: 3323 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3323 + endOffset: 3463 +- name: 'LLMs and Hallucinations: Explainability Challenges Versus Tabular Models' + startOffset: 3463 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3463 + endOffset: 3498 +- name: 'Measuring Trust: KPIs, Proxies, and Ethical Constraints' + startOffset: 3498 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3498 + endOffset: 3629 +- name: 'Business Relevance: Practical Proxies for Trust and Prioritizing Product + Ability' + startOffset: 3629 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3629 + endOffset: 3761 +- name: Episode Wrap-Up and Closing Remarks + startOffset: 3761 + url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3761 + endOffset: 3708 + transcript: - header: Episode Introduction & Overview - header: 'Guest Introduction: Polina Mosolova — Industrial PhD and Churn Prediction' @@ -38,7 +166,7 @@ transcript: sec: 116 time: '1:56' who: Alexey -- header: 'Career Journey: Industrial PhD to Full‑Stack Data Scientist at SAP' +- header: 'Career Journey: Industrial PhD to Full-Stack Data Scientist at SAP' - line: So let's start. Before we go into our main topic of interpretable/explainable AI and ML, let's start with your background. Can you tell us about your career journey so far? 
@@ -82,7 +210,7 @@ transcript: sec: 418 time: '6:58' who: Polina -- header: 'Role Evolution: From Full‑Stack Data Scientist to MLOps Specialization' +- header: 'Role Evolution: From Full-Stack Data Scientist to MLOps Specialization' - line: Yeah, it's funny that you mentioned this full-stack data scientist term. When I first gave this talk like two or three years ago, it was a thing because the role of an ML engineer was not yet that developed. It wasn't that common. And @@ -116,7 +244,7 @@ transcript: sec: 499 time: '8:19' who: Polina -- header: 'PhD Practice: Building End‑to‑End ML Pipelines During Doctoral Research' +- header: 'PhD Practice: Building End-to-End ML Pipelines During Doctoral Research' - line: Is it a common situation when a PhD student actually needs to do everything end-to-end? Because I think it is, right? That's kind of the point. Or is there usually help? @@ -293,7 +421,7 @@ transcript: sec: 1126 time: '18:46' who: Alexey -- header: 'Research‑Industry Bridge: Academic Conferences and Summer Schools' +- header: 'Research-Industry Bridge: Academic Conferences and Summer Schools' - line: Day-to-day, I think it was just a data science project. Just the data science work that you can imagine – regular calls with stakeholders. I think that's not that much different from what every data scientist who has business facing roles @@ -782,7 +910,7 @@ transcript: sec: 2841 time: '47:21' who: Polina -- header: 'Model Choices: Glass‑Box Models, Generalized Additive Models, Neural Additive +- header: 'Model Choices: Glass-Box Models, Generalized Additive Models, Neural Additive Models' - line: And then would random forest plus SHAP values be a glass box model or black box? @@ -1072,7 +1200,7 @@ transcript: sec: 3748 time: '1:02:28' who: Polina -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: Okay. I think we should be wrapping up. 
Thanks a lot, Polina, for joining us today, for sharing your experience with us, for telling us about your experience doing a PhD, and your work. And thanks, everyone, for joining us today too, and @@ -1084,142 +1212,6 @@ transcript: sec: 3782 time: '1:03:02' who: Polina -description: Master churn prediction with explainable AI and MLOps—learn ABI trust, - interpretable feature design, and deploy actionable models to reduce subscription - loss. -intro: How do you turn churn prediction research into models that business teams trust - and can act on? In this episode, Polina Mosolova — a data scientist at SAP who completed - an industrial PhD building end‑to‑end ML pipelines — walks through her applied framework - for churn prediction that integrates explainable AI with organizational trust theory. -

We cover Polina’s journey from full‑stack data scientist to MLOps specialization, - the practical tensions of producing research and production deliverables, and supervision - and stakeholder dynamics for industrial PhDs. The conversation centers on the ABI - framework (Ability, Benevolence, Integrity) and how trust proxies and KPIs make - churn models business‑relevant. Technical topics include interpretability versus - explainability versus actionable ML, model choices (glass‑box models, GAMs, Neural - Additive Models), explainability tools (random forest + SHAP), computer vision activation - maps, and the limits of LLM explainability and hallucinations compared to tabular - models.

Listen to learn concrete guidance for deploying explainable churn - models, translating explanations into interventions, and operationalizing trust - through MLOps and practical metrics — essential for data scientists building production - churn prediction systems. -dateadded: '2023-07-08' -duration: PT01H01M48S -quotableClips: -- name: Episode Introduction & Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=0 - endOffset: 74 -- name: 'Guest Introduction: Polina Mosolova — Industrial PhD and Churn Prediction' - startOffset: 74 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=74 - endOffset: 125 -- name: 'Career Journey: Industrial PhD to Full‑Stack Data Scientist at SAP' - startOffset: 125 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=125 - endOffset: 439 -- name: 'Role Evolution: From Full‑Stack Data Scientist to MLOps Specialization' - startOffset: 439 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=439 - endOffset: 559 -- name: 'PhD Practice: Building End‑to‑End ML Pipelines During Doctoral Research' - startOffset: 559 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=559 - endOffset: 634 -- name: 'Dual Goals: Balancing Academic Research and Production Deliverables' - startOffset: 634 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=634 - endOffset: 753 -- name: 'Dissertation Focus: Churn Prediction Informed by Organizational Trust Theory' - startOffset: 753 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=753 - endOffset: 842 -- name: 'Production Challenges: Deploying Research Models in Industry' - startOffset: 842 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=842 - endOffset: 1077 -- name: 'Supervision & Stakeholders: Academic and Company Support Structures' - startOffset: 1077 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1077 - endOffset: 1145 -- name: 'Research‑Industry Bridge: Academic Conferences and Summer Schools' - startOffset: 1145 - url: 
https://www.youtube.com/watch?v=EQcY83VA0Us&t=1145 - endOffset: 1237 -- name: 'Time Management: Balancing PhD Writing with Industrial Responsibilities' - startOffset: 1237 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1237 - endOffset: 1478 -- name: 'Finding Industrial PhDs: Prevalence, Companies, and How to Search' - startOffset: 1478 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1478 - endOffset: 1661 -- name: 'Practical Tips: Job Postings, Language Requirements, and Application Search' - startOffset: 1661 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1661 - endOffset: 1792 -- name: 'Organizational Trust Theory: ABI Framework — Ability, Benevolence, Integrity' - startOffset: 1792 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=1792 - endOffset: 2076 -- name: Pricing, Contracts, and Trust Dynamics in Subscription Services - startOffset: 2076 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2076 - endOffset: 2299 -- name: Linking Organizational Trust to Explainable AI and Feature Design - startOffset: 2299 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2299 - endOffset: 2514 -- name: 'Actionability: Turning Explanations into Usable Business Interventions' - startOffset: 2514 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2514 - endOffset: 2643 -- name: 'Definitions: Interpretability vs Explainability vs Actionable ML' - startOffset: 2643 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2643 - endOffset: 2842 -- name: 'Model Choices: Glass‑Box Models, Generalized Additive Models, Neural Additive - Models' - startOffset: 2842 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2842 - endOffset: 2940 -- name: 'Explainability Tools: Random Forest + SHAP — Explainable vs Interpretable' - startOffset: 2940 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=2940 - endOffset: 3047 -- name: 'Computer Vision Explainability: Activation Maps and Human Interpretability' - startOffset: 3047 - url: 
https://www.youtube.com/watch?v=EQcY83VA0Us&t=3047 - endOffset: 3108 -- name: 'Summary: Interpretable Models, Explainable Outputs, and Actionable Decisions' - startOffset: 3108 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3108 - endOffset: 3159 -- name: 'Audience Matters: Explainable Feature Spaces and Tailoring Explanations' - startOffset: 3159 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3159 - endOffset: 3323 -- name: 'Explainable AI and Trust: User Confidence, Provenance, and Transparency' - startOffset: 3323 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3323 - endOffset: 3463 -- name: 'LLMs and Hallucinations: Explainability Challenges Versus Tabular Models' - startOffset: 3463 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3463 - endOffset: 3498 -- name: 'Measuring Trust: KPIs, Proxies, and Ethical Constraints' - startOffset: 3498 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3498 - endOffset: 3629 -- name: 'Business Relevance: Practical Proxies for Trust and Prioritizing Product - Ability' - startOffset: 3629 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3629 - endOffset: 3761 -- name: Episode Wrap‑Up and Closing Remarks - startOffset: 3761 - url: https://www.youtube.com/watch?v=EQcY83VA0Us&t=3761 - endOffset: 3708 --- Links: diff --git a/_podcast/s16e02-bridging-data-science-and-healthcare.md b/_podcast/building-healthcare-machine-learning-systems.md similarity index 95% rename from _podcast/s16e02-bridging-data-science-and-healthcare.md rename to _podcast/building-healthcare-machine-learning-systems.md index 300cc2f7..92652f14 100644 --- a/_podcast/s16e02-bridging-data-science-and-healthcare.md +++ b/_podcast/building-healthcare-machine-learning-systems.md @@ -1,19 +1,155 @@ --- +title: "Building Healthcare ML Systems: From Sepsis Prediction to Low-Resource Clinical Deployment" +short: "Bridging Data Science and Healthcare" +season: 16 episode: 2 guests: - elenistamatelou +image: 
images/podcast/building-healthcare-machine-learning-systems.jpg ids: anchor: datatalksclub/episodes/Bridging-Data-Science-and-Healthcare---Eleni-Stamatelou-e2aegvc youtube: pDOwlulDh0c -image: images/podcast/s16e02-bridging-data-science-and-healthcare.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Bridging-Data-Science-and-Healthcare---Eleni-Stamatelou-e2aegvc apple: https://podcasts.apple.com/us/podcast/bridging-data-science-and-healthcare-eleni-stamatelou/id1541710331?i=1000632040444 spotify: https://open.spotify.com/episode/5W6lfZVhjIKEmVzBuexfzE?si=0nUHr66eQa6oPVJDb3d0rw youtube: https://www.youtube.com/watch?v=pDOwlulDh0c -season: 16 -short: Bridging Data Science and Healthcare -title: 'ML in Healthcare: Low-Resource Monitoring, Sepsis Prediction & Clinical Translation' +description: "Learn to build reliable healthcare ML systems for sepsis prediction and low-resource clinical deployment—improve patient outcomes, scalability, and trust." +topics: +- machine learning +- data science +- MLOps +- data engineering +- data governance +- healthcare +intro: "How do you build machine learning systems that can predict sepsis and actually work in low-resource clinical settings? In this episode Eleni Stamatelou, a machine learning researcher and educator focused on using data science to improve healthcare, walks through the technical and practical steps of turning models into deployed clinical tools. With expertise in signal processing, deep learning, and data-driven design, Eleni frames the core challenges of healthcare ML: data quality and preprocessing, model reliability for sepsis prediction, and the constraints of low-resource deployment.

We cover key topics including designing robust models for noisy clinical signals, evaluation and validation strategies suited to patient safety, and pragmatic considerations for integrating ML into clinical workflows with limited infrastructure. Listeners will gain actionable insights on bridging research and practice—how to prioritize features, manage trade-offs between complexity and reliability, and make deployment decisions that respect resource limitations.

If you work on machine learning in healthcare, clinical AI, or sepsis prediction, this episode provides concrete perspectives on building systems that are both scientifically sound and practically deployable in low-resource environments." +dateadded: 2023-10-23 +duration: PT00H59M01S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=0 + endOffset: 44 +- name: 'Guest Overview: Elena Stamatelou — ML researcher focused on healthcare' + startOffset: 44 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=44 + endOffset: 105 +- name: 'Education & Early Career: University of Patras, Erasmus, VUB/ULB' + startOffset: 105 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=105 + endOffset: 165 +- name: 'Moving to the Netherlands: Philips Healthcare internship and doctorate in + data science' + startOffset: 165 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=165 + endOffset: 283 +- name: 'Philips Healthcare Projects: C-arm imaging and pregnancy monitoring' + startOffset: 283 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=283 + endOffset: 408 +- name: 'Low-Resource Pediatric Monitoring: Vital-sign system design for Malawi' + startOffset: 408 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=408 + endOffset: 454 +- name: 'Data Collection for Clinical Outcomes: linking sensors to lab results' + startOffset: 454 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=454 + endOffset: 574 +- name: 'Motivation for Healthcare: choosing impact over other engineering paths' + startOffset: 574 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=574 + endOffset: 663 +- name: 'Master’s Thesis: white blood cell image classification for a cell sorter + (IMEC)' + startOffset: 663 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=663 + endOffset: 793 +- name: '3D Reconstruction Work: multi-view geometry from C-arm images' + startOffset: 793 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=793 + endOffset: 
943 +- name: 'Home Monitoring for Pregnancy: smartwatches, weight tracking, and midwife + dashboards' + startOffset: 943 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=943 + endOffset: 1085 +- name: 'Research Orientation: novelty with clinical translation' + startOffset: 1085 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1085 + endOffset: 1168 +- name: 'Ballistography Signal Research: denoising and U-Net for infant heart rate + estimation' + startOffset: 1168 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1168 + endOffset: 1309 +- name: 'Signal Processing vs Deep Learning: filters, Fourier methods, and when to + use ML' + startOffset: 1309 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1309 + endOffset: 1483 +- name: 'Patient Acuity Scoring: vitals-based scoring poster' + startOffset: 1483 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1483 + endOffset: 1523 +- name: Regulatory & Explainable AI Challenges; annotation scarcity and data gaps + startOffset: 1523 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1523 + endOffset: 1692 +- name: 'Clinical Use Case: sepsis prediction from vitals and clinical data' + startOffset: 1692 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1692 + endOffset: 1870 +- name: 'Clinical Validation & Adoption: engaging clinicians and long approval timelines' + startOffset: 1870 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1870 + endOffset: 2034 +- name: 'Healthcare vs E-commerce Data: offline events, timestamps, and higher risk' + startOffset: 2034 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2034 + endOffset: 2145 +- name: 'Population Differences & Generalization: Europe vs Africa considerations' + startOffset: 2145 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2145 + endOffset: 2377 +- name: 'Automation Impact: job displacement concerns and augmentation potential' + startOffset: 2377 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2377 + endOffset: 2590 +- name: 'Data 
Infrastructure Variability: digitization, interoperability, and compact + discs' + startOffset: 2590 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2590 + endOffset: 2792 +- name: 'Incremental Adoption Strategy: visualization, feedback loops, and trust building' + startOffset: 2792 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2792 + endOffset: 3050 +- name: 'ML Deployment Constraints: on-device vs cloud for low-resource settings' + startOffset: 3050 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3050 + endOffset: 3165 +- name: 'Sabbatical & Personal Projects: reflection and next steps' + startOffset: 3165 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3165 + endOffset: 3211 +- name: 'Transitioning into Healthcare Data Science: pathways and role types' + startOffset: 3211 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3211 + endOffset: 3346 +- name: 'Skills Transferability: technical skills are sufficient; learn clinical context + on the job' + startOffset: 3346 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3346 + endOffset: 3410 +- name: 'Job Market & Funding: demand for healthcare data scientists and research + funding' + startOffset: 3410 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3410 + endOffset: 3539 +- name: Closing Remarks and Resources (publications, GitHub, LinkedIn) + startOffset: 3539 + url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3539 + endOffset: 3541 transcript: - header: Podcast Introduction - header: 'Guest Overview: Elena Stamatelou — ML researcher focused on healthcare' @@ -114,7 +250,7 @@ transcript: sec: 262 time: '4:22' who: Elena -- header: 'Philips Healthcare Projects: C‑arm imaging and pregnancy monitoring' +- header: 'Philips Healthcare Projects: C-arm imaging and pregnancy monitoring' - line: Nice. Well, I have never heard about Philips Healthcare. What I know about Philips is that they produce lamps. I have a few smart lamps – Philips Hue. 
I also know that they produce trimmers (for shaving) but that's pretty much the @@ -142,7 +278,7 @@ transcript: sec: 380 time: '6:20' who: Alexey -- header: 'Low‑Resource Pediatric Monitoring: Vital‑sign system design for Malawi' +- header: 'Low-Resource Pediatric Monitoring: Vital-sign system design for Malawi' - line: I actually left my current company in July. I'm actually now on sabbatical. I worked for them for almost two years, focusing more on designing things like, “What is the future of data science in the company and how can we develop data @@ -241,7 +377,7 @@ transcript: sec: 753 time: '12:33' who: Alexey -- header: '3D Reconstruction Work: multi‑view geometry from C‑arm images' +- header: '3D Reconstruction Work: multi-view geometry from C-arm images' - line: Yeah. And there, I worked on a topic that was not really related to data science. They have this C-arm, which is in the shape of a C, like that [Elena shows the shape of the letter C with her hand] and on the top of this arm, there are four @@ -347,7 +483,7 @@ transcript: sec: 1162 time: '19:22' who: Alexey -- header: 'Ballistography Signal Research: denoising and U‑Net for infant heart rate +- header: 'Ballistography Signal Research: denoising and U-Net for infant heart rate estimation' - line: During the period when I was working for this company where we were working on vital sign monitoring systems in Africa, I was working on… It is called ballistography @@ -424,7 +560,7 @@ transcript: sec: 1446 time: '24:06' who: Alexey -- header: 'Patient Acuity Scoring: vitals‑based scoring poster' +- header: 'Patient Acuity Scoring: vitals-based scoring poster' - line: This is my first publication. But then I also published a poster that is not available online. It was about calculating a patient’s score based on the vitals of the patient. 
The main idea behind this was to have an overall assessment of @@ -568,7 +704,7 @@ transcript: sec: 2018 time: '33:38' who: Alexey -- header: 'Healthcare vs E‑commerce Data: offline events, timestamps, and higher risk' +- header: 'Healthcare vs E-commerce Data: offline events, timestamps, and higher risk' - line: I see that there is a comment from Sylvia. “Thanks, Eleni, for sharing your experience. How advanced and trusted is data science in healthcare compared to other sectors?” For example, I worked in e-commerce, and I think in e-commerce, @@ -828,7 +964,7 @@ transcript: sec: 2996 time: '49:56' who: Alexey -- header: 'ML Deployment Constraints: on‑device vs cloud for low‑resource settings' +- header: 'ML Deployment Constraints: on-device vs cloud for low-resource settings' - line: This is indeed, for the data engineer to just take the model that the data scientists create, and then deploy that on the machine so that it also works and aligns with the rest of the software there. Also, they take into account the restrictions @@ -1002,154 +1138,9 @@ transcript: sec: 3585 time: '59:45' who: Alexey -description: Learn ML tactics for sepsis prediction and low-resource monitoring with - clinical translation tips—deployment, validation, clinician adoption to accelerate - impact. -intro: 'How do you move machine learning from promising models to reliable tools that - work in low-resource hospitals — and what does it take to predict conditions like - sepsis from routinely collected vitals? In this episode, we speak with Eleni Stamatelou, - a machine learning researcher focused on healthcare whose path spans the University - of Patras, Erasmus exchanges, work at VUB/ULB, a Philips Healthcare internship and - a doctorate in data science. Eleni’s work ranges from C‑arm 3D reconstruction and - white blood cell image classification to home pregnancy monitoring and a vital‑sign - system deployed for pediatric care in Malawi.

We dig into practical topics: - designing sensors and linking them to lab outcomes, ballistography signal denoising - and U‑Net heart‑rate estimation, the tradeoffs between signal‑processing and deep - learning approaches, and a sepsis prediction use case built from vitals and clinical - data. We also cover clinical translation challenges — annotation scarcity, explainability, - validation timelines, population generalization, and on‑device versus cloud deployment - constraints. Listen to understand the technical and clinical steps needed to build, - validate and deploy ML in healthcare and how to navigate a career in healthcare - data science.' -dateadded: '2023-10-23' -duration: PT00H59M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=0 - endOffset: 44 -- name: 'Guest Overview: Elena Stamatelou — ML researcher focused on healthcare' - startOffset: 44 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=44 - endOffset: 105 -- name: 'Education & Early Career: University of Patras, Erasmus, VUB/ULB' - startOffset: 105 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=105 - endOffset: 165 -- name: 'Moving to the Netherlands: Philips Healthcare internship and doctorate in - data science' - startOffset: 165 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=165 - endOffset: 283 -- name: 'Philips Healthcare Projects: C‑arm imaging and pregnancy monitoring' - startOffset: 283 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=283 - endOffset: 408 -- name: 'Low‑Resource Pediatric Monitoring: Vital‑sign system design for Malawi' - startOffset: 408 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=408 - endOffset: 454 -- name: 'Data Collection for Clinical Outcomes: linking sensors to lab results' - startOffset: 454 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=454 - endOffset: 574 -- name: 'Motivation for Healthcare: choosing impact over other engineering paths' - startOffset: 574 - 
url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=574 - endOffset: 663 -- name: 'Master’s Thesis: white blood cell image classification for a cell sorter - (IMEC)' - startOffset: 663 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=663 - endOffset: 793 -- name: '3D Reconstruction Work: multi‑view geometry from C‑arm images' - startOffset: 793 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=793 - endOffset: 943 -- name: 'Home Monitoring for Pregnancy: smartwatches, weight tracking, and midwife - dashboards' - startOffset: 943 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=943 - endOffset: 1085 -- name: 'Research Orientation: novelty with clinical translation' - startOffset: 1085 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1085 - endOffset: 1168 -- name: 'Ballistography Signal Research: denoising and U‑Net for infant heart rate - estimation' - startOffset: 1168 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1168 - endOffset: 1309 -- name: 'Signal Processing vs Deep Learning: filters, Fourier methods, and when to - use ML' - startOffset: 1309 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1309 - endOffset: 1483 -- name: 'Patient Acuity Scoring: vitals‑based scoring poster' - startOffset: 1483 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1483 - endOffset: 1523 -- name: Regulatory & Explainable AI Challenges; annotation scarcity and data gaps - startOffset: 1523 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1523 - endOffset: 1692 -- name: 'Clinical Use Case: sepsis prediction from vitals and clinical data' - startOffset: 1692 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1692 - endOffset: 1870 -- name: 'Clinical Validation & Adoption: engaging clinicians and long approval timelines' - startOffset: 1870 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=1870 - endOffset: 2034 -- name: 'Healthcare vs E‑commerce Data: offline events, timestamps, and higher risk' - startOffset: 2034 - url: 
https://www.youtube.com/watch?v=pDOwlulDh0c&t=2034 - endOffset: 2145 -- name: 'Population Differences & Generalization: Europe vs Africa considerations' - startOffset: 2145 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2145 - endOffset: 2377 -- name: 'Automation Impact: job displacement concerns and augmentation potential' - startOffset: 2377 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2377 - endOffset: 2590 -- name: 'Data Infrastructure Variability: digitization, interoperability, and compact - discs' - startOffset: 2590 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2590 - endOffset: 2792 -- name: 'Incremental Adoption Strategy: visualization, feedback loops, and trust building' - startOffset: 2792 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=2792 - endOffset: 3050 -- name: 'ML Deployment Constraints: on‑device vs cloud for low‑resource settings' - startOffset: 3050 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3050 - endOffset: 3165 -- name: 'Sabbatical & Personal Projects: reflection and next steps' - startOffset: 3165 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3165 - endOffset: 3211 -- name: 'Transitioning into Healthcare Data Science: pathways and role types' - startOffset: 3211 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3211 - endOffset: 3346 -- name: 'Skills Transferability: technical skills are sufficient; learn clinical context - on the job' - startOffset: 3346 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3346 - endOffset: 3410 -- name: 'Job Market & Funding: demand for healthcare data scientists and research - funding' - startOffset: 3410 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3410 - endOffset: 3539 -- name: Closing Remarks and Resources (publications, GitHub, LinkedIn) - startOffset: 3539 - url: https://www.youtube.com/watch?v=pDOwlulDh0c&t=3539 - endOffset: 3541 +context: 'Building Healthcare ML Systems: From Sepsis Prediction to Low-Resource Clinical + Deployment' --- - 
Links: * [LinkedIn](https://www.linkedin.com/in/elenistamatelou/){:target="_blank"} diff --git a/_podcast/s13e01-accelerating-adoption-of-ai-through-diversity.md b/_podcast/building-ml-communities-diversity-and-career-growth.md similarity index 95% rename from _podcast/s13e01-accelerating-adoption-of-ai-through-diversity.md rename to _podcast/building-ml-communities-diversity-and-career-growth.md index 839d80f2..f52a0a70 100644 --- a/_podcast/s13e01-accelerating-adoption-of-ai-through-diversity.md +++ b/_podcast/building-ml-communities-diversity-and-career-growth.md @@ -1,23 +1,142 @@ --- +title: "How to Build & Scale a Data Science Community: Diversity, ML Deployment & Career Growth" +short: "Accelerating the Adoption of AI through Diversity" +season: 13 episode: 1 guests: - daniameira +image: images/podcast/building-ml-communities-diversity-and-career-growth.jpg ids: anchor: Accelerating-the-Adoption-of-AI-through-Diversity---Dnia-Meira-e1v9obp youtube: SRUwwvk_YCk -image: images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Accelerating-the-Adoption-of-AI-through-Diversity---Dnia-Meira-e1v9obp apple: https://podcasts.apple.com/us/podcast/accelerating-the-adoption-of-ai-through-diversity/id1541710331?i=1000601491838 spotify: https://open.spotify.com/episode/6pRkAK9Zo2QrXZCAzh2veV?si=ixEmGK5-RemknBcHrChMNA youtube: https://www.youtube.com/watch?v=SRUwwvk_YCk -season: 13 -short: Accelerating the Adoption of AI through Diversity -title: 'How to Build & Scale a Data Science Community: Diversity, ML Deployment & - Career Growth' + +description: "Discover how to build and scale a data science community, boost diversity, deploy ML, and accelerate career growth with mentoring & hiring strategies" +intro: "How do you build and scale a data science community that actually advances diversity, supports machine learning deployment, and accelerates career growth? 
In this episode, Dânia Meira — AI Guild co-founder, data scientist, teacher and speaker with a Master’s in Computer Science (AI) — walks through her journey from applied math and marketing analytics to founding a global data science community in Berlin.

We cover practical community building: turning women’s meetups into monthly dinners and an international membership, curating meetup content and the Datalift Summit, and policies like visibility-first speaker invites, codes of conduct, and misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) improves product fit and market reach, how to create psychological safety, and how to source and train diverse talent for regulated industries. She also outlines a vendor-agnostic consulting model for machine learning deployment, community-to-client matching, and scaling from a freelance network to full-time teams." +topics: +- data science +- machine learning +- community building +- diversity +- career growth +dateadded: 2023-02-25 + +duration: PT00H59M44S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=0 + endOffset: 94 +- name: 'Guest Introduction: Dania — AI Guild co-founder, machine learning background' + startOffset: 94 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=94 + endOffset: 152 +- name: 'Early Career: Applied math, Spark vs Hadoop thesis and marketing analytics' + startOffset: 152 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=152 + endOffset: 216 +- name: 'Move to Berlin: Startup roles and building end-to-end data skills' + startOffset: 216 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=216 + endOffset: 319 +- name: 'Role Evolution: Data scientist generalist to specialized career paths' + startOffset: 319 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=319 + endOffset: 392 +- name: 'Teaching & Mentoring: Bootcamps, Data Science for Good, and skills sharing' + startOffset: 392 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=392 + endOffset: 677 +- name: 'Community Origin Story: From women’s meetups to a broader support network' + startOffset: 677 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=677 + endOffset: 921 +- name: 
'Community Growth: Monthly dinners, global expansion, and membership scale' + startOffset: 921 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=921 + endOffset: 1005 +- name: 'Datalift Summit Origin: Organizing the first in-person conference post-COVID' + startOffset: 1005 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1005 + endOffset: 1191 +- name: 'Meetup Content Strategy: Curating panels on career options and practical + topics' + startOffset: 1191 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1191 + endOffset: 1395 +- name: 'Diversity in Berlin: Internationality, gender balance, and workplace culture' + startOffset: 1395 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1395 + endOffset: 1579 +- name: 'Broader Diversity Dimensions: Backgrounds, nationality, and neurodiversity' + startOffset: 1579 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1579 + endOffset: 1639 +- name: 'Business Case for Diversity: Inclusive teams, product fit, and market reach' + startOffset: 1639 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1639 + endOffset: 1884 +- name: 'Psychological Safety: Leadership signals and enabling open conversations' + startOffset: 1884 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1884 + endOffset: 2023 +- name: 'Consulting Model: Vendor-agnostic machine learning deployment support' + startOffset: 2023 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2023 + endOffset: 2061 +- name: 'Recruitment & Training: Sourcing diverse talent pools for regulated industries' + startOffset: 2061 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2061 + endOffset: 2376 +- name: 'Visibility Policy: Inviting women speakers first to increase participation' + startOffset: 2376 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2376 + endOffset: 2601 +- name: 'Career Growth Advice: Networks, visibility, and stepping into leadership' + startOffset: 2601 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2601 + endOffset: 2736 +- 
name: 'Code of Conduct: Crafting practical rules and expected community behavior' + startOffset: 2736 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2736 + endOffset: 2970 +- name: 'Responding to Misconduct: Reporting, case-by-case handling, and consequences' + startOffset: 2970 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2970 + endOffset: 3228 +- name: 'Community-to-Client Matching: Leveraging member expertise for projects' + startOffset: 3228 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3228 + endOffset: 3373 +- name: 'Team Roles: Founders’ split — machine learning delivery and career coaching' + startOffset: 3373 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3373 + endOffset: 3476 +- name: 'Scaling Strategy: Freelance network today, hiring full-time as demand grows' + startOffset: 3476 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3476 + endOffset: 3551 +- name: 'Recommended Resources: Weapons of Math Destruction and Coded Bias' + startOffset: 3551 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3551 + endOffset: 3642 +- name: 'Datalift Summit 2023: Call for speakers, workshops, and production use cases' + startOffset: 3642 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3642 + endOffset: 3678 +- name: Closing Remarks and Sign-off + startOffset: 3678 + url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3678 + endOffset: 3584 + transcript: - header: Podcast Introduction -- header: 'Guest Introduction: Dania — AI Guild co‑founder, machine learning background' +- header: 'Guest Introduction: Dania — AI Guild co-founder, machine learning background' - line: This week, we'll talk about diversity and leadership in data science and AI. We have a special guest today, Dania. Dania is a co-founder and director at the AI Guild, where she works with companies scaling data analytics and machine learning. 
@@ -62,7 +181,7 @@ transcript: sec: 214 time: '3:34' who: Alexey -- header: 'Move to Berlin: Startup roles and building end‑to‑end data skills' +- header: 'Move to Berlin: Startup roles and building end-to-end data skills' - line: Yeah. At this point, I was finishing my Master's, and I was thinking about working abroad. It happened that by chance, I got approached via LinkedIn for a job as a data scientist in Berlin. It was a perfect match. They were looking @@ -299,7 +418,7 @@ transcript: sec: 963 time: '16:03' who: Alexey -- header: 'Datalift Summit Origin: Organizing the first in‑person conference post‑COVID' +- header: 'Datalift Summit Origin: Organizing the first in-person conference post-COVID' - line: Yes. There's a lot going on. Yes. The monthly dinners were how we started. We always had the idea to get people together in a bigger group. We have the local dinners, but what about one big event? One where everyone travels to Berlin, which @@ -609,7 +728,7 @@ transcript: sec: 1918 time: '31:58' who: Dania -- header: 'Consulting Model: Vendor‑agnostic machine learning deployment support' +- header: 'Consulting Model: Vendor-agnostic machine learning deployment support' - line: You told us the story of how the AI Guild started. You had these meetups, these sessions, where you wanted to connect with other women in the field, and then eventually it grew to dinners and then the Guild itself. This was probably @@ -873,7 +992,7 @@ transcript: sec: 2930 time: '48:50' who: Alexey -- header: 'Responding to Misconduct: Reporting, case‑by‑case handling, and consequences' +- header: 'Responding to Misconduct: Reporting, case-by-case handling, and consequences' - line: This is a big challenge. We were talking about this diversity aspect in terms of culture or nationality. In different countries, you have different behavior that is accepted or not accepted. That's what I mean with the hardline. 
Some things @@ -966,7 +1085,7 @@ transcript: sec: 3210 time: '53:30' who: Dania -- header: 'Community‑to‑Client Matching: Leveraging member expertise for projects' +- header: 'Community-to-Client Matching: Leveraging member expertise for projects' - line: I wanted to talk a bit more about the Guild. Right now, you're a for-profit organization and you offer consulting. So how does it work? Companies approach you saying, “Hey, we want to deploy some models.” Help us.” Something like that? @@ -1054,7 +1173,7 @@ transcript: sec: 3430 time: '57:10' who: Dania -- header: 'Scaling Strategy: Freelance network today, hiring full‑time as demand grows' +- header: 'Scaling Strategy: Freelance network today, hiring full-time as demand grows' - line: I see an interesting question from Azif. “What if you have too many customers and cannot cope with the numbers because there are just two of you?” How do you do this? Do you start finding somebody in the community to delegate work to? @@ -1163,138 +1282,12 @@ transcript: sec: 3677 time: '1:01:17' who: Dania -- header: Closing Remarks and Sign‑off +- header: Closing Remarks and Sign-off - line: '[laughs] Okay. Thanks a lot. Thanks, everyone, for joining us today. Today is Friday, so everyone – have a great weekend.' sec: 3678 time: '1:01:18' who: Alexey -description: Discover how to build and scale a data science community, boost diversity, - deploy ML, and accelerate career growth with mentoring & hiring strategies. -intro: 'How do you build and scale a data science community that actually advances - diversity, supports machine learning deployment, and accelerates career growth? - In this episode, Dânia Meira — AI Guild co‑founder, data scientist, teacher and speaker - with a Master’s in Computer Science (AI) — walks through her journey from applied - math and marketing analytics to founding a global data science community in Berlin. -

We cover practical community building: turning women’s meetups into monthly - dinners and an international membership, curating meetup content and the Datalift - Summit, and policies like visibility-first speaker invites, codes of conduct, and - misconduct response. Dânia explains why diversity (gender, nationality, neurodiversity) - improves product fit and market reach, how to create psychological safety, and how - to source and train diverse talent for regulated industries. She also outlines a - vendor‑agnostic consulting model for machine learning deployment, community‑to‑client - matching, and scaling from a freelance network to full‑time teams. Recommended readings - include Weapons of Math Destruction and Coded Bias.

Listen to gain actionable - tactics for community building, inclusive leadership, ML deployment strategies, - and career growth pathways for data scientists and AI practitioners.' -dateadded: '2023-02-25' -duration: PT00H59M44S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=0 - endOffset: 94 -- name: 'Guest Introduction: Dania — AI Guild co‑founder, machine learning background' - startOffset: 94 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=94 - endOffset: 152 -- name: 'Early Career: Applied math, Spark vs Hadoop thesis and marketing analytics' - startOffset: 152 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=152 - endOffset: 216 -- name: 'Move to Berlin: Startup roles and building end‑to‑end data skills' - startOffset: 216 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=216 - endOffset: 319 -- name: 'Role Evolution: Data scientist generalist to specialized career paths' - startOffset: 319 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=319 - endOffset: 392 -- name: 'Teaching & Mentoring: Bootcamps, Data Science for Good, and skills sharing' - startOffset: 392 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=392 - endOffset: 677 -- name: 'Community Origin Story: From women’s meetups to a broader support network' - startOffset: 677 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=677 - endOffset: 921 -- name: 'Community Growth: Monthly dinners, global expansion, and membership scale' - startOffset: 921 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=921 - endOffset: 1005 -- name: 'Datalift Summit Origin: Organizing the first in‑person conference post‑COVID' - startOffset: 1005 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1005 - endOffset: 1191 -- name: 'Meetup Content Strategy: Curating panels on career options and practical - topics' - startOffset: 1191 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1191 - endOffset: 1395 -- name: 'Diversity in 
Berlin: Internationality, gender balance, and workplace culture' - startOffset: 1395 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1395 - endOffset: 1579 -- name: 'Broader Diversity Dimensions: Backgrounds, nationality, and neurodiversity' - startOffset: 1579 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1579 - endOffset: 1639 -- name: 'Business Case for Diversity: Inclusive teams, product fit, and market reach' - startOffset: 1639 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1639 - endOffset: 1884 -- name: 'Psychological Safety: Leadership signals and enabling open conversations' - startOffset: 1884 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=1884 - endOffset: 2023 -- name: 'Consulting Model: Vendor‑agnostic machine learning deployment support' - startOffset: 2023 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2023 - endOffset: 2061 -- name: 'Recruitment & Training: Sourcing diverse talent pools for regulated industries' - startOffset: 2061 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2061 - endOffset: 2376 -- name: 'Visibility Policy: Inviting women speakers first to increase participation' - startOffset: 2376 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2376 - endOffset: 2601 -- name: 'Career Growth Advice: Networks, visibility, and stepping into leadership' - startOffset: 2601 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2601 - endOffset: 2736 -- name: 'Code of Conduct: Crafting practical rules and expected community behavior' - startOffset: 2736 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2736 - endOffset: 2970 -- name: 'Responding to Misconduct: Reporting, case‑by‑case handling, and consequences' - startOffset: 2970 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=2970 - endOffset: 3228 -- name: 'Community‑to‑Client Matching: Leveraging member expertise for projects' - startOffset: 3228 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3228 - endOffset: 3373 -- name: 'Team Roles: 
Founders’ split — machine learning delivery and career coaching' - startOffset: 3373 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3373 - endOffset: 3476 -- name: 'Scaling Strategy: Freelance network today, hiring full‑time as demand grows' - startOffset: 3476 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3476 - endOffset: 3551 -- name: 'Recommended Resources: Weapons of Math Destruction and Coded Bias' - startOffset: 3551 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3551 - endOffset: 3642 -- name: 'Datalift Summit 2023: Call for speakers, workshops, and production use cases' - startOffset: 3642 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3642 - endOffset: 3678 -- name: Closing Remarks and Sign‑off - startOffset: 3678 - url: https://www.youtube.com/watch?v=SRUwwvk_YCk&t=3678 - endOffset: 3584 --- Links: diff --git a/_podcast/s04e04-ml-startup.md b/_podcast/building-mlops-startup.md similarity index 97% rename from _podcast/s04e04-ml-startup.md rename to _podcast/building-mlops-startup.md index d5ea9c3c..310ebbd4 100644 --- a/_podcast/s04e04-ml-startup.md +++ b/_podcast/building-mlops-startup.md @@ -1,12 +1,11 @@ --- -title: 'How to Build a Successful ML Startup: MLOps, Model Monitoring, Open Source - & Founder Fit' -short: I Want to Build a Machine Learning Startup! -guests: -- elenasamuylova -image: images/podcast/s04e04-ml-startup.jpg +title: "How to Build a Successful ML Startup: MLOps, Model Monitoring, Open Source & Founder Fit" +short: "I Want to Build a Machine Learning Startup!" 
season: 4 episode: 4 +guests: +- elenasamuylova +image: images/podcast/building-mlops-startup.jpg ids: youtube: DiDs5aMjEWg anchor: I-Want-to-Build-a-Machine-Learning-Startup----Elena-Samuylova-e139ste @@ -15,6 +14,150 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/I-Want-to-Build-a-Machine-Learning-Startup----Elena-Samuylova-e139ste spotify: https://open.spotify.com/episode/7fwbqo5tDrtakuqWaIuEjc apple: https://podcasts.apple.com/us/podcast/i-want-to-build-a-machine-learning-startup-elena-samuylova/id1541710331?i=1000529106923 + +description: "Discover practical MLOps, model monitoring and founder-fit tactics to build an ML startup: hire, fund, productize, and reach product-market fit faster." +intro: "What does it take to build a successful ML startup—especially around MLOps, model monitoring, open source, and founder fit? Elena Samuylova, Co-founder & CEO of Evidently AI, joins to answer that question drawing on her applied machine learning experience since 2014, including roles at Yandex Data Factory and an industrial AI startup.

This episode walks through practical founder decisions: sourcing problem-first ideas, finding compatible co-founders and establishing pre-launch alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena explains what “AI-first” positioning really means, how developer tools and open source shape go-to-market strategies (open core, cloud, monetization and cloning risks), and how Evidently validated model monitoring as a business. You’ll hear tactical guidance on customer discovery, persuading engineers to adopt your tool, data safety and on-prem deployments, hiring and scaling tradeoffs, funding paths, productizing services for non-technical founders, and normalizing failure and work–life tradeoffs.

Listen to gain actionable frameworks for building an ML startup—covering model monitoring, MLOps, open source strategy, founder-market fit, and the concrete signals that indicate product–market fit." +topics: +- startup +- machine learning +- MLOps +- open-source +- entrepreneurship +- founder +dateadded: 2021-07-16 + +duration: PT00H58M30S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=0 + endOffset: 121 +- name: 'Guest Background: Elena Samuylova’s ML & Startup Journey' + startOffset: 121 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=121 + endOffset: 202 +- name: 'Career Highlights: Yandex, Data Factory, and Industrial AI' + startOffset: 202 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=202 + endOffset: 318 +- name: 'Motivations: Startup vs. Employee Trade-offs' + startOffset: 318 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=318 + endOffset: 443 +- name: 'Sourcing Ideas: Problem-First Approach for ML Startups' + startOffset: 443 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=443 + endOffset: 704 +- name: 'Co-founder Search: Compatibility, Founder–Market Fit, and Finding Partners' + startOffset: 704 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=704 + endOffset: 1015 +- name: 'Pre-Launch Alignment: Commitment, Company Type, and Fundraising Path' + startOffset: 1015 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1015 + endOffset: 1294 +- name: 'Market Choice: Vertical Solutions vs. 
Infrastructure & MLOps' + startOffset: 1294 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1294 + endOffset: 1390 +- name: 'AI-First Positioning: What It Really Means' + startOffset: 1390 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1390 + endOffset: 1473 +- name: 'Developer Tools Market: Selling to Engineers and Open Source Dynamics' + startOffset: 1473 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1473 + endOffset: 1581 +- name: 'Founder Skills: Self-Starter Mindset and Learning Agility' + startOffset: 1581 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1581 + endOffset: 1697 +- name: 'Startup Risks: Financial, Cultural, and Career Considerations' + startOffset: 1697 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1697 + endOffset: 1910 +- name: 'Failure Preparedness: Normalizing Risk and Learning from Failure' + startOffset: 1910 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1910 + endOffset: 1967 +- name: 'Work–Life Tradeoffs: Time Commitment in Early Stages' + startOffset: 1967 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1967 + endOffset: 2046 +- name: 'Part-Time Startups: Weekend MVPs, Bootstrapping, and Grants' + startOffset: 2046 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2046 + endOffset: 2147 +- name: 'Funding Models: Accelerators, Angels, and Equity Considerations' + startOffset: 2147 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2147 + endOffset: 2288 +- name: 'Non-Technical Founders: No-Code MVPs and Productizing Services' + startOffset: 2288 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2288 + endOffset: 2365 +- name: 'Productizing Services: From Manual Delivery to Scalable SaaS' + startOffset: 2365 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2365 + endOffset: 2413 +- name: 'Hiring Expertise: When to Bring in Domain or Technical Help' + startOffset: 2413 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2413 + endOffset: 2535 +- name: 'Customer Discovery: Interview 
Counts and Signals for Product–Market Fit' + startOffset: 2535 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2535 + endOffset: 2639 +- name: 'Evidently Origin: Validating Model Monitoring as a Business' + startOffset: 2639 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2639 + endOffset: 2792 +- name: 'Founder Role at Evidently: CEO Tasks, Content, and Community' + startOffset: 2792 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2792 + endOffset: 2891 +- name: 'Open Source Strategy: Open Core, Cloud, and Monetization Paths' + startOffset: 2891 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2891 + endOffset: 2969 +- name: 'Open Source Risks: Cloning, Cloud Providers, and Licensing' + startOffset: 2969 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2969 + endOffset: 3108 +- name: 'Bottom-Up Adoption: Engineers First, Enterprise Later' + startOffset: 3108 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3108 + endOffset: 3189 +- name: 'Demonstrating Value: Persuading Clients to Share Data' + startOffset: 3189 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3189 + endOffset: 3237 +- name: 'Geographic Differences: Market Dynamics and Data Attitudes' + startOffset: 3237 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3237 + endOffset: 3377 +- name: 'Data Safety Options: On-Premise Deployments with Open Source' + startOffset: 3377 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3377 + endOffset: 3426 +- name: 'Scaling Teams: When to Hire Engineers vs. 
Stay Small' + startOffset: 3426 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3426 + endOffset: 3494 +- name: 'Market Intelligence: Following Startups, Investors, and Trends' + startOffset: 3494 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3494 + endOffset: 3572 +- name: 'Final Advice: Build from Genuine Interest, Not Just Hype' + startOffset: 3572 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3572 + endOffset: 3611 +- name: 'Contact & Resources: Evidently, LinkedIn, and Twitter' + startOffset: 3611 + url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3611 + endOffset: 3510 + transcript: - header: Podcast Introduction - header: 'Guest Background: Elena Samuylova’s ML & Startup Journey' @@ -580,7 +723,7 @@ transcript: sec: 1571 time: '26:11' who: Elena -- header: 'Founder Skills: Self‑Starter Mindset and Learning Agility' +- header: 'Founder Skills: Self-Starter Mindset and Learning Agility' - line: What kind of skills do I need to start a startup? sec: 1581 time: '26:21' @@ -750,7 +893,7 @@ transcript: sec: 2008 time: '33:28' who: Elena -- header: 'Part‑Time Startups: Weekend MVPs, Bootstrapping, and Grants' +- header: 'Part-Time Startups: Weekend MVPs, Bootstrapping, and Grants' - line: We already started getting questions. You already mentioned that when you start a startup, you don't necessarily have to bootstrap yourself – meaning that you don’t have to live off of your savings. You can do a startup while still working. @@ -839,7 +982,7 @@ transcript: sec: 2276 time: '37:56' who: Elena -- header: 'Non‑Technical Founders: No-Code MVPs and Productizing Services' +- header: 'Non-Technical Founders: No-Code MVPs and Productizing Services' - line: We talked about a situation where “I'm a technical person, and I want to start a startup, this is what I do.” What if I'm not super-technical? I also don't have any resources to develop something, what do I do? 
@@ -1169,7 +1312,7 @@ transcript: sec: 3082 time: '51:22' who: Elena -- header: 'Bottom‑Up Adoption: Engineers First, Enterprise Later' +- header: 'Bottom-Up Adoption: Engineers First, Enterprise Later' - line: I guess in case of open source, what can happen is that the engineers and data scientists find your library, start using it, and then it reaches the management. The management sees it and then you sell the company your enterprise offer, right? @@ -1262,7 +1405,7 @@ transcript: sec: 3334 time: '55:34' who: Elena -- header: 'Data Safety Options: On‑Premise Deployments with Open Source' +- header: 'Data Safety Options: On-Premise Deployments with Open Source' - line: I think when Emely did a presentation a while ago at DataTalks.Club, one of the questions was “Hey, I'm a bit concerned about my data going to Russia.” What you answered was “Hey, it's open source. You don't have to be concerned. You just @@ -1366,155 +1509,6 @@ transcript: sec: 3631 time: '1:00:31' who: Elena -description: 'Discover practical MLOps, model monitoring and founder‑fit tactics to - build an ML startup: hire, fund, productize, and reach product‑market fit faster.' -intro: 'What does it take to build a successful ML startup—especially around MLOps, - model monitoring, open source, and founder fit? Elena Samuylova, Co‑founder & CEO - of Evidently AI, joins to answer that question drawing on her applied machine learning - experience since 2014, including roles at Yandex Data Factory and an industrial - AI startup.

This episode walks through practical founder decisions: sourcing - problem‑first ideas, finding compatible co‑founders and establishing pre‑launch - alignment, and choosing between vertical solutions and infrastructure/MLOps. Elena - explains what “AI‑first” positioning really means, how developer tools and open - source shape go‑to‑market strategies (open core, cloud, monetization and cloning - risks), and how Evidently validated model monitoring as a business. You’ll hear - tactical guidance on customer discovery, persuading engineers to adopt your tool, - data safety and on‑prem deployments, hiring and scaling tradeoffs, funding paths, - productizing services for non‑technical founders, and normalizing failure and work–life - tradeoffs.

Listen to gain actionable frameworks for building an ML startup—covering - model monitoring, MLOps, open source strategy, founder‑market fit, and the concrete - signals that indicate product–market fit.' -dateadded: '2021-07-16' -duration: PT00H58M30S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=0 - endOffset: 121 -- name: 'Guest Background: Elena Samuylova’s ML & Startup Journey' - startOffset: 121 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=121 - endOffset: 202 -- name: 'Career Highlights: Yandex, Data Factory, and Industrial AI' - startOffset: 202 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=202 - endOffset: 318 -- name: 'Motivations: Startup vs. Employee Trade-offs' - startOffset: 318 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=318 - endOffset: 443 -- name: 'Sourcing Ideas: Problem-First Approach for ML Startups' - startOffset: 443 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=443 - endOffset: 704 -- name: 'Co-founder Search: Compatibility, Founder–Market Fit, and Finding Partners' - startOffset: 704 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=704 - endOffset: 1015 -- name: 'Pre-Launch Alignment: Commitment, Company Type, and Fundraising Path' - startOffset: 1015 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1015 - endOffset: 1294 -- name: 'Market Choice: Vertical Solutions vs. 
Infrastructure & MLOps' - startOffset: 1294 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1294 - endOffset: 1390 -- name: 'AI-First Positioning: What It Really Means' - startOffset: 1390 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1390 - endOffset: 1473 -- name: 'Developer Tools Market: Selling to Engineers and Open Source Dynamics' - startOffset: 1473 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1473 - endOffset: 1581 -- name: 'Founder Skills: Self‑Starter Mindset and Learning Agility' - startOffset: 1581 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1581 - endOffset: 1697 -- name: 'Startup Risks: Financial, Cultural, and Career Considerations' - startOffset: 1697 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1697 - endOffset: 1910 -- name: 'Failure Preparedness: Normalizing Risk and Learning from Failure' - startOffset: 1910 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1910 - endOffset: 1967 -- name: 'Work–Life Tradeoffs: Time Commitment in Early Stages' - startOffset: 1967 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=1967 - endOffset: 2046 -- name: 'Part‑Time Startups: Weekend MVPs, Bootstrapping, and Grants' - startOffset: 2046 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2046 - endOffset: 2147 -- name: 'Funding Models: Accelerators, Angels, and Equity Considerations' - startOffset: 2147 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2147 - endOffset: 2288 -- name: 'Non‑Technical Founders: No-Code MVPs and Productizing Services' - startOffset: 2288 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2288 - endOffset: 2365 -- name: 'Productizing Services: From Manual Delivery to Scalable SaaS' - startOffset: 2365 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2365 - endOffset: 2413 -- name: 'Hiring Expertise: When to Bring in Domain or Technical Help' - startOffset: 2413 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2413 - endOffset: 2535 -- name: 'Customer Discovery: Interview 
Counts and Signals for Product–Market Fit' - startOffset: 2535 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2535 - endOffset: 2639 -- name: 'Evidently Origin: Validating Model Monitoring as a Business' - startOffset: 2639 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2639 - endOffset: 2792 -- name: 'Founder Role at Evidently: CEO Tasks, Content, and Community' - startOffset: 2792 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2792 - endOffset: 2891 -- name: 'Open Source Strategy: Open Core, Cloud, and Monetization Paths' - startOffset: 2891 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2891 - endOffset: 2969 -- name: 'Open Source Risks: Cloning, Cloud Providers, and Licensing' - startOffset: 2969 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=2969 - endOffset: 3108 -- name: 'Bottom‑Up Adoption: Engineers First, Enterprise Later' - startOffset: 3108 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3108 - endOffset: 3189 -- name: 'Demonstrating Value: Persuading Clients to Share Data' - startOffset: 3189 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3189 - endOffset: 3237 -- name: 'Geographic Differences: Market Dynamics and Data Attitudes' - startOffset: 3237 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3237 - endOffset: 3377 -- name: 'Data Safety Options: On‑Premise Deployments with Open Source' - startOffset: 3377 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3377 - endOffset: 3426 -- name: 'Scaling Teams: When to Hire Engineers vs. 
Stay Small' - startOffset: 3426 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3426 - endOffset: 3494 -- name: 'Market Intelligence: Following Startups, Investors, and Trends' - startOffset: 3494 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3494 - endOffset: 3572 -- name: 'Final Advice: Build from Genuine Interest, Not Just Hype' - startOffset: 3572 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3572 - endOffset: 3611 -- name: 'Contact & Resources: Evidently, LinkedIn, and Twitter' - startOffset: 3611 - url: https://www.youtube.com/watch?v=DiDs5aMjEWg&t=3611 - endOffset: 3510 --- diff --git a/_podcast/s11e04-large-scale-entity-resolution.md b/_podcast/building-open-source-data-product-for-identity-resolution.md similarity index 97% rename from _podcast/s11e04-large-scale-entity-resolution.md rename to _podcast/building-open-source-data-product-for-identity-resolution.md index 4713f36e..90878932 100644 --- a/_podcast/s11e04-large-scale-entity-resolution.md +++ b/_podcast/building-open-source-data-product-for-identity-resolution.md @@ -1,20 +1,170 @@ --- +title: "Building an Open-Source ML-Powered Identity Resolution Tool in the Modern Data Stack" +short: "Building an Open-Source ML-Powered Identity Resolution Tool" +season: 11 episode: 4 guests: - sonalgoyal +image: images/podcast/building-open-source-data-product-for-identity-resolution.jpg ids: anchor: Large-Scale-Entity-Resolution---Sonal-Goyal-e1pibrh youtube: lpjffCOPxlY -image: images/podcast/s11e04-large-scale-entity-resolution.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Large-Scale-Entity-Resolution---Sonal-Goyal-e1pibrh apple: https://podcasts.apple.com/us/podcast/large-scale-entity-resolution-sonal-goyal/id1541710331?i=1000584270745 spotify: https://open.spotify.com/episode/54DufG1ZVj0GMSoWTbJsen?si=d7XNSW2_Tfa4qKJxmFQpIA youtube: https://www.youtube.com/watch?v=lpjffCOPxlY -season: 11 -short: Large-Scale Entity Resolution -title: Eliminate Duplicate Records with 
ML-Powered Identity Resolution — Snowflake-native - & Open Source + +description: "Discover how to build an open-source, ML-powered identity resolution tool. Learn about the practical challenges across industries." +topics: +- machine learning +- MLOps +- data engineering +- open-source +- product management +intro: "How do you build an open-source, ML-powered identity resolution tool that becomes the single source of truth in a modern data stack? In this episode Sonal Goyal—founder of Zingg and a 23-year data product veteran—walks through the practical challenges of identity resolution and entity resolution across industries like investment banking, telecom, gaming, and insurance. Sonal explains why ML-powered approaches matter, how an open-source framework like Zingg can fit into your modern data stack, and what it takes to reconcile customer and supplier records into a reliable single source of truth.

Expect discussion of architecture and integration trade-offs, the role of machine learning in matching and deduplication, and lessons from building production data products at scale. If you manage customer data, data integration, or are evaluating open-source identity resolution solutions, this episode offers concrete insights and pointers—including Zingg’s open-source repository—to help you evaluate adoption, reduce duplicate records, and improve downstream analytics and personalization" +dateadded: 2022-10-29 + +duration: PT01H23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=0 + endOffset: 71 +- name: 'Guest Overview: Sonal Goyal and Zingg identity resolution' + startOffset: 71 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=71 + endOffset: 126 +- name: 'Career Overview: 24 years in tech, data consulting background' + startOffset: 126 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=126 + endOffset: 178 +- name: 'Origin Story: Consulting projects reveal recurring identity gaps' + startOffset: 178 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=178 + endOffset: 291 +- name: 'Modern Data Stack: Centralized data exposing identity challenges' + startOffset: 291 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=291 + endOffset: 343 +- name: 'Product Overview: Zingg — ML-powered identity resolution' + startOffset: 343 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=343 + endOffset: 434 +- name: 'Terminology: Entity resolution vs identity resolution' + startOffset: 434 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=434 + endOffset: 472 +- name: 'Duplicate Detection vs Deduplication: Outcomes and use cases' + startOffset: 472 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=472 + endOffset: 548 +- name: 'Motivation: Recurring duplicate problems across domains' + startOffset: 548 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=548 + endOffset: 669 +- name: 
'Solution Generality: Customers, products, patients and suppliers' + startOffset: 669 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=669 + endOffset: 818 +- name: 'Related Terms: Record linkage, entity matching, entity disambiguation' + startOffset: 818 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=818 + endOffset: 842 +- name: 'Core Approach: ML training, blocking, indexing for scale' + startOffset: 842 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=842 + endOffset: 1093 +- name: 'Implementation: Spark distribution, Snowflake-native & Python API' + startOffset: 1093 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1093 + endOffset: 1241 +- name: 'Interfaces & Integrations: CLI, Python SDK, Databricks, dbt, UI plans' + startOffset: 1241 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1241 + endOffset: 1311 +- name: 'Founder Transition: From consultancy to full-time product build' + startOffset: 1311 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1311 + endOffset: 1380 +- name: 'Development Timeline: Proof-of-concept to public release (~18 months)' + startOffset: 1380 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1380 + endOffset: 1454 +- name: 'Open Source Strategy: Community, adoption, and business rationale' + startOffset: 1454 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1454 + endOffset: 1620 +- name: 'Licensing Choice: AGPL to prevent SaaS rehosting and protect IP' + startOffset: 1620 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1620 + endOffset: 1870 +- name: 'Open Source Trade-offs: IP concerns vs discoverability and growth' + startOffset: 1870 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1870 + endOffset: 1920 +- name: 'Team Evolution: Solo founder, consultants, and initial hires' + startOffset: 1920 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1920 + endOffset: 1979 +- name: 'Founder Role: Product, ecosystem integrations, community and hiring' + startOffset: 1979 + url: 
https://www.youtube.com/watch?v=lpjffCOPxlY&t=1979 + endOffset: 2114 +- name: 'Team & Hiring: First developer hire and fully remote setup' + startOffset: 2114 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2114 + endOffset: 2241 +- name: 'Scaling Challenge: Recruiting the right engineering talent' + startOffset: 2241 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2241 + endOffset: 2323 +- name: 'Prevention Limits: Data governance won’t fully eliminate identity issues' + startOffset: 2323 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2323 + endOffset: 2436 +- name: 'Beyond Joins: When fuzzy joins and basic ETL aren’t enough' + startOffset: 2436 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2436 + endOffset: 2665 +- name: 'Deterministic Rules vs Probabilistic ML: Trade-offs for accuracy' + startOffset: 2665 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2665 + endOffset: 2750 +- name: 'Fraud Use Cases: Identity resolution for AML and fraud detection' + startOffset: 2750 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2750 + endOffset: 2963 +- name: 'Graph + ML: Pairwise matching, graph clustering and downstream use' + startOffset: 2963 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2963 + endOffset: 3020 +- name: 'Data Mapping: Need to specify field correspondences for matching' + startOffset: 3020 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3020 + endOffset: 3099 +- name: 'Impact Case Studies: Public-data donors, e-commerce and classifieds' + startOffset: 3099 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3099 + endOffset: 3251 +- name: 'Retrospective: Seeking cofounder earlier and open-sourcing sooner' + startOffset: 3251 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3251 + endOffset: 3367 +- name: 'Founder Advice: Validate use cases, distribution channels, and conviction' + startOffset: 3367 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3367 + endOffset: 3566 +- name: 'Recommended Reading: 
Creative Selection on product design' + startOffset: 3566 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3566 + endOffset: 3638 +- name: 'Closing Remarks: Follow-ups, demos and contact options' + startOffset: 3638 + url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3638 + endOffset: 3623 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Sonal Goyal and Zingg identity resolution' @@ -948,7 +1098,7 @@ transcript: sec: 3098 time: '51:38' who: Sonal -- header: 'Impact Case Studies: Public-data donors, e‑commerce and classifieds' +- header: 'Impact Case Studies: Public-data donors, e-commerce and classifieds' - line: Okay. Another interesting question is about some success stories of implementing identity resolution in products. Maybe I can start with fraud detection. We didn't use Zingg for that at OLX, but there is a nice article at OLX’s tech blog (tech.OLX.com) @@ -1117,162 +1267,6 @@ transcript: sec: 3694 time: '1:01:34' who: Sonal -description: Discover ML-powered identity resolution to remove duplicate records, - Snowflake-native, open-source deduplication for scalable fraud detection and integrations. -intro: 'How do you eliminate duplicate records across modern data stacks without breaking - pipelines or overfitting rules? In this episode, Sonal Goyal, founder of Zingg and - a 24‑year veteran in data consulting, walks through ML‑powered identity resolution - and entity resolution approaches to create a single source of truth for customers, - suppliers, products and patients.

We cover core distinctions—entity vs - identity resolution, duplicate detection vs deduplication—and practical architecture: - ML training, blocking and indexing for scale, Spark distribution, Snowflake‑native - deployment and a Python API. Sonal explains integrations (CLI, Python SDK, Databricks, - dbt), graph + ML workflows, data mapping needs, and real-world impact examples from - public‑data donors, e‑commerce and classifieds. She also discusses open source strategy - and licensing (AGPL), trade‑offs between deterministic rules and probabilistic ML, - and fraud/AML use cases.

If you’re wrestling with record linkage, entity - matching, or persistent duplicate records in a centralized data stack, this episode - offers concrete implementation patterns, scaling considerations, and open‑source - tradeoffs to help you choose the right identity resolution path.' -dateadded: '2022-10-29' -duration: PT01H23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=0 - endOffset: 71 -- name: 'Guest Overview: Sonal Goyal and Zingg identity resolution' - startOffset: 71 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=71 - endOffset: 126 -- name: 'Career Overview: 24 years in tech, data consulting background' - startOffset: 126 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=126 - endOffset: 178 -- name: 'Origin Story: Consulting projects reveal recurring identity gaps' - startOffset: 178 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=178 - endOffset: 291 -- name: 'Modern Data Stack: Centralized data exposing identity challenges' - startOffset: 291 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=291 - endOffset: 343 -- name: 'Product Overview: Zingg — ML-powered identity resolution' - startOffset: 343 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=343 - endOffset: 434 -- name: 'Terminology: Entity resolution vs identity resolution' - startOffset: 434 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=434 - endOffset: 472 -- name: 'Duplicate Detection vs Deduplication: Outcomes and use cases' - startOffset: 472 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=472 - endOffset: 548 -- name: 'Motivation: Recurring duplicate problems across domains' - startOffset: 548 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=548 - endOffset: 669 -- name: 'Solution Generality: Customers, products, patients and suppliers' - startOffset: 669 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=669 - endOffset: 818 -- name: 'Related Terms: Record linkage, entity matching, 
entity disambiguation' - startOffset: 818 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=818 - endOffset: 842 -- name: 'Core Approach: ML training, blocking, indexing for scale' - startOffset: 842 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=842 - endOffset: 1093 -- name: 'Implementation: Spark distribution, Snowflake-native & Python API' - startOffset: 1093 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1093 - endOffset: 1241 -- name: 'Interfaces & Integrations: CLI, Python SDK, Databricks, dbt, UI plans' - startOffset: 1241 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1241 - endOffset: 1311 -- name: 'Founder Transition: From consultancy to full-time product build' - startOffset: 1311 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1311 - endOffset: 1380 -- name: 'Development Timeline: Proof-of-concept to public release (~18 months)' - startOffset: 1380 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1380 - endOffset: 1454 -- name: 'Open Source Strategy: Community, adoption, and business rationale' - startOffset: 1454 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1454 - endOffset: 1620 -- name: 'Licensing Choice: AGPL to prevent SaaS rehosting and protect IP' - startOffset: 1620 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1620 - endOffset: 1870 -- name: 'Open Source Trade-offs: IP concerns vs discoverability and growth' - startOffset: 1870 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1870 - endOffset: 1920 -- name: 'Team Evolution: Solo founder, consultants, and initial hires' - startOffset: 1920 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1920 - endOffset: 1979 -- name: 'Founder Role: Product, ecosystem integrations, community and hiring' - startOffset: 1979 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=1979 - endOffset: 2114 -- name: 'Team & Hiring: First developer hire and fully remote setup' - startOffset: 2114 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2114 - endOffset: 
2241 -- name: 'Scaling Challenge: Recruiting the right engineering talent' - startOffset: 2241 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2241 - endOffset: 2323 -- name: 'Prevention Limits: Data governance won’t fully eliminate identity issues' - startOffset: 2323 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2323 - endOffset: 2436 -- name: 'Beyond Joins: When fuzzy joins and basic ETL aren’t enough' - startOffset: 2436 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2436 - endOffset: 2665 -- name: 'Deterministic Rules vs Probabilistic ML: Trade-offs for accuracy' - startOffset: 2665 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2665 - endOffset: 2750 -- name: 'Fraud Use Cases: Identity resolution for AML and fraud detection' - startOffset: 2750 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2750 - endOffset: 2963 -- name: 'Graph + ML: Pairwise matching, graph clustering and downstream use' - startOffset: 2963 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=2963 - endOffset: 3020 -- name: 'Data Mapping: Need to specify field correspondences for matching' - startOffset: 3020 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3020 - endOffset: 3099 -- name: 'Impact Case Studies: Public-data donors, e‑commerce and classifieds' - startOffset: 3099 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3099 - endOffset: 3251 -- name: 'Retrospective: Seeking cofounder earlier and open-sourcing sooner' - startOffset: 3251 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3251 - endOffset: 3367 -- name: 'Founder Advice: Validate use cases, distribution channels, and conviction' - startOffset: 3367 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3367 - endOffset: 3566 -- name: 'Recommended Reading: Creative Selection on product design' - startOffset: 3566 - url: https://www.youtube.com/watch?v=lpjffCOPxlY&t=3566 - endOffset: 3638 -- name: 'Closing Remarks: Follow-ups, demos and contact options' - startOffset: 3638 - url: 
https://www.youtube.com/watch?v=lpjffCOPxlY&t=3638 - endOffset: 3623 --- Links: diff --git a/_podcast/s13e09-building-open-source-nlp-tool.md b/_podcast/building-open-source-nlp-tool.md similarity index 97% rename from _podcast/s13e09-building-open-source-nlp-tool.md rename to _podcast/building-open-source-nlp-tool.md index a5a1dd35..5f20c24a 100644 --- a/_podcast/s13e09-building-open-source-nlp-tool.md +++ b/_podcast/building-open-source-nlp-tool.md @@ -1,19 +1,150 @@ --- +title: "Build Open-Source NLP Tools: Weak Supervision, LLM Heuristics & Enterprise ML Product Strategy" +short: "Build Open-Source NLP Tools" +season: 13 episode: 9 guests: - johanneshotter +image: images/podcast/building-open-source-nlp-tool.jpg ids: anchor: ow/datatalksclub/episodes/Building-an-Open-Source-NLP-Tool---Johannes-Htter-e22lbn4 youtube: WIpnyiHp4IE -image: images/podcast/s13e09-building-open-source-nlp-tool.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Building-an-Open-Source-NLP-Tool---Johannes-Htter-e22lbn4 apple: https://podcasts.apple.com/us/podcast/building-an-open-source-nlp-tool-johannes-h%C3%B6tter/id1541710331?i=1000610117894 spotify: https://open.spotify.com/episode/5SjY4vatlUYFCZUMV7dE7W?si=MC4ZZrKbSTKUEDVEfedGwA youtube: https://www.youtube.com/watch?v=WIpnyiHp4IE -season: 13 -short: Building an Open-Source NLP Tool -title: 'Improve NLP Labeling with Weak Supervision: Refinery, Bricks & GPT Heuristics' + +description: "Discover weak supervision, NLP labeling & GPT heuristics to build high-quality datasets faster — combine Refinery, Bricks, ensemble heuristics & active learning" +intro: "How can teams scale high-quality NLP labeling without hand-labeling every example? In this episode, Johannes Hötter, data scientist, engineer, and co-founder of kern, explains practical approaches to that problem using weak supervision, heuristics, and open-source tooling. 
We walk through demos of Refinery and Bricks, with a close look at Refinery’s weak supervision and labeling workflows, and why Jupyter widgets leave a gap for NLP tooling.

You’ll hear about common NLP challenges—messy labels and text metadata—and how ChatGPT can be used as a labeling heuristic. Johannes outlines combining heuristics: GPT-driven rules, active learning, and crowd labels as an ensemble of “workers,” plus foundations like Hugging Face, embeddings, and robust data management. Bricks is presented as a heuristic library with recipes and ensemble methods to streamline labeling.

The conversation also covers productization choices (open-source vs commercial), targeting engineers, enterprise workflows, community support, and niche document/PDF NLP issues. Listen to learn actionable strategies to improve NLP labeling quality, adopt weak supervision and GPT heuristics, and make tooling and go-to-market decisions for scalable data labeling and model training." +topics: +- NLP +- machine learning +- strategy +- entrepreneurship +- founder +dateadded: 2023-04-23 + +duration: PT01H27S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=0 + endOffset: 96 +- name: Background & early AI curiosity + startOffset: 96 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=96 + endOffset: 273 +- name: 'Open-source demos overview: Refinery and Bricks' + startOffset: 273 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=273 + endOffset: 393 +- name: 'Refinery features: weak supervision & labeling workflows' + startOffset: 393 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=393 + endOffset: 540 +- name: Jupyter widgets gap and NLP tooling needs + startOffset: 540 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=540 + endOffset: 614 +- name: 'NLP challenges: text metadata and messy labels' + startOffset: 614 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=614 + endOffset: 802 +- name: ChatGPT as a labeling heuristic + startOffset: 802 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=802 + endOffset: 958 +- name: 'Combining heuristics: GPT, active learning, crowd labels' + startOffset: 958 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=958 + endOffset: 1054 +- name: 'Foundations: Hugging Face, embeddings, and data management' + startOffset: 1054 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1054 + endOffset: 1113 +- name: 'Bricks: heuristic library, recipes, and ensemble methods' + startOffset: 1113 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1113 + 
endOffset: 1188 +- name: 'Weak supervision analogy: heuristics as ensemble workers' + startOffset: 1188 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1188 + endOffset: 1222 +- name: 'Productization: consultancy to Kern and product pivot' + startOffset: 1222 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1222 + endOffset: 1440 +- name: 'Targeting engineers: control over training data' + startOffset: 1440 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1440 + endOffset: 1582 +- name: 'Choosing open source: motivations and concerns' + startOffset: 1582 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1582 + endOffset: 1691 +- name: 'Open-source trade-offs: distribution versus revenue' + startOffset: 1691 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1691 + endOffset: 1799 +- name: 'Open-source adoption: free users vs paying customers' + startOffset: 1799 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1799 + endOffset: 1907 +- name: 'Business model: open-core, multi-user SaaS, and services' + startOffset: 1907 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1907 + endOffset: 2043 +- name: 'Enterprise engagements: workshops, customization, and domain expertise' + startOffset: 2043 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2043 + endOffset: 2160 +- name: 'Community support: Discord, workarounds, and feedback loops' + startOffset: 2160 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2160 + endOffset: 2303 +- name: 'Enterprise outreach: networking and segment strategies' + startOffset: 2303 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2303 + endOffset: 2421 +- name: 'Developer-focused sales: DevRel, education, and trust-building' + startOffset: 2421 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2421 + endOffset: 2592 +- name: 'Team structure: development, developer relations, go-to-market' + startOffset: 2592 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2592 + endOffset: 2840 +- name: 'Founder 
role evolution: prototyping, GTM, and coding balance' + startOffset: 2840 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2840 + endOffset: 2991 +- name: 'Co-founder division: complementary strengths and responsibilities' + startOffset: 2991 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2991 + endOffset: 3160 +- name: 'Niche use cases: PDF and document NLP challenges' + startOffset: 3160 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3160 + endOffset: 3363 +- name: Open source as trust-builder with developer teams + startOffset: 3363 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3363 + endOffset: 3422 +- name: 'Fundraising recap: 2.7M raise and investor interest in open source ML' + startOffset: 3422 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3422 + endOffset: 3598 +- name: 'Recommended reading: Prediction Machines (applied AI economics)' + startOffset: 3598 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3598 + endOffset: 3675 +- name: Podcast Outro and closing remarks + startOffset: 3675 + url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3675 + endOffset: 3627 + transcript: - header: Podcast Introduction - line: Today we will talk about open source and creating startups in open source @@ -1163,143 +1294,6 @@ transcript: sec: 3688 time: '1:01:28' who: Johannes -description: Discover weak supervision, NLP labeling & GPT heuristics to build high-quality - datasets faster — combine Refinery, Bricks, ensemble heuristics & active learning. -intro: 'How can teams scale high-quality NLP labeling without hand-labeling every - example? In this episode, Johannes Hötter, data scientist, engineer, and co-founder - of kern, explains practical approaches to that problem using weak supervision, heuristics, - and open-source tooling. We walk through demos of Refinery and Bricks, with a close - look at Refinery’s weak supervision and labeling workflows, and why Jupyter widgets - leave a gap for NLP tooling.

You’ll hear about common NLP challenges—messy - labels and text metadata—and how ChatGPT can be used as a labeling heuristic. Johannes - outlines combining heuristics: GPT-driven rules, active learning, and crowd labels - as an ensemble of “workers,” plus foundations like Hugging Face, embeddings, and - robust data management. Bricks is presented as a heuristic library with recipes - and ensemble methods to streamline labeling.

The conversation also covers - productization choices (open-source vs commercial), targeting engineers, enterprise - workflows, community support, and niche document/PDF NLP issues. Listen to learn - actionable strategies to improve NLP labeling quality, adopt weak supervision and - GPT heuristics, and make tooling and go-to-market decisions for scalable data labeling - and model training.' -dateadded: '2023-04-23' -duration: PT01H27S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=0 - endOffset: 96 -- name: Background & early AI curiosity - startOffset: 96 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=96 - endOffset: 273 -- name: 'Open-source demos overview: Refinery and Bricks' - startOffset: 273 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=273 - endOffset: 393 -- name: 'Refinery features: weak supervision & labeling workflows' - startOffset: 393 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=393 - endOffset: 540 -- name: Jupyter widgets gap and NLP tooling needs - startOffset: 540 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=540 - endOffset: 614 -- name: 'NLP challenges: text metadata and messy labels' - startOffset: 614 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=614 - endOffset: 802 -- name: ChatGPT as a labeling heuristic - startOffset: 802 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=802 - endOffset: 958 -- name: 'Combining heuristics: GPT, active learning, crowd labels' - startOffset: 958 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=958 - endOffset: 1054 -- name: 'Foundations: Hugging Face, embeddings, and data management' - startOffset: 1054 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1054 - endOffset: 1113 -- name: 'Bricks: heuristic library, recipes, and ensemble methods' - startOffset: 1113 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1113 - endOffset: 1188 -- name: 'Weak supervision analogy: heuristics as ensemble 
workers' - startOffset: 1188 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1188 - endOffset: 1222 -- name: 'Productization: consultancy to Kern and product pivot' - startOffset: 1222 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1222 - endOffset: 1440 -- name: 'Targeting engineers: control over training data' - startOffset: 1440 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1440 - endOffset: 1582 -- name: 'Choosing open source: motivations and concerns' - startOffset: 1582 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1582 - endOffset: 1691 -- name: 'Open-source trade-offs: distribution versus revenue' - startOffset: 1691 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1691 - endOffset: 1799 -- name: 'Open-source adoption: free users vs paying customers' - startOffset: 1799 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1799 - endOffset: 1907 -- name: 'Business model: open-core, multi-user SaaS, and services' - startOffset: 1907 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=1907 - endOffset: 2043 -- name: 'Enterprise engagements: workshops, customization, and domain expertise' - startOffset: 2043 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2043 - endOffset: 2160 -- name: 'Community support: Discord, workarounds, and feedback loops' - startOffset: 2160 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2160 - endOffset: 2303 -- name: 'Enterprise outreach: networking and segment strategies' - startOffset: 2303 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2303 - endOffset: 2421 -- name: 'Developer-focused sales: DevRel, education, and trust-building' - startOffset: 2421 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2421 - endOffset: 2592 -- name: 'Team structure: development, developer relations, go-to-market' - startOffset: 2592 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2592 - endOffset: 2840 -- name: 'Founder role evolution: prototyping, GTM, and coding balance' - startOffset: 2840 - 
url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2840 - endOffset: 2991 -- name: 'Co-founder division: complementary strengths and responsibilities' - startOffset: 2991 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=2991 - endOffset: 3160 -- name: 'Niche use cases: PDF and document NLP challenges' - startOffset: 3160 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3160 - endOffset: 3363 -- name: Open source as trust-builder with developer teams - startOffset: 3363 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3363 - endOffset: 3422 -- name: 'Fundraising recap: 2.7M raise and investor interest in open source ML' - startOffset: 3422 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3422 - endOffset: 3598 -- name: 'Recommended reading: Prediction Machines (applied AI economics)' - startOffset: 3598 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3598 - endOffset: 3675 -- name: Podcast Outro and closing remarks - startOffset: 3675 - url: https://www.youtube.com/watch?v=WIpnyiHp4IE&t=3675 - endOffset: 3627 --- Links: diff --git a/_podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md b/_podcast/building-production-ml-platform-and-mlops-team.md similarity index 96% rename from _podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md rename to _podcast/building-production-ml-platform-and-mlops-team.md index 8a1e87a7..18cae147 100644 --- a/_podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.md +++ b/_podcast/building-production-ml-platform-and-mlops-team.md @@ -1,19 +1,154 @@ --- +title: "Building Production ML Platforms: Infrastructure, Workflows, Teams & Governance That Scale" +short: "From Scratch to Success: Building an MLOps Team and ML Platform" +season: 14 episode: 8 guests: - simonstiebellehner +image: images/podcast/building-production-ml-platform-and-mlops-team.jpg ids: - anchor: 
atatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c + anchor: datatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c youtube: CB1YIsxQRtc -image: images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Scratch-to-Success-Building-an-MLOps-Team-and-ML-Platform---Simon-Stiebellehner-e26d01c apple: https://podcasts.apple.com/us/podcast/from-scratch-to-success-building-an-mlops-team-and/id1541710331?i=1000618899065 spotify: https://open.spotify.com/episode/0raudIf9XsKdUfr5m2YlUE?si=x1PuaBqwTVyMlfNlGape2A youtube: https://www.youtube.com/watch?v=CB1YIsxQRtc -season: 14 -short: 'From Scratch to Success: Building an MLOps Team and ML Platform' -title: 'Designing MLOps Platforms: Deploy, Track Experiments, Manage Models & Compliance' + +description: "Discover MLOps strategies to build an ML platform with experiment tracking, improved reproducibility, faster releases and compliance-ready model operations" +intro: "How do you design an ML platform that reliably deploys models, tracks experiments, and meets regulatory constraints? In this episode, Simon Stiebellehner — Lead MLOps Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining & Data Warehousing — walks through practical MLOps platform design grounded in real-world deployment challenges.

We cover a clear definition of MLOps as people, processes, and technology, and dig into core platform skills (cloud infrastructure, Kubernetes, Terraform), user-centric design for notebooks and data science workflows, and software engineering fundamentals for production ML. Simon explains experiment tracking, model registry practices, deployment patterns (batch vs online), orchestration choices like Airflow, and stitching SaaS and open-source tools into a coherent ML platform. The episode also addresses compliance and data governance — GDPR, fintech security constraints — plus metadata, lineage, API design, and monitoring. We close with build vs buy trade-offs, staffing and on-call considerations, and how emerging LLM needs affect platforms.

Listen to learn concrete guidance on model deployment, reproducibility, orchestration, and compliance to help you design a pragmatic, scalable ML platform" +topics: +- MLOps +- machine learning +- leadership +- career growth +dateadded: 2023-07-02 + +duration: PT00H58M42S + +quotableClips: +- name: 'Episode Introduction: MLOps & ML platform conversation with Simon' + startOffset: 74 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=74 + endOffset: 120 +- name: 'Career & Transition: Research to industry, early platform work and management' + startOffset: 120 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=120 + endOffset: 282 +- name: 'MLOps Definition: People, processes, and technology' + startOffset: 282 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=282 + endOffset: 415 +- name: 'Deployment Challenges: Early blockers that launched MLOps work' + startOffset: 415 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=415 + endOffset: 491 +- name: 'Core Platform Skills: Cloud infrastructure, Kubernetes, Terraform' + startOffset: 491 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=491 + endOffset: 647 +- name: 'User-Centric Platform Design: Understanding data science workflows and notebooks' + startOffset: 647 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=647 + endOffset: 805 +- name: 'Engineering Fundamentals: Software engineering for ML platforms' + startOffset: 805 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=805 + endOffset: 830 +- name: 'Team Composition: Specialist vs generalist skill balance' + startOffset: 830 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=830 + endOffset: 934 +- name: 'Team Size & On-Call: Staffing and operational considerations' + startOffset: 934 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=934 + endOffset: 1012 +- name: 'Build vs Buy Decision: When to consider building an ML platform' + startOffset: 1012 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1012 + endOffset: 1034 +- name: 
'Platform Triggers: Signs you need standardization across teams' + startOffset: 1034 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1034 + endOffset: 1204 +- name: 'Single-Team Value: SaaS components and incremental platform adoption' + startOffset: 1204 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1204 + endOffset: 1263 +- name: 'Data Science Workflow: Exploration to training and evaluation' + startOffset: 1263 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1263 + endOffset: 1700 +- name: 'Self-Service Compute: Notebooks, BigQuery, Databricks provisioning' + startOffset: 1700 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1700 + endOffset: 1781 +- name: 'Experiment Tracking: Low-hanging fruit for reproducibility and collaboration' + startOffset: 1781 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1781 + endOffset: 1832 +- name: 'Model Registry: Persisting models for downstream consumption' + startOffset: 1832 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1832 + endOffset: 1875 +- name: 'Deployment Patterns: Batch inference versus online serving' + startOffset: 1875 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1875 + endOffset: 1911 +- name: 'Orchestration Choices: Airflow, pipelines, and production workflows' + startOffset: 1911 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1911 + endOffset: 2041 +- name: 'Tool Integration: Stitching SaaS and open-source into a coherent platform' + startOffset: 2041 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2041 + endOffset: 2126 +- name: 'LLMs & Emerging Needs: Platform implications and vendor updates' + startOffset: 2126 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2126 + endOffset: 2320 +- name: 'Developer Experience: Thin abstraction layers over cloud providers' + startOffset: 2320 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2320 + endOffset: 2394 +- name: 'Regulatory Constraints: Fintech, security, and compliance impact' + startOffset: 2394 + 
url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2394 + endOffset: 2568 +- name: 'Metadata & Lineage: Reproducibility, artifact logging, and tracking' + startOffset: 2568 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2568 + endOffset: 2750 +- name: 'Data Governance: GDPR implications of logging and dataset storage' + startOffset: 2750 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2750 + endOffset: 2828 +- name: 'Business-First Strategy: Models before heavy platform investment' + startOffset: 2828 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2828 + endOffset: 2959 +- name: 'Parallelization Strategy: Building minimal platform pieces alongside use + cases' + startOffset: 2959 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2959 + endOffset: 3101 +- name: 'MLOps Skill Focus: When platform engineers should learn model internals' + startOffset: 3101 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3101 + endOffset: 3255 +- name: 'API Design & Logging: Unified prediction schemas for monitoring and analytics' + startOffset: 3255 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3255 + endOffset: 3452 +- name: 'Learning Resources: Books, practical projects, and MLOps training' + startOffset: 3452 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3452 + endOffset: 3579 +- name: Episode Wrap-Up and Closing Remarks + startOffset: 3579 + url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3579 + endOffset: 3522 + transcript: - header: 'Episode Introduction: MLOps & ML platform conversation with Simon' - line: This week we'll talk about MLOps and building machine learning platforms. @@ -313,7 +448,7 @@ transcript: sec: 902 time: '15:02' who: Simon -- header: 'Team Size & On‑Call: Staffing and operational considerations' +- header: 'Team Size & On-Call: Staffing and operational considerations' - line: How many people should there be? At least two? 
sec: 934 time: '15:34' @@ -529,7 +664,7 @@ transcript: sec: 1679 time: '27:59' who: Alexey -- header: 'Self‑Service Compute: Notebooks, BigQuery, Databricks provisioning' +- header: 'Self-Service Compute: Notebooks, BigQuery, Databricks provisioning' - line: It could be, let's say, GCP of BigQuery and then you have some Colab notebook and you authenticate to BigQuery, write your SQL query, and the notebook pulls in your data. That would be an exploratory setup. Of course, you want to have @@ -563,7 +698,7 @@ transcript: sec: 1774 time: '29:34' who: Simon -- header: 'Experiment Tracking: Low‑hanging fruit for reproducibility and collaboration' +- header: 'Experiment Tracking: Low-hanging fruit for reproducibility and collaboration' - line: Okay, so that's the data exploration part, where we pull the data, we explore, and we see what we can actually do with this data. The second step is, once we did the initial exploration, we train and evaluate models. Then you mentioned @@ -1130,7 +1265,7 @@ transcript: sec: 3469 time: '57:49' who: Simon -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: Yeah, thank you, Simon. Thanks a lot, everyone, for joining us today. Thanks, Simon, for joining us today too, and sharing all your expertise. That's all we have for now. Enjoy the rest of your day and the rest of the week. See you soon. @@ -1146,148 +1281,6 @@ transcript: sec: 3596 time: '59:56' who: Alexey -description: Discover MLOps strategies to build an ML platform with experiment tracking, - improved reproducibility, faster releases and compliance-ready model operations. -intro: How do you design an ML platform that reliably deploys models, tracks experiments, - and meets regulatory constraints? 
In this episode, Simon Stiebellehner — Lead MLOps - Engineer at Transaction Monitoring Netherlands and university lecturer in Data Mining - & Data Warehousing — walks through practical MLOps platform design grounded in real-world - deployment challenges.

We cover a clear definition of MLOps as people, - processes, and technology, and dig into core platform skills (cloud infrastructure, - Kubernetes, Terraform), user‑centric design for notebooks and data science workflows, - and software engineering fundamentals for production ML. Simon explains experiment - tracking, model registry practices, deployment patterns (batch vs online), orchestration - choices like Airflow, and stitching SaaS and open‑source tools into a coherent ML - platform. The episode also addresses compliance and data governance — GDPR, fintech - security constraints — plus metadata, lineage, API design, and monitoring. We close - with build vs buy trade‑offs, staffing and on‑call considerations, and how emerging - LLM needs affect platforms.

Listen to learn concrete guidance on model - deployment, reproducibility, orchestration, and compliance to help you design a - pragmatic, scalable ML platform. -dateadded: '2023-07-02' -duration: PT00H58M42S -quotableClips: -- name: 'Episode Introduction: MLOps & ML platform conversation with Simon' - startOffset: 74 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=74 - endOffset: 120 -- name: 'Career & Transition: Research to industry, early platform work and management' - startOffset: 120 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=120 - endOffset: 282 -- name: 'MLOps Definition: People, processes, and technology' - startOffset: 282 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=282 - endOffset: 415 -- name: 'Deployment Challenges: Early blockers that launched MLOps work' - startOffset: 415 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=415 - endOffset: 491 -- name: 'Core Platform Skills: Cloud infrastructure, Kubernetes, Terraform' - startOffset: 491 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=491 - endOffset: 647 -- name: 'User-Centric Platform Design: Understanding data science workflows and notebooks' - startOffset: 647 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=647 - endOffset: 805 -- name: 'Engineering Fundamentals: Software engineering for ML platforms' - startOffset: 805 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=805 - endOffset: 830 -- name: 'Team Composition: Specialist vs generalist skill balance' - startOffset: 830 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=830 - endOffset: 934 -- name: 'Team Size & On‑Call: Staffing and operational considerations' - startOffset: 934 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=934 - endOffset: 1012 -- name: 'Build vs Buy Decision: When to consider building an ML platform' - startOffset: 1012 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1012 - endOffset: 1034 -- name: 'Platform Triggers: Signs you need standardization across teams' - 
startOffset: 1034 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1034 - endOffset: 1204 -- name: 'Single-Team Value: SaaS components and incremental platform adoption' - startOffset: 1204 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1204 - endOffset: 1263 -- name: 'Data Science Workflow: Exploration to training and evaluation' - startOffset: 1263 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1263 - endOffset: 1700 -- name: 'Self‑Service Compute: Notebooks, BigQuery, Databricks provisioning' - startOffset: 1700 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1700 - endOffset: 1781 -- name: 'Experiment Tracking: Low‑hanging fruit for reproducibility and collaboration' - startOffset: 1781 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1781 - endOffset: 1832 -- name: 'Model Registry: Persisting models for downstream consumption' - startOffset: 1832 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1832 - endOffset: 1875 -- name: 'Deployment Patterns: Batch inference versus online serving' - startOffset: 1875 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1875 - endOffset: 1911 -- name: 'Orchestration Choices: Airflow, pipelines, and production workflows' - startOffset: 1911 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=1911 - endOffset: 2041 -- name: 'Tool Integration: Stitching SaaS and open-source into a coherent platform' - startOffset: 2041 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2041 - endOffset: 2126 -- name: 'LLMs & Emerging Needs: Platform implications and vendor updates' - startOffset: 2126 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2126 - endOffset: 2320 -- name: 'Developer Experience: Thin abstraction layers over cloud providers' - startOffset: 2320 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2320 - endOffset: 2394 -- name: 'Regulatory Constraints: Fintech, security, and compliance impact' - startOffset: 2394 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2394 - endOffset: 
2568 -- name: 'Metadata & Lineage: Reproducibility, artifact logging, and tracking' - startOffset: 2568 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2568 - endOffset: 2750 -- name: 'Data Governance: GDPR implications of logging and dataset storage' - startOffset: 2750 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2750 - endOffset: 2828 -- name: 'Business-First Strategy: Models before heavy platform investment' - startOffset: 2828 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2828 - endOffset: 2959 -- name: 'Parallelization Strategy: Building minimal platform pieces alongside use - cases' - startOffset: 2959 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=2959 - endOffset: 3101 -- name: 'MLOps Skill Focus: When platform engineers should learn model internals' - startOffset: 3101 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3101 - endOffset: 3255 -- name: 'API Design & Logging: Unified prediction schemas for monitoring and analytics' - startOffset: 3255 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3255 - endOffset: 3452 -- name: 'Learning Resources: Books, practical projects, and MLOps training' - startOffset: 3452 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3452 - endOffset: 3579 -- name: Episode Wrap‑Up and Closing Remarks - startOffset: 3579 - url: https://www.youtube.com/watch?v=CB1YIsxQRtc&t=3579 - endOffset: 3522 --- Links: diff --git a/_podcast/s17e09-building-production-search-systems.md b/_podcast/building-production-search-systems.md similarity index 96% rename from _podcast/s17e09-building-production-search-systems.md rename to _podcast/building-production-search-systems.md index d2c2aa05..c0fd7465 100644 --- a/_podcast/s17e09-building-production-search-systems.md +++ b/_podcast/building-production-search-systems.md @@ -1,20 +1,172 @@ --- +title: "Building Search Systems: Dense Embeddings, MLOps and Evaluation Metrics" +short: "Building Production Search Systems" +season: 17 episode: 9 guests: - 
danielsvonava +image: images/podcast/building-production-search-systems.jpg ids: - anchor: atatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh + anchor: datatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh youtube: gEmSrknGKDE -image: images/podcast/s17e09-building-production-search-systems.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Production-Search-Systems---Daniel-Svonava-e2hccnh apple: https://podcasts.apple.com/us/podcast/building-production-search-systems-daniel-svonava/id1541710331?i=1000650138905 spotify: https://open.spotify.com/episode/19R0rLA8hULTBZi9FhZuTs?si=xggb0OzfRHCFSmXtJWm7bA youtube: https://www.youtube.com/watch?v=gEmSrknGKDE -season: 17 -short: Building Production Search Systems -title: 'Vector Search & Databases: Indexing, Embeddings, Hybrid Retrieval, MLOps & - CLIP' +description: "Learn dense embeddings, vector databases & MLOps to productionize search—get indexing, hybrid search, evaluation metrics and deploy tips to boost relevance." +topics: +- information retrieval +- vector databases +- embeddings +- MLOps +- evaluation metrics +- production +- search +intro: "How do you build search systems that balance dense embeddings, MLOps, and meaningful evaluation metrics? In this episode Daniel Svonava — an entrepreneurial technologist with 20 years of experience (from competitive programming and research internships to leading ML infrastructure at YouTube Ads) and co-founder of Superlinked/VectorHub — walks through practical design and operational decisions for modern search and retrieval.

We cover core topics: framing search as a decision problem, representation learning from bag-of-words to dense vector embeddings, inverted index mechanics, document chunking and ingestion, and when to use Lucene/Elasticsearch versus dedicated vector databases. Daniel explains vector compute trade-offs (ingestion vs query-time encoding), model versioning and recomputing embeddings, hybrid search strategies, CLIP-style cross-modal retrieval, multi-embedding designs, and techniques for encoding recency and timestamps. He also digs into MLOps concerns — pipeline brittleness, configuration debt, and deployment trade-offs — plus evaluation: business metrics, A/B testing, offline evaluation and operational metrics.

Listeners will get concrete guidance on embedding strategy, vector database selection, indexing and ranking trade-offs, and how to measure search impact so teams can prototype faster and productionize reliable retrieval systems." +dateadded: 2024-03-25 +duration: PT01H05M23S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=0 + endOffset: 107 +- name: 'Guest Introduction: Daniel Svonava, Superlinked & VectorHub' + startOffset: 107 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=107 + endOffset: 160 +- name: 'Career Highlights: Internships, YouTube Ads, and Startups' + startOffset: 160 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=160 + endOffset: 299 +- name: Competitive Programming Influence on Engineering + startOffset: 299 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=299 + endOffset: 380 +- name: 'Framing Search: Decision Problem & Relevance' + startOffset: 380 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=380 + endOffset: 550 +- name: Information Retrieval vs Recommender Boundaries; Representation Learning + startOffset: 550 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=550 + endOffset: 689 +- name: From Bag-of-Words to Dense Vector Representations + startOffset: 689 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=689 + endOffset: 765 +- name: Inverted Index Mechanics, Candidate Generation & Ranking + startOffset: 765 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=765 + endOffset: 1005 +- name: 'Practical Indexing: Document Chunking and Ingestion' + startOffset: 1005 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1005 + endOffset: 1060 +- name: 'Use Existing Engines: Lucene and Open-source Tools' + startOffset: 1060 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1060 + endOffset: 1129 +- name: 'Index Data Structures: Trees, Alphabetical Ordering, and Lookups' + startOffset: 1129 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1129 + 
endOffset: 1202 +- name: 'Search Maintenance: Brittleness, Synonyms, and Configuration Debt' + startOffset: 1202 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1202 + endOffset: 1315 +- name: Multi-modal Retrieval and Personalization Requirements + startOffset: 1315 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1315 + endOffset: 1641 +- name: 'Vector Databases: Storing Embeddings and Nearest-Neighbor Search' + startOffset: 1641 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1641 + endOffset: 1740 +- name: 'Vector Compute: Ingestion Encoding vs Query-Time Encoding' + startOffset: 1740 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1740 + endOffset: 1822 +- name: 'Pipeline Challenges: Recomputing Embeddings and Model Versioning' + startOffset: 1822 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1822 + endOffset: 1963 +- name: 'CLIP Example: Text-to-Image Cross-modal Search' + startOffset: 1963 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1963 + endOffset: 1993 +- name: 'Embedding Strategy Changes: Model Swaps and Pipeline Flexibility' + startOffset: 1993 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1993 + endOffset: 2040 +- name: 'Hybrid Search: Combining Vector Similarity with Filters and Recency' + startOffset: 2040 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2040 + endOffset: 2181 +- name: Custom Embeddings, Ranking Models, and MLOps Trade-offs + startOffset: 2181 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2181 + endOffset: 2291 +- name: 'Multi-embedding Design: Titles, Content, Images, and Behavioral Signals' + startOffset: 2291 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2291 + endOffset: 2393 +- name: 'Expressing Constraints: Lucene Must/Should vs Vector-query Approaches' + startOffset: 2393 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2393 + endOffset: 2448 +- name: 'Recency and Bias: Encoding Time and Applying Weights in Embeddings' + startOffset: 2448 + url: 
https://www.youtube.com/watch?v=gEmSrknGKDE&t=2448 + endOffset: 2516 +- name: Timestamp & Positional Encoding Techniques in Vector Space + startOffset: 2516 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2516 + endOffset: 2711 +- name: Normalizing Components and Late-binding Query Weights + startOffset: 2711 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2711 + endOffset: 2778 +- name: 'LLM Contexting: Prompted Timestamps and Limitations' + startOffset: 2778 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2778 + endOffset: 2857 +- name: Limits of LLM-only Retrieval; Value of Specialized Encoders + startOffset: 2857 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2857 + endOffset: 2976 +- name: 'Resources & Tutorials: VectorHub Guides on Combining Modalities' + startOffset: 2976 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2976 + endOffset: 3155 +- name: 'Vendor Selection: Vector DB Feature Comparison and Trade-offs' + startOffset: 3155 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3155 + endOffset: 3296 +- name: When to Use Lucene/Elasticsearch vs Dedicated Vector Databases + startOffset: 3296 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3296 + endOffset: 3468 +- name: 'E-commerce Strategy: Prototype with Embeddings for Mid-size D2C' + startOffset: 3468 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3468 + endOffset: 3497 +- name: Rapid Prototyping with CLIP and Steps to Productionize + startOffset: 3497 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3497 + endOffset: 3685 +- name: 'Measuring Search Impact: Business Metrics, A/B Testing, and USD' + startOffset: 3685 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3685 + endOffset: 3830 +- name: Operational Metrics, Offline Evaluation, and Empowering Engineers + startOffset: 3830 + url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3830 + endOffset: 4008 +- name: Closing Remarks and How to Connect with Daniel/VectorHub + startOffset: 4008 + url: 
https://www.youtube.com/watch?v=gEmSrknGKDE&t=4008 + endOffset: 3923 transcript: - header: Podcast Introduction - header: 'Guest Introduction: Daniel Svonava, Superlinked & VectorHub' @@ -1046,169 +1198,16 @@ transcript: sec: 4030 time: '1:07:10' who: Daniel -description: Discover vector search, embeddings & vector database practices - indexing, - hybrid retrieval, CLIP prototype and MLOps tips to boost relevance & ship faster -intro: How do you design and operate reliable vector search systems that balance embeddings, - traditional indexing, and production MLOps? In this episode, Daniel Svonava — co-founder - of Superlinked and VectorHub, former ML infrastructure tech lead for YouTube Ads - with a 20‑year engineering background including competitive programming and research - internships at Google and IBM — answers that question with practical detail.

- We dig into the mechanics of indexing (inverted indexes, document chunking, candidate - generation and ranking), the evolution from bag‑of‑words to dense embeddings, and - the role of vector databases for nearest‑neighbor search. Daniel walks through vector - compute tradeoffs (ingestion vs query‑time encoding), model versioning, pipeline - challenges like recomputing embeddings, and hybrid retrieval strategies that combine - vector similarity with filters, recency, and Lucene-style constraints. He also explains - multi‑modal retrieval with CLIP, multi‑embedding designs, timestamp/positional encoding, - and vendor selection criteria.

Listen to learn concrete guidance on prototyping - with CLIP, when to use Lucene/Elasticsearch versus dedicated vector DBs, MLOps tradeoffs, - and how to measure search impact through A/B testing and operational metrics — actionable - insight for engineers building production search and recommender systems. -dateadded: '2024-03-25' -duration: PT01H05M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=0 - endOffset: 107 -- name: 'Guest Introduction: Daniel Svonava, Superlinked & VectorHub' - startOffset: 107 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=107 - endOffset: 160 -- name: 'Career Highlights: Internships, YouTube Ads, and Startups' - startOffset: 160 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=160 - endOffset: 299 -- name: Competitive Programming Influence on Engineering - startOffset: 299 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=299 - endOffset: 380 -- name: 'Framing Search: Decision Problem & Relevance' - startOffset: 380 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=380 - endOffset: 550 -- name: Information Retrieval vs Recommender Boundaries; Representation Learning - startOffset: 550 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=550 - endOffset: 689 -- name: From Bag-of-Words to Dense Vector Representations - startOffset: 689 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=689 - endOffset: 765 -- name: Inverted Index Mechanics, Candidate Generation & Ranking - startOffset: 765 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=765 - endOffset: 1005 -- name: 'Practical Indexing: Document Chunking and Ingestion' - startOffset: 1005 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1005 - endOffset: 1060 -- name: 'Use Existing Engines: Lucene and Open-source Tools' - startOffset: 1060 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1060 - endOffset: 1129 -- name: 'Index Data Structures: Trees, Alphabetical Ordering, and Lookups' - 
startOffset: 1129 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1129 - endOffset: 1202 -- name: 'Search Maintenance: Brittleness, Synonyms, and Configuration Debt' - startOffset: 1202 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1202 - endOffset: 1315 -- name: Multi-modal Retrieval and Personalization Requirements - startOffset: 1315 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1315 - endOffset: 1641 -- name: 'Vector Databases: Storing Embeddings and Nearest-Neighbor Search' - startOffset: 1641 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1641 - endOffset: 1740 -- name: 'Vector Compute: Ingestion Encoding vs Query-Time Encoding' - startOffset: 1740 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1740 - endOffset: 1822 -- name: 'Pipeline Challenges: Recomputing Embeddings and Model Versioning' - startOffset: 1822 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1822 - endOffset: 1963 -- name: 'CLIP Example: Text-to-Image Cross-modal Search' - startOffset: 1963 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1963 - endOffset: 1993 -- name: 'Embedding Strategy Changes: Model Swaps and Pipeline Flexibility' - startOffset: 1993 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=1993 - endOffset: 2040 -- name: 'Hybrid Search: Combining Vector Similarity with Filters and Recency' - startOffset: 2040 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2040 - endOffset: 2181 -- name: Custom Embeddings, Ranking Models, and MLOps Trade-offs - startOffset: 2181 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2181 - endOffset: 2291 -- name: 'Multi-embedding Design: Titles, Content, Images, and Behavioral Signals' - startOffset: 2291 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2291 - endOffset: 2393 -- name: 'Expressing Constraints: Lucene Must/Should vs Vector-query Approaches' - startOffset: 2393 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2393 - endOffset: 2448 -- name: 'Recency and Bias: Encoding Time and 
Applying Weights in Embeddings' - startOffset: 2448 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2448 - endOffset: 2516 -- name: Timestamp & Positional Encoding Techniques in Vector Space - startOffset: 2516 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2516 - endOffset: 2711 -- name: Normalizing Components and Late-binding Query Weights - startOffset: 2711 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2711 - endOffset: 2778 -- name: 'LLM Contexting: Prompted Timestamps and Limitations' - startOffset: 2778 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2778 - endOffset: 2857 -- name: Limits of LLM-only Retrieval; Value of Specialized Encoders - startOffset: 2857 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2857 - endOffset: 2976 -- name: 'Resources & Tutorials: VectorHub Guides on Combining Modalities' - startOffset: 2976 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=2976 - endOffset: 3155 -- name: 'Vendor Selection: Vector DB Feature Comparison and Trade-offs' - startOffset: 3155 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3155 - endOffset: 3296 -- name: When to Use Lucene/Elasticsearch vs Dedicated Vector Databases - startOffset: 3296 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3296 - endOffset: 3468 -- name: 'E-commerce Strategy: Prototype with Embeddings for Mid-size D2C' - startOffset: 3468 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3468 - endOffset: 3497 -- name: Rapid Prototyping with CLIP and Steps to Productionize - startOffset: 3497 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3497 - endOffset: 3685 -- name: 'Measuring Search Impact: Business Metrics, A/B Testing, and USD' - startOffset: 3685 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3685 - endOffset: 3830 -- name: Operational Metrics, Offline Evaluation, and Empowering Engineers - startOffset: 3830 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=3830 - endOffset: 4008 -- name: Closing Remarks and How to Connect 
with Daniel/VectorHub - startOffset: 4008 - url: https://www.youtube.com/watch?v=gEmSrknGKDE&t=4008 - endOffset: 3923 +context: 'Central narrative: Building effective, real-world search and retrieval is + a systems engineering problem that pragmatically combines modern representation + learning (dense, multimodal embeddings and specialized encoders) with classical + IR techniques (inverted indexes, filters, recency, and ranking), wrapped in robust + MLOps, evaluation, and product-oriented trade-offs. The episode’s through-line is + that success comes from hybrid architectures and operational discipline—careful + choices about embeddings, indexing, model versioning, pipeline design, vendor/tool + selection, and business metrics—so teams can move fast from prototype (e.g., CLIP + experiments) to scalable, maintainable, and measurable production search.' --- - Links: * [VectorHub](https://superlinked.com/vectorhub/?utm_source=community&utm_medium=podcast&utm_campaign=datatalks){:target="_blank"} diff --git a/_podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.md b/_podcast/building-scalable-and-reliable-machine-learning-systems.md similarity index 96% rename from _podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.md rename to _podcast/building-scalable-and-reliable-machine-learning-systems.md index 986dd8ea..fb7ebd05 100644 --- a/_podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.md +++ b/_podcast/building-scalable-and-reliable-machine-learning-systems.md @@ -1,20 +1,127 @@ --- +title: "Build Scalable, Reliable ML Systems (MLOps): Design Docs, Data Strategy & Edge Constraints" +short: "Building Scalable and Reliable Machine Learning Systems" +season: 14 episode: 1 guests: - arsenykravchenko +image: images/podcast/building-scalable-and-reliable-machine-learning-systems.jpg ids: - anchor: atatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q + anchor: 
datatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q youtube: i-pIdekjUow -image: images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Scalable-and-Reliable-Machine-Learning-Systems---Arseny-Kravchenko-e23m33q apple: https://podcasts.apple.com/us/podcast/building-scalable-and-reliable-machine-learning/id1541710331?i=1000612813133 spotify: https://open.spotify.com/episode/6iDyJuhfXibDB6kXFhvaqG?si=urjDGVl6RrWtjVXIAUgOvQ youtube: https://www.youtube.com/watch?v=i-pIdekjUow -season: 14 -short: Building Scalable and Reliable Machine Learning Systems -title: 'Build Scalable, Reliable ML Systems (MLOps): Design Docs, Data Strategy & - Edge Constraints' + +description: "Learn MLOps design doc and data strategy to build scalable, reliable machine learning systems; navigate edge constraints, metrics, pipelines, and testing" +intro: "How do you design machine learning systems that scale, stay reliable in production, and meet tight edge and mobile constraints? In this episode, Arseny Kravchenko — a seasoned ML engineer focused on computer vision, active in ML since 2015 and a former Kaggle Master — walks through practical MLOps patterns for turning models into production systems.

We cover where startups trade off productionization and who owns those decisions; how to define ML system goals, non-goals, and assumptions; and why a lightweight design phase with a problem-first design doc (50/50 problem vs solution) pays off. Arseny breaks down edge and mobile ML constraints (latency, FPS, energy, Core ML), managing known and unknown risks with early tests, and building a solution blueprint: baselines, metrics, pipeline components, and data strategy (availability, processing, features, data lakes). He also explains system diagramming for data flow and real-time vs batch, dataset heuristics, and shares design doc examples (photostock search and retail pricing). Listeners will get concrete guidance on MLOps, design docs, data strategy, and edge ML trade-offs — plus pointers to deeper learning resources and a book offer discussed at the end." +topics: +- machine learning +- MLOps +- data strategy +- data engineering +- system design +- MLOps +dateadded: 2023-05-13 + +duration: PT00H59M25S + +quotableClips: +- name: 'Episode Overview: Building Scalable & Reliable Machine Learning Systems' + startOffset: 0 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=0 + endOffset: 154 +- name: Guest Bio & Startup Experience (deep learning, MLOps, Ntropy, AR, Lyft) + startOffset: 154 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=154 + endOffset: 371 +- name: 'Startups: ML Productionization Trade-offs and Decision Ownership' + startOffset: 371 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=371 + endOffset: 474 +- name: 'Defining Machine Learning System Design: Goals and Constraints' + startOffset: 474 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=474 + endOffset: 634 +- name: 'Edge & Mobile ML Constraints: Latency, FPS, Energy, Core ML' + startOffset: 634 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=634 + endOffset: 889 +- name: 'Managing Unknowns: Known Unknowns, Unknown Unknowns, Early Tests' + startOffset: 889 + url: 
https://www.youtube.com/watch?v=i-pIdekjUow&t=889 + endOffset: 1129 +- name: 'Planning Value: Why a Lightweight Design Phase Matters' + startOffset: 1129 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1129 + endOffset: 1221 +- name: 'Design Document Approach: Problem-First, 50/50 Problem vs Solution' + startOffset: 1221 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1221 + endOffset: 1368 +- name: 'Problem Framing: Product Scenarios, Realism vs Appeal Trade-offs' + startOffset: 1368 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1368 + endOffset: 1741 +- name: 'Goals, Non-Goals & Assumptions: Turning Requirements into Metrics' + startOffset: 1741 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1741 + endOffset: 1902 +- name: 'Solution Blueprint: Baseline, Metrics, Pipeline Components' + startOffset: 1902 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1902 + endOffset: 1957 +- name: 'Data Strategy: Availability, Processing, Feature Needs, Data Lakes' + startOffset: 1957 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1957 + endOffset: 2235 +- name: 'System Diagramming: Data Flow, Dependencies, Real-time vs Batch' + startOffset: 2235 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2235 + endOffset: 2382 +- name: 'Motivation for the Book: Generalizing Experience into Patterns' + startOffset: 2382 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2382 + endOffset: 2505 +- name: 'Heuristics for Datasets: Intuition, Limits, and Practical Guidance' + startOffset: 2505 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2505 + endOffset: 2710 +- name: 'Design Doc Examples: Photostock Search & Super Mega Retail Pricing' + startOffset: 2710 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2710 + endOffset: 2829 +- name: 'Reader Types: Theory-Focused vs Template-Focused Audiences' + startOffset: 2829 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2829 + endOffset: 2907 +- name: 'Co-author Dynamics: Balancing Corporate & 
Hands-on Perspectives' + startOffset: 2907 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2907 + endOffset: 3099 +- name: 'Book Development: Scope Decisions, Publisher Constraints, Reviewer Feedback' + startOffset: 3099 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3099 + endOffset: 3348 +- name: 'Favorite Chapter: Preliminary Research, Reuse, and External Sources' + startOffset: 3348 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3348 + endOffset: 3508 +- name: 'Further Learning: System Design Fundamentals & Software Engineering Skills' + startOffset: 3508 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3508 + endOffset: 3600 +- name: 'Book Offer & Giveaway: Discount Code, Twitter Giveaway Winners' + startOffset: 3600 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3600 + endOffset: 3637 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3637 + url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3637 + endOffset: 3565 + transcript: - header: 'Episode Overview: Building Scalable & Reliable Machine Learning Systems' - line: This week, we'll talk about building scalable and reliable machine learning @@ -944,118 +1051,6 @@ transcript: sec: 3658 time: '1:00:58' who: Alexey -description: Learn MLOps design doc and data strategy to build scalable, reliable - machine learning systems; navigate edge constraints, metrics, pipelines, and testing. -intro: 'How do you design machine learning systems that scale, stay reliable in production, - and meet tight edge and mobile constraints? In this episode, Arseny Kravchenko — - a seasoned ML engineer focused on computer vision, active in ML since 2015 and a - former Kaggle Master — walks through practical MLOps patterns for turning models - into production systems.

We cover where startups trade off productionization - and who owns those decisions; how to define ML system goals, non-goals, and assumptions; - and why a lightweight design phase with a problem-first design doc (50/50 problem - vs solution) pays off. Arseny breaks down edge and mobile ML constraints (latency, - FPS, energy, Core ML), managing known and unknown risks with early tests, and building - a solution blueprint: baselines, metrics, pipeline components, and data strategy - (availability, processing, features, data lakes). He also explains system diagramming - for data flow and real-time vs batch, dataset heuristics, and shares design doc - examples (photostock search and retail pricing). Listeners will get concrete guidance - on MLOps, design docs, data strategy, and edge ML trade-offs — plus pointers to - deeper learning resources and a book offer discussed at the end.' -dateadded: '2023-05-13' -duration: PT00H59M25S -quotableClips: -- name: 'Episode Overview: Building Scalable & Reliable Machine Learning Systems' - startOffset: 0 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=0 - endOffset: 154 -- name: Guest Bio & Startup Experience (deep learning, MLOps, Ntropy, AR, Lyft) - startOffset: 154 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=154 - endOffset: 371 -- name: 'Startups: ML Productionization Trade-offs and Decision Ownership' - startOffset: 371 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=371 - endOffset: 474 -- name: 'Defining Machine Learning System Design: Goals and Constraints' - startOffset: 474 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=474 - endOffset: 634 -- name: 'Edge & Mobile ML Constraints: Latency, FPS, Energy, Core ML' - startOffset: 634 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=634 - endOffset: 889 -- name: 'Managing Unknowns: Known Unknowns, Unknown Unknowns, Early Tests' - startOffset: 889 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=889 - endOffset: 1129 -- name: 'Planning Value: 
Why a Lightweight Design Phase Matters' - startOffset: 1129 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1129 - endOffset: 1221 -- name: 'Design Document Approach: Problem-First, 50/50 Problem vs Solution' - startOffset: 1221 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1221 - endOffset: 1368 -- name: 'Problem Framing: Product Scenarios, Realism vs Appeal Trade-offs' - startOffset: 1368 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1368 - endOffset: 1741 -- name: 'Goals, Non-Goals & Assumptions: Turning Requirements into Metrics' - startOffset: 1741 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1741 - endOffset: 1902 -- name: 'Solution Blueprint: Baseline, Metrics, Pipeline Components' - startOffset: 1902 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1902 - endOffset: 1957 -- name: 'Data Strategy: Availability, Processing, Feature Needs, Data Lakes' - startOffset: 1957 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=1957 - endOffset: 2235 -- name: 'System Diagramming: Data Flow, Dependencies, Real-time vs Batch' - startOffset: 2235 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2235 - endOffset: 2382 -- name: 'Motivation for the Book: Generalizing Experience into Patterns' - startOffset: 2382 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2382 - endOffset: 2505 -- name: 'Heuristics for Datasets: Intuition, Limits, and Practical Guidance' - startOffset: 2505 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2505 - endOffset: 2710 -- name: 'Design Doc Examples: Photostock Search & Super Mega Retail Pricing' - startOffset: 2710 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2710 - endOffset: 2829 -- name: 'Reader Types: Theory-Focused vs Template-Focused Audiences' - startOffset: 2829 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=2829 - endOffset: 2907 -- name: 'Co-author Dynamics: Balancing Corporate & Hands-on Perspectives' - startOffset: 2907 - url: 
https://www.youtube.com/watch?v=i-pIdekjUow&t=2907 - endOffset: 3099 -- name: 'Book Development: Scope Decisions, Publisher Constraints, Reviewer Feedback' - startOffset: 3099 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3099 - endOffset: 3348 -- name: 'Favorite Chapter: Preliminary Research, Reuse, and External Sources' - startOffset: 3348 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3348 - endOffset: 3508 -- name: 'Further Learning: System Design Fundamentals & Software Engineering Skills' - startOffset: 3508 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3508 - endOffset: 3600 -- name: 'Book Offer & Giveaway: Discount Code, Twitter Giveaway Winners' - startOffset: 3600 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3600 - endOffset: 3637 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3637 - url: https://www.youtube.com/watch?v=i-pIdekjUow&t=3637 - endOffset: 3565 --- Links: diff --git a/_podcast/s15e06-democratizing-causality.md b/_podcast/causal-inference-for-machine-learning.md similarity index 97% rename from _podcast/s15e06-democratizing-causality.md rename to _podcast/causal-inference-for-machine-learning.md index 43bbc2e1..d2236c4f 100644 --- a/_podcast/s15e06-democratizing-causality.md +++ b/_podcast/causal-inference-for-machine-learning.md @@ -1,19 +1,147 @@ --- +title: "Causal Inference for Real-World ML: Uplift Modeling, Counterfactuals, Treatment Effects & LLM Integration" +short: "Democratizing Causality" +season: 15 episode: 6 guests: - aleksandermolak +image: images/podcast/causal-inference-for-machine-learning.jpg ids: - anchor: atatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh + anchor: datatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh youtube: 0I2FHH95Ofs -image: images/podcast/s15e06-democratizing-causality.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Democratizing-Causality---Aleksander-Molak-e28e0vh apple: 
https://podcasts.apple.com/us/podcast/democratizing-causality-aleksander-molak/id1541710331?i=1000625694605 spotify: https://open.spotify.com/episode/17U3RWz5BupRIwoBvGWqYQ?si=g6XypIZnSwG4hznNIOs7mw youtube: https://www.youtube.com/watch?v=0I2FHH95Ofs -season: 15 -short: Democratizing Causality -title: 'Practical Causal ML: Counterfactuals, Uplift (CATE), A/B Testing & LLMs' +description: "Master causal inference for production ML: uplift modeling, treatment effects, counterfactuals, causal discovery, and LLM integration strategies." +intro: "How do you move from correlation to actionable decisions — using counterfactuals, uplift modeling, treatment effect estimation, and LLMs — without falling into confounding traps or biased estimators? In this episode, Aleksander Molak, an independent ML researcher, author and educator specializing in causality, NLP and AI strategy, walks through practical causal inference techniques for real-world machine learning applications.

We explore foundational concepts like counterfactuals and Judea Pearl's causal hierarchy, then dive into meta-learners (T-learner, S-learner), Conditional Average Treatment Effect (CATE) estimation, uplift modeling, and when A/B testing or causal feature selection are essential for achieving unconfoundedness. Aleksander covers deployment challenges, debiasing methods (double/debiased ML), refutation testing for model validation, causal discovery algorithms, and cost-benefit analysis that uncovered wasted marketing spend. The conversation also examines how LLMs integrate into causal workflows: feature extraction from text, using text as outcomes or treatments, inferring unobserved confounders, and practical demonstrations with CausalBERT. You'll gain actionable frameworks for building, evaluating, and deploying causal ML systems in production, plus resources and code examples to implement these methods in your own projects." +topics: +- causal inference +- LLMs +- machine learning +dateadded: 2023-09-10 + +duration: PT01H06M38S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=0 + endOffset: 82 +- name: 'Guest Intro: Aleksander Molak & book overview' + startOffset: 82 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=82 + endOffset: 126 +- name: Career highlights and dyslexia prediction project + startOffset: 126 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=126 + endOffset: 375 +- name: 'Causal advocacy: democratizing causal thinking' + startOffset: 375 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=375 + endOffset: 451 +- name: 'Association vs causation: limits of correlational reasoning' + startOffset: 451 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=451 + endOffset: 535 +- name: 'Illustrative confounders: race example and ice cream–drowning' + startOffset: 535 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=535 + endOffset: 761 +- name: 'Predictive ML vs 
decision-making: Zillow and IID assumptions' + startOffset: 761 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=761 + endOffset: 936 +- name: 'Counterfactuals in practice: marketing and recommender systems' + startOffset: 936 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=936 + endOffset: 1095 +- name: Counterfactuals defined and Judea Pearl’s intervention view + startOffset: 1095 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1095 + endOffset: 1282 +- name: 'Meta-learners overview: T-learner and counterfactual estimation' + startOffset: 1282 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1282 + endOffset: 1464 +- name: Conditional Average Treatment Effect (CATE) estimation + startOffset: 1464 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1464 + endOffset: 1576 +- name: 'Achieving unconfoundedness: A/B tests vs causal feature selection' + startOffset: 1576 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1576 + endOffset: 1672 +- name: Targeting decisions from uplift estimates + startOffset: 1672 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1672 + endOffset: 1757 +- name: Deployment risks and debiasing estimators (double/triple ML) + startOffset: 1757 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1757 + endOffset: 1960 +- name: 'Uplift modeling: policy evaluation and business metrics' + startOffset: 1960 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1960 + endOffset: 1994 +- name: 'Evaluating causal models: refutation tests and estimator quality' + startOffset: 1994 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1994 + endOffset: 2257 +- name: Causal discovery and heterogeneous treatment effects (book coverage) + startOffset: 2257 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2257 + endOffset: 2334 +- name: 'Cost–benefit of causal models: complexity vs value' + startOffset: 2334 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2334 + endOffset: 2474 +- name: 'Real-world impact: discovering 
wasted marketing spend' + startOffset: 2474 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2474 + endOffset: 2605 +- name: 'Incremental rollout: A/B testing as validation baseline' + startOffset: 2605 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2605 + endOffset: 2666 +- name: 'LLMs in causal workflows: feature extraction and scoring' + startOffset: 2666 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2666 + endOffset: 2814 +- name: 'Text as outcome: using LLMs to score experimental text' + startOffset: 2814 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2814 + endOffset: 2957 +- name: 'Text as treatment/confounder: style extraction and embeddings' + startOffset: 2957 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2957 + endOffset: 3278 +- name: Inferring unobserved variables (e.g., gender/style) with LLMs + startOffset: 3278 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3278 + endOffset: 3494 +- name: CausalBert demo and code note (PyData Berlin talk) + startOffset: 3494 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3494 + endOffset: 3573 +- name: 'Causal ML without experiments: partial identification & sensitivity' + startOffset: 3573 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3573 + endOffset: 3843 +- name: 'Causal graphs and nonparametric identification: minimal observables' + startOffset: 3843 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3843 + endOffset: 3967 +- name: 'Recommended resources: The Book of Why, Molak’s book & GitHub' + startOffset: 3967 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3967 + endOffset: 4048 +- name: Closing remarks and next steps + startOffset: 4048 + url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=4048 + endOffset: 3998 + transcript: - header: Episode Introduction - header: 'Guest Intro: Aleksander Molak & book overview' @@ -359,7 +487,7 @@ transcript: sec: 1244 time: '20:44' who: Alexey -- header: 'Meta-learners overview: T‑learner and counterfactual 
estimation' +- header: 'Meta-learners overview: T-learner and counterfactual estimation' - line: That's a great question. You are correct. Out of the box, supervised models do not have the capabilities to reason causally and there are many different types of causal models. But the one that I think is relatively the easiest to to grasp, @@ -1172,143 +1300,6 @@ transcript: sec: 4080 time: '1:08:00' who: Alexey -description: 'Discover Causal ML counterfactuals and uplift (CATE): actionable debiasing, - targeting strategies, policy evaluation and deployment tips to boost ROI.' -intro: 'How do you move from correlation to actionable decisions — using counterfactuals, - uplift (CATE), A/B testing and LLMs — without getting misled by confounders or biased - estimators? In this episode, Aleksander Molak, an independent ML researcher, author - and educator specializing in causality, NLP and AI strategy (and author of a dyslexia - prediction project), walks through practical causal ML techniques and real-world - tradeoffs.

We cover foundational ideas — counterfactuals and Judea Pearl’s - intervention view — then meta-learners (T‑learner), Conditional Average Treatment - Effect (CATE) estimation, uplift modeling and when A/B tests or causal feature selection - are needed to achieve unconfoundedness. Aleksander discusses deployment risks and - debiasing approaches (double/triple ML), refutation tests for estimator quality, - causal discovery and cost–benefit tradeoffs that revealed wasted marketing spend. - He also shows how LLMs fit into causal workflows: feature extraction, scoring text - as outcome, text as treatment or confounder, inferring unobserved variables and - a CausalBert demo. Listeners will come away with practical guidance on building, - evaluating and validating causal ML systems, plus recommended resources and code - to start applying these methods.' -dateadded: '2023-09-10' -duration: PT01H06M38S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=0 - endOffset: 82 -- name: 'Guest Intro: Aleksander Molak & book overview' - startOffset: 82 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=82 - endOffset: 126 -- name: Career highlights and dyslexia prediction project - startOffset: 126 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=126 - endOffset: 375 -- name: 'Causal advocacy: democratizing causal thinking' - startOffset: 375 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=375 - endOffset: 451 -- name: 'Association vs causation: limits of correlational reasoning' - startOffset: 451 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=451 - endOffset: 535 -- name: 'Illustrative confounders: race example and ice cream–drowning' - startOffset: 535 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=535 - endOffset: 761 -- name: 'Predictive ML vs decision-making: Zillow and IID assumptions' - startOffset: 761 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=761 - endOffset: 936 -- name: 
'Counterfactuals in practice: marketing and recommender systems' - startOffset: 936 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=936 - endOffset: 1095 -- name: Counterfactuals defined and Judea Pearl’s intervention view - startOffset: 1095 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1095 - endOffset: 1282 -- name: 'Meta-learners overview: T‑learner and counterfactual estimation' - startOffset: 1282 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1282 - endOffset: 1464 -- name: Conditional Average Treatment Effect (CATE) estimation - startOffset: 1464 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1464 - endOffset: 1576 -- name: 'Achieving unconfoundedness: A/B tests vs causal feature selection' - startOffset: 1576 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1576 - endOffset: 1672 -- name: Targeting decisions from uplift estimates - startOffset: 1672 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1672 - endOffset: 1757 -- name: Deployment risks and debiasing estimators (double/triple ML) - startOffset: 1757 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1757 - endOffset: 1960 -- name: 'Uplift modeling: policy evaluation and business metrics' - startOffset: 1960 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1960 - endOffset: 1994 -- name: 'Evaluating causal models: refutation tests and estimator quality' - startOffset: 1994 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=1994 - endOffset: 2257 -- name: Causal discovery and heterogeneous treatment effects (book coverage) - startOffset: 2257 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2257 - endOffset: 2334 -- name: 'Cost–benefit of causal models: complexity vs value' - startOffset: 2334 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2334 - endOffset: 2474 -- name: 'Real-world impact: discovering wasted marketing spend' - startOffset: 2474 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2474 - endOffset: 2605 -- name: 'Incremental rollout: 
A/B testing as validation baseline' - startOffset: 2605 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2605 - endOffset: 2666 -- name: 'LLMs in causal workflows: feature extraction and scoring' - startOffset: 2666 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2666 - endOffset: 2814 -- name: 'Text as outcome: using LLMs to score experimental text' - startOffset: 2814 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2814 - endOffset: 2957 -- name: 'Text as treatment/confounder: style extraction and embeddings' - startOffset: 2957 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=2957 - endOffset: 3278 -- name: Inferring unobserved variables (e.g., gender/style) with LLMs - startOffset: 3278 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3278 - endOffset: 3494 -- name: CausalBert demo and code note (PyData Berlin talk) - startOffset: 3494 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3494 - endOffset: 3573 -- name: 'Causal ML without experiments: partial identification & sensitivity' - startOffset: 3573 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3573 - endOffset: 3843 -- name: 'Causal graphs and nonparametric identification: minimal observables' - startOffset: 3843 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3843 - endOffset: 3967 -- name: 'Recommended resources: The Book of Why, Molak’s book & GitHub' - startOffset: 3967 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=3967 - endOffset: 4048 -- name: Closing remarks and next steps - startOffset: 4048 - url: https://www.youtube.com/watch?v=0I2FHH95Ofs&t=4048 - endOffset: 3998 --- Links: diff --git a/_podcast/s04e09-chief-data-officer.md b/_podcast/chief-data-officer-data-strategy-and-org-design.md similarity index 97% rename from _podcast/s04e09-chief-data-officer.md rename to _podcast/chief-data-officer-data-strategy-and-org-design.md index c6602984..63ef1827 100644 --- a/_podcast/s04e09-chief-data-officer.md +++ 
b/_podcast/chief-data-officer-data-strategy-and-org-design.md @@ -1,11 +1,11 @@ --- -title: 'Mastering the Chief Data Officer Role: Build Data Strategy, Org Design & AI' -short: Chief Data Officer -guests: -- marcodesa -image: images/podcast/s04e09-chief-data-officer.jpg +title: "Mastering the Chief Data Officer Role: Build Data Strategy, Org Design & AI" +short: "Chief Data Officer" season: 4 episode: 9 +guests: +- marcodesa +image: images/podcast/chief-data-officer-data-strategy-and-org-design.jpg ids: youtube: IdaZOD46FEw anchor: Chief-Data-Officer---Marco-De-Sa-e16hm4t @@ -14,6 +14,131 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Chief-Data-Officer---Marco-De-Sa-e16hm4t spotify: https://open.spotify.com/episode/64lEB0Wv0a6DfkDi672Ulk apple: https://podcasts.apple.com/us/podcast/chief-data-officer-marco-de-sa/id1541710331?i=1000533326308 + +description: "Learn how Chief Data Officers build data strategy, org design and roadmaps—get tactics on governance, KPIs, delegation, career growth and remote leadership" +intro: "How do you move from head of data to an effective Chief Data Officer who builds strategy, designs the org, and delivers AI? In this episode, Marco De Sa — CDO at OLX Group with prior data leadership roles at Yahoo, Facebook, Twitter, and Spotify — lays out what modern data leadership really requires.

We explore the evolving CDO scope: data strategy, governance, and AI; balancing vision versus tactics; and future-proofing data collection for tomorrow’s products. Marco breaks down organizational design and delegation — when to hire multiple VPs, how CDO responsibilities differ from VP, CTO and CPO roles, and how to structure reporting lines. Practical topics include working backwards from goals to data platform and machine learning investment, measuring progress with meaningful KPIs, time management and productivity for senior data leaders, and managing distributed teams.

Listeners will walk away with concrete frameworks for data strategy, org design, and building a data-driven culture, plus career guidance for aspiring CDOs on technical breadth, soft skills, interviewing, and overcoming resistance with evidence-based persuasion. Ideal for data leaders and executives shaping data strategy, governance, and AI roadmaps." +topics: +- data strategy +- data governance +- AI +- leadership +- career growth +- communication +- team building +dateadded: 2021-08-29 + +duration: PT01H01M51S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=0 + endOffset: 78 +- name: 'Guest Overview: Marco''s Career & Roles' + startOffset: 78 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=78 + endOffset: 203 +- name: 'Industry Experience: Yahoo, Facebook, Twitter, Spotify' + startOffset: 203 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=203 + endOffset: 341 +- name: Transition to OLX Group and CDO Appointment + startOffset: 341 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=341 + endOffset: 368 +- name: 'Chief Data Officer Scope: Data Strategy, Governance, AI' + startOffset: 368 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=368 + endOffset: 437 +- name: Traditional Responsibilities vs Modern CDO Expectations + startOffset: 437 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=437 + endOffset: 619 +- name: 'Future-Proofing Data: Collecting for Tomorrow''s Products' + startOffset: 619 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=619 + endOffset: 700 +- name: Delegation and Organisational Design for Data Leadership + startOffset: 700 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=700 + endOffset: 864 +- name: 'Career Progression: From Head of Data to CDO' + startOffset: 864 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=864 + endOffset: 1057 +- name: 'Strategy vs Tactics: Vision, KPIs, and Execution' + startOffset: 1057 + url: 
https://www.youtube.com/watch?v=IdaZOD46FEw&t=1057 + endOffset: 1217 +- name: 'CDO vs VP of Data: Scope, Influence, and Proactivity' + startOffset: 1217 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1217 + endOffset: 1495 +- name: 'Structuring a Data Org: Multiple VPs and Reporting Lines' + startOffset: 1495 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1495 + endOffset: 1571 +- name: 'Splitting Work: CDO Responsibilities vs VP Execution' + startOffset: 1571 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1571 + endOffset: 1682 +- name: Differentiating CTO, CPO, and CDO Roles + startOffset: 1682 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1682 + endOffset: 1910 +- name: 'Working Backwards: Goals to Data Platform & ML Investment' + startOffset: 1910 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1910 + endOffset: 2083 +- name: 'Measuring Progress: Metrics, Accountability, and Visibility' + startOffset: 2083 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2083 + endOffset: 2205 +- name: 'Meeting Load: Time Management for Senior Data Leaders' + startOffset: 2205 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2205 + endOffset: 2396 +- name: 'Productivity Practices: Documentation, Async, and Slack' + startOffset: 2396 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2396 + endOffset: 2522 +- name: 'Building a Data-Driven Culture: Democratization & Usability' + startOffset: 2522 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2522 + endOffset: 2652 +- name: 'Remote Leadership: Challenges of Managing Distributed Teams' + startOffset: 2652 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2652 + endOffset: 2884 +- name: 'Technical Skills for CDOs: Breadth vs Depth (ML, SQL, Engineering)' + startOffset: 2884 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2884 + endOffset: 3020 +- name: 'Business Education: MBA Value for Executive Data Roles' + startOffset: 3020 + url: 
https://www.youtube.com/watch?v=IdaZOD46FEw&t=3020 + endOffset: 3138 +- name: 'Essential Soft Skills: Communication, Empathy, Influence' + startOffset: 3138 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3138 + endOffset: 3256 +- name: 'OLX Group Challenges: Geographic, Product, and Tech Complexity' + startOffset: 3256 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3256 + endOffset: 3396 +- name: 'Interviewing for CDO: Demonstrating Strategic Thinking' + startOffset: 3396 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3396 + endOffset: 3580 +- name: 'Overcoming Resistance: Persuasion, Evidence, and Constraints' + startOffset: 3580 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3580 + endOffset: 3744 +- name: Closing Remarks and Key Takeaways + startOffset: 3744 + url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3744 + endOffset: 3711 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Marco''s Career & Roles' @@ -996,132 +1121,4 @@ transcript: sec: 3789 time: '1:03:09' who: Marco -description: Learn how Chief Data Officers build data strategy, org design and roadmaps—get - tactics on governance, KPIs, delegation, career growth and remote leadership. -intro: 'How do you move from head of data to an effective Chief Data Officer who builds - strategy, designs the org, and delivers AI? In this episode, Marco De Sa — CDO at - OLX Group with prior data leadership roles at Yahoo, Facebook, Twitter, and Spotify - — lays out what modern data leadership really requires.

We explore the - evolving CDO scope: data strategy, governance, and AI; balancing vision versus tactics; - and future-proofing data collection for tomorrow’s products. Marco breaks down organizational - design and delegation — when to hire multiple VPs, how CDO responsibilities differ - from VP, CTO and CPO roles, and how to structure reporting lines. Practical topics - include working backwards from goals to data platform and machine learning investment, - measuring progress with meaningful KPIs, time management and productivity for senior - data leaders, and managing distributed teams.

Listeners will walk away - with concrete frameworks for data strategy, org design, and building a data-driven - culture, plus career guidance for aspiring CDOs on technical breadth, soft skills, - interviewing, and overcoming resistance with evidence-based persuasion. Ideal for - data leaders and executives shaping data strategy, governance, and AI roadmaps.' -dateadded: '2021-08-29' -duration: PT01H01M51S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=0 - endOffset: 78 -- name: 'Guest Overview: Marco''s Career & Roles' - startOffset: 78 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=78 - endOffset: 203 -- name: 'Industry Experience: Yahoo, Facebook, Twitter, Spotify' - startOffset: 203 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=203 - endOffset: 341 -- name: Transition to OLX Group and CDO Appointment - startOffset: 341 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=341 - endOffset: 368 -- name: 'Chief Data Officer Scope: Data Strategy, Governance, AI' - startOffset: 368 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=368 - endOffset: 437 -- name: Traditional Responsibilities vs Modern CDO Expectations - startOffset: 437 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=437 - endOffset: 619 -- name: 'Future-Proofing Data: Collecting for Tomorrow''s Products' - startOffset: 619 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=619 - endOffset: 700 -- name: Delegation and Organisational Design for Data Leadership - startOffset: 700 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=700 - endOffset: 864 -- name: 'Career Progression: From Head of Data to CDO' - startOffset: 864 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=864 - endOffset: 1057 -- name: 'Strategy vs Tactics: Vision, KPIs, and Execution' - startOffset: 1057 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1057 - endOffset: 1217 -- name: 'CDO vs VP of Data: Scope, Influence, and Proactivity' 
- startOffset: 1217 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1217 - endOffset: 1495 -- name: 'Structuring a Data Org: Multiple VPs and Reporting Lines' - startOffset: 1495 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1495 - endOffset: 1571 -- name: 'Splitting Work: CDO Responsibilities vs VP Execution' - startOffset: 1571 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1571 - endOffset: 1682 -- name: Differentiating CTO, CPO, and CDO Roles - startOffset: 1682 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1682 - endOffset: 1910 -- name: 'Working Backwards: Goals to Data Platform & ML Investment' - startOffset: 1910 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=1910 - endOffset: 2083 -- name: 'Measuring Progress: Metrics, Accountability, and Visibility' - startOffset: 2083 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2083 - endOffset: 2205 -- name: 'Meeting Load: Time Management for Senior Data Leaders' - startOffset: 2205 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2205 - endOffset: 2396 -- name: 'Productivity Practices: Documentation, Async, and Slack' - startOffset: 2396 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2396 - endOffset: 2522 -- name: 'Building a Data-Driven Culture: Democratization & Usability' - startOffset: 2522 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2522 - endOffset: 2652 -- name: 'Remote Leadership: Challenges of Managing Distributed Teams' - startOffset: 2652 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2652 - endOffset: 2884 -- name: 'Technical Skills for CDOs: Breadth vs Depth (ML, SQL, Engineering)' - startOffset: 2884 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=2884 - endOffset: 3020 -- name: 'Business Education: MBA Value for Executive Data Roles' - startOffset: 3020 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3020 - endOffset: 3138 -- name: 'Essential Soft Skills: Communication, Empathy, Influence' - startOffset: 3138 - url: 
https://www.youtube.com/watch?v=IdaZOD46FEw&t=3138 - endOffset: 3256 -- name: 'OLX Group Challenges: Geographic, Product, and Tech Complexity' - startOffset: 3256 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3256 - endOffset: 3396 -- name: 'Interviewing for CDO: Demonstrating Strategic Thinking' - startOffset: 3396 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3396 - endOffset: 3580 -- name: 'Overcoming Resistance: Persuasion, Evidence, and Constraints' - startOffset: 3580 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3580 - endOffset: 3744 -- name: Closing Remarks and Key Takeaways - startOffset: 3744 - url: https://www.youtube.com/watch?v=IdaZOD46FEw&t=3744 - endOffset: 3711 --- diff --git a/_podcast/s03e10-data-governance.md b/_podcast/cloud-data-governance.md similarity index 96% rename from _podcast/s03e10-data-governance.md rename to _podcast/cloud-data-governance.md index 3f89b6c5..400299f3 100644 --- a/_podcast/s03e10-data-governance.md +++ b/_podcast/cloud-data-governance.md @@ -1,13 +1,12 @@ --- -title: 'How to Build Data Governance in the Cloud: Classification, Catalogs, Policies - & ROI' -short: Data Governance +title: "How to Build Data Governance in the Cloud: Classification, Catalogs, Policies & ROI" +short: "Data Governance" +season: 3 +episode: 10 guests: - jessiashdown - urigilad -image: images/podcast/s03e10-data-governance.jpg -season: 3 -episode: 10 +image: images/podcast/cloud-data-governance.jpg ids: youtube: tJ3v8h7A7RY anchor: Data-Governance---Jessi-Ashdown--Uri-Gilad-e12jmoo @@ -16,6 +15,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Governance---Jessi-Ashdown--Uri-Gilad-e12jmoo spotify: https://open.spotify.com/episode/2zaLMrgbIgVkVEWY09b1Wn apple: https://podcasts.apple.com/us/podcast/data-governance-jessi-ashdown-uri-gilad/id1541710331?i=1000525176805 + +description: "Learn data governance in the cloud: build data classification, catalogs & policies, automate tagging, add stewards and measure 
ROI for trusted, compliant data" +intro: "How do you build data governance in the cloud that enables access, meets regulation, and demonstrates ROI? In this episode, Jessi Ashdown, Senior UX Researcher for Google Cloud, and Uri Gilad, Product Manager for Data Governance at Google Cloud, walk through practical approaches to data governance in the cloud—grounded in real user research and product experience.

They define governance beyond security and PII, explain how GDPR and high-profile events like Cambridge Analytica accelerated adoption, and outline the core components: people, processes, and tools. Key topics include data classification and taxonomy, building scalable data catalogs versus spreadsheets, policy design (retention, freshness, purpose-based access), enforcement models, and access workflows. They cover roles such as data stewards and producers, data quality signals, automation (tagging and requests), and what to measure for ROI—catalog metrics, cost versus usage, and compliance value. You’ll also hear an MVP strategy for minimum viable governance and what to include in a catalog (technical metadata, lineage, business glossary).

Listen to gain actionable steps to scope a cloud data governance program, prioritize by the “why,” and implement classification, catalogs, and policies that balance control and democratized access." +topics: +- data governance +- data compliance +- cloud +dateadded: 2021-06-13 + +duration: PT00H58M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=0 + endOffset: 209 +- name: 'Guest Background — Jessi: UX Researcher & Data Governance at Google Cloud' + startOffset: 209 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=209 + endOffset: 286 +- name: 'Guest Background — Uri: Product Management & Data Governance Experience' + startOffset: 286 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=286 + endOffset: 400 +- name: 'Defining Data Governance: Beyond Security and PII' + startOffset: 400 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=400 + endOffset: 537 +- name: Cloud & Regulation Driving Governance Adoption (GDPR, Cambridge Analytica) + startOffset: 537 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=537 + endOffset: 844 +- name: 'Core Components: People, Processes, Tools and Cataloging' + startOffset: 844 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=844 + endOffset: 933 +- name: 'Practical Implementation: Classify Data and Establish Policies' + startOffset: 933 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=933 + endOffset: 1180 +- name: 'Assessing Necessity: When Governance Can Be Minimal' + startOffset: 1180 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1180 + endOffset: 1380 +- name: 'Prioritization: Start with the "Why" to Scope Your Program' + startOffset: 1380 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1380 + endOffset: 1454 +- name: 'Data Classification & Taxonomy: Defining Meaningful Data Classes' + startOffset: 1454 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1454 + endOffset: 1668 +- name: 'Tools vs Spreadsheets: Scalable Data 
Catalog Approaches' + startOffset: 1668 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1668 + endOffset: 1820 +- name: 'Aligning Storage and Systems: Make Data Work for You' + startOffset: 1820 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1820 + endOffset: 1983 +- name: 'Human Roles: Data Stewards, Producers and Decision Makers' + startOffset: 1983 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1983 + endOffset: 2099 +- name: 'Data Quality: Trust Signals, Source, and Measurable Checks' + startOffset: 2099 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2099 + endOffset: 2305 +- name: 'Policy Design: Retention, Freshness and Purpose-based Access' + startOffset: 2305 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2305 + endOffset: 2524 +- name: 'Policies as Enablement: Guardrails for Democratized Data Access' + startOffset: 2524 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2524 + endOffset: 2704 +- name: 'Enforcement Models: Catalog Interfaces vs Storage Control Plane' + startOffset: 2704 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2704 + endOffset: 2822 +- name: 'Access Workflows: Request/Approval "Shopping Cart" Experience' + startOffset: 2822 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2822 + endOffset: 2855 +- name: 'Governance Tools & Platforms: Dataplex, Collibra and Integrations' + startOffset: 2855 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2855 + endOffset: 2930 +- name: 'Automation: Tagging, Requests and Reducing Manual Effort' + startOffset: 2930 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2930 + endOffset: 3019 +- name: 'Measuring ROI: Catalog Metrics, Cost vs Usage and Compliance Value' + startOffset: 3019 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3019 + endOffset: 3201 +- name: 'MVP Strategy: Minimum Viable Governance and Future-proofing' + startOffset: 3201 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3201 + endOffset: 3277 +- name: 'Data Catalog Contents: 
Technical Metadata, Lineage and Business Glossary' + startOffset: 3277 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3277 + endOffset: 3466 +- name: 'Governance Scope: Why It Extends Beyond the Catalog' + startOffset: 3466 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3466 + endOffset: 3544 +- name: Closing Remarks, Contact Links and Next Steps + startOffset: 3544 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3544 + endOffset: 3560 +- name: 'Recommended Resource: Data Governance (O''Reilly Book)' + startOffset: 3560 + url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3560 + endOffset: 3489 + transcript: - header: Podcast Introduction - line: This week we will talk about data governance. We have two special guests. @@ -859,132 +975,6 @@ transcript: sec: 3640 time: '1:00:40' who: Uri -description: 'Learn data governance in the cloud: build data classification, catalogs - & policies, automate tagging, add stewards and measure ROI for trusted, compliant - data' -intro: 'How do you build data governance in the cloud that enables access, meets regulation, - and demonstrates ROI? In this episode, Jessi Ashdown, Senior UX Researcher for Google - Cloud, and Uri Gilad, Product Manager for Data Governance at Google Cloud, walk - through practical approaches to data governance in the cloud—grounded in real user - research and product experience.

They define governance beyond security - and PII, explain how GDPR and high-profile events like Cambridge Analytica accelerated - adoption, and outline the core components: people, processes, and tools. Key topics - include data classification and taxonomy, building scalable data catalogs versus - spreadsheets, policy design (retention, freshness, purpose-based access), enforcement - models, and access workflows. They cover roles such as data stewards and producers, - data quality signals, automation (tagging and requests), and what to measure for - ROI—catalog metrics, cost versus usage, and compliance value. You’ll also hear an - MVP strategy for minimum viable governance and what to include in a catalog (technical - metadata, lineage, business glossary).

Listen to gain actionable steps - to scope a cloud data governance program, prioritize by the “why,” and implement - classification, catalogs, and policies that balance control and democratized access.' -dateadded: '2021-06-13' -duration: PT00H58M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=0 - endOffset: 209 -- name: 'Guest Background — Jessi: UX Researcher & Data Governance at Google Cloud' - startOffset: 209 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=209 - endOffset: 286 -- name: 'Guest Background — Uri: Product Management & Data Governance Experience' - startOffset: 286 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=286 - endOffset: 400 -- name: 'Defining Data Governance: Beyond Security and PII' - startOffset: 400 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=400 - endOffset: 537 -- name: Cloud & Regulation Driving Governance Adoption (GDPR, Cambridge Analytica) - startOffset: 537 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=537 - endOffset: 844 -- name: 'Core Components: People, Processes, Tools and Cataloging' - startOffset: 844 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=844 - endOffset: 933 -- name: 'Practical Implementation: Classify Data and Establish Policies' - startOffset: 933 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=933 - endOffset: 1180 -- name: 'Assessing Necessity: When Governance Can Be Minimal' - startOffset: 1180 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1180 - endOffset: 1380 -- name: 'Prioritization: Start with the "Why" to Scope Your Program' - startOffset: 1380 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1380 - endOffset: 1454 -- name: 'Data Classification & Taxonomy: Defining Meaningful Data Classes' - startOffset: 1454 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1454 - endOffset: 1668 -- name: 'Tools vs Spreadsheets: Scalable Data Catalog Approaches' - startOffset: 1668 - url: 
https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1668 - endOffset: 1820 -- name: 'Aligning Storage and Systems: Make Data Work for You' - startOffset: 1820 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1820 - endOffset: 1983 -- name: 'Human Roles: Data Stewards, Producers and Decision Makers' - startOffset: 1983 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=1983 - endOffset: 2099 -- name: 'Data Quality: Trust Signals, Source, and Measurable Checks' - startOffset: 2099 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2099 - endOffset: 2305 -- name: 'Policy Design: Retention, Freshness and Purpose-based Access' - startOffset: 2305 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2305 - endOffset: 2524 -- name: 'Policies as Enablement: Guardrails for Democratized Data Access' - startOffset: 2524 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2524 - endOffset: 2704 -- name: 'Enforcement Models: Catalog Interfaces vs Storage Control Plane' - startOffset: 2704 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2704 - endOffset: 2822 -- name: 'Access Workflows: Request/Approval "Shopping Cart" Experience' - startOffset: 2822 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2822 - endOffset: 2855 -- name: 'Governance Tools & Platforms: Dataplex, Collibra and Integrations' - startOffset: 2855 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2855 - endOffset: 2930 -- name: 'Automation: Tagging, Requests and Reducing Manual Effort' - startOffset: 2930 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=2930 - endOffset: 3019 -- name: 'Measuring ROI: Catalog Metrics, Cost vs Usage and Compliance Value' - startOffset: 3019 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3019 - endOffset: 3201 -- name: 'MVP Strategy: Minimum Viable Governance and Future-proofing' - startOffset: 3201 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3201 - endOffset: 3277 -- name: 'Data Catalog Contents: Technical Metadata, Lineage and Business 
Glossary' - startOffset: 3277 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3277 - endOffset: 3466 -- name: 'Governance Scope: Why It Extends Beyond the Catalog' - startOffset: 3466 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3466 - endOffset: 3544 -- name: Closing Remarks, Contact Links and Next Steps - startOffset: 3544 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3544 - endOffset: 3560 -- name: 'Recommended Resource: Data Governance (O''Reilly Book)' - startOffset: 3560 - url: https://www.youtube.com/watch?v=tJ3v8h7A7RY&t=3560 - endOffset: 3489 --- Links: diff --git a/_podcast/s18e05-community-building-and-teaching-in-ai-tech.md b/_podcast/community-building-and-teaching-in-ai-tech.md similarity index 89% rename from _podcast/s18e05-community-building-and-teaching-in-ai-tech.md rename to _podcast/community-building-and-teaching-in-ai-tech.md index e0cfe171..dfc632fc 100644 --- a/_podcast/s18e05-community-building-and-teaching-in-ai-tech.md +++ b/_podcast/community-building-and-teaching-in-ai-tech.md @@ -1,20 +1,142 @@ --- +title: "Community Building and Teaching in AI & Tech: Project-to-Course Model for AI Education" +short: "Community Building and Teaching in AI & Tech" +season: 18 episode: 5 guests: - erumafzal +image: images/podcast/community-building-and-teaching-in-ai-tech.jpg ids: - anchor: lub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r + anchor: datatalksclub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r youtube: 7SLd5V7z3xQ -image: images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Community-Building-and-Teaching-in-AI--Tech---Erum-Afzal-e2jg61r apple: https://podcasts.apple.com/us/podcast/community-building-and-teaching-in-ai-tech-erum-afzal/id1541710331?i=1000655187649 spotify: https://open.spotify.com/episode/4iAvz4Qu0l28fxXvaHdAPj?si=7MdKKu1fTrqxIGPQBT61Ag youtube: 
https://www.youtube.com/watch?v=7SLd5V7z3xQ -season: 18 -short: Community Building and Teaching in AI & Tech -title: 'Omdena Academy: Project-to-Course AI Education for Data Science Careers & - Instructors' +description: "Discover Omdena's project-to-course model for AI education and community building—master curriculum, instructor pipeline, and pathways to join real AI projects." +topics: +- AI +- NLP +- data science +- career growth +- leadership +- community building +- teaching +intro: "How can communities turn real-world AI projects into repeatable courses that scale learning and careers? In this episode Erum Afzal — lead ML engineer, PhD researcher in AI for teacher training, and head of Omdena Academy — walks through a project-to-course model for AI education rooted in community collaboration.

We cover Omdena’s evolution from global, problem-focused projects into structured courses, the design of foundational data science curricula (Python, Pandas, NumPy, NLP), and the practical steps for course development: instructor application, content review, delivery, and engagement strategies (live sessions, selection, graduation). Erum explains the instructor pipeline and open applications, access pathways for learners, the academy’s free-course business model with organizational partnerships, and options for monetization or volunteer teaching.

Listeners will get concrete guidance on curriculum tiers (basic to advanced), community growth tactics (start small, empower sub-communities), ethical concerns around hiring integrity and responsible ChatGPT use, and where to apply to teach (Omdena.com/Omdena-Academy). This episode is for educators, community builders, and early-career practitioners who want actionable models for teaching, curriculum design, and building inclusive AI learning communities." +dateadded: 2024-05-12 +duration: PT00H57M03S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=0 + endOffset: 85 +- name: 'Guest Introduction: Erum Afzal — AI for education & Omdena Academy' + startOffset: 85 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=85 + endOffset: 152 +- name: 'Background: Journey from Pakistan to PhD & community teaching' + startOffset: 152 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=152 + endOffset: 303 +- name: 'Omdena Academy: Evolution from projects to structured courses' + startOffset: 303 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=303 + endOffset: 364 +- name: 'Omdena Projects: Global collaborators solving real-world AI problems' + startOffset: 364 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=364 + endOffset: 619 +- name: 'Project-to-Course Model: Teaching skills learned from projects' + startOffset: 619 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=619 + endOffset: 692 +- name: 'Foundational Data Science Courses: Python, Pandas, NumPy, NLP' + startOffset: 692 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=692 + endOffset: 872 +- name: 'Course Development: Instructor application, content review, delivery' + startOffset: 872 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=872 + endOffset: 952 +- name: 'Business Model: Free learner courses with organizational partnerships' + startOffset: 952 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=952 + endOffset: 1058 +- 
name: 'Access Pathways: Enroll without prior Omdena membership; pathway to projects' + startOffset: 1058 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1058 + endOffset: 1248 +- name: 'Instructor Pipeline: Open applications and project-based recruitment' + startOffset: 1248 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1248 + endOffset: 1349 +- name: 'Course Engagement: Live sessions, selection process, graduation rates' + startOffset: 1349 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1349 + endOffset: 1458 +- name: 'Selection & Motivation: Prereqs, availability, and incentive programs' + startOffset: 1458 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1458 + endOffset: 1600 +- name: 'Roles & Responsibilities: Teaching focus vs community management' + startOffset: 1600 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1600 + endOffset: 1788 +- name: 'Leadership Development: Network-building and taking initiative' + startOffset: 1788 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1788 + endOffset: 1983 +- name: 'Community Growth Strategy: Start small and scale (AI Wonder Girl example)' + startOffset: 1983 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1983 + endOffset: 2246 +- name: 'Communities for Career Building: Skill discovery and rapid learning' + startOffset: 2246 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2246 + endOffset: 2400 +- name: 'Empowering Sub-communities: Regional chapters, branding, ethics' + startOffset: 2400 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2400 + endOffset: 2570 +- name: 'Boosting Attendance: Clear takeaways and live event value' + startOffset: 2570 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2570 + endOffset: 2793 +- name: 'Curriculum Design: Basic, intermediate, and advanced course tiers' + startOffset: 2793 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2793 + endOffset: 2900 +- name: 'Talent Market Dynamics: Standing out amid data science competition' + 
startOffset: 2900 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2900 + endOffset: 3056 +- name: 'Hiring Integrity & Tools: Originality, plagiarism, and responsible ChatGPT + use' + startOffset: 3056 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3056 + endOffset: 3147 +- name: 'How to Apply: Becoming an Omdena Academy instructor (Omdena.com/Omdena-Academy)' + startOffset: 3147 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3147 + endOffset: 3203 +- name: 'Monetization Options: Volunteer teaching vs selling courses on platforms' + startOffset: 3203 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3203 + endOffset: 3289 +- name: 'Access & Scholarship Resources: Courses, GitHub projects, and women-focused + support' + startOffset: 3289 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3289 + endOffset: 3399 +- name: 'Recommended Readings: AI ethics newsletter and curated resources' + startOffset: 3399 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3399 + endOffset: 3466 +- name: Episode Wrap-Up & Closing Remarks + startOffset: 3466 + url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3466 + endOffset: 3423 transcript: - header: Podcast Introduction - header: 'Guest Introduction: Erum Afzal — AI for education & Omdena Academy' @@ -106,7 +228,7 @@ transcript: sec: 328 time: '5:28' who: Alexey -- header: 'Omdena Projects: Global collaborators solving real‑world AI problems' +- header: 'Omdena Projects: Global collaborators solving real-world AI problems' - line: Omdena is a global community that started in 2019. Every participant is called a "collaborator." We solve real-world challenges with AI. We started with a mental assistive application in 2019, then projects like finding anomalies on Mars. 
Our @@ -518,7 +640,7 @@ transcript: sec: 3269 time: '54:29' who: Alexey -- header: 'Access & Scholarship Resources: Courses, GitHub projects, and women‑focused +- header: 'Access & Scholarship Resources: Courses, GitHub projects, and women-focused support' - line: Join AI communities. There are courses available, and sometimes scholarships on platforms like Coursera. Learning through projects on GitHub is also valuable. @@ -557,140 +679,17 @@ transcript: sec: 3508 time: '58:28' who: Alexey -description: 'Discover Omdena Academy''s project-to-course AI education for data science: - learn Python, NLP, instructor paths, and gain real-world project experience.' -intro: 'How do you turn real-world AI project experience into repeatable courses that - launch data science careers and train instructors? In this episode, Erum Afzal — - lead ML engineer, Teaching Expert at Women in AI Academy, and PhD researcher in - AI for teacher training — explains how Omdena Academy evolved from collaborative - projects into a project-to-course model for AI education.

We cover the - Academy’s shift from global Omdena projects to structured data science courses, - foundational topics taught (Python, Pandas, NumPy, NLP), and the process for developing - courses: instructor application, content review, delivery, and evaluation. Erum - outlines access pathways—enrolling without prior Omdena membership, pathways into - projects, and an open instructor pipeline—plus community and leadership development - through regional chapters and sub-communities. You’ll hear about curriculum tiers - (basic to advanced), boosting engagement with live sessions, and maintaining hiring - integrity amid plagiarism and responsible ChatGPT use. Practical details include - how to apply (Omdena.com/Omdena-Academy), scholarship and GitHub resources, and - options for instructors to volunteer or monetize content.

Listen to learn - actionable steps for joining, teaching, or designing project-based data science - courses that prepare learners for careers in AI.' -dateadded: '2024-05-12' -duration: PT00H57M03S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=0 - endOffset: 85 -- name: 'Guest Introduction: Erum Afzal — AI for education & Omdena Academy' - startOffset: 85 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=85 - endOffset: 152 -- name: 'Background: Journey from Pakistan to PhD & community teaching' - startOffset: 152 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=152 - endOffset: 303 -- name: 'Omdena Academy: Evolution from projects to structured courses' - startOffset: 303 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=303 - endOffset: 364 -- name: 'Omdena Projects: Global collaborators solving real‑world AI problems' - startOffset: 364 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=364 - endOffset: 619 -- name: 'Project-to-Course Model: Teaching skills learned from projects' - startOffset: 619 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=619 - endOffset: 692 -- name: 'Foundational Data Science Courses: Python, Pandas, NumPy, NLP' - startOffset: 692 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=692 - endOffset: 872 -- name: 'Course Development: Instructor application, content review, delivery' - startOffset: 872 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=872 - endOffset: 952 -- name: 'Business Model: Free learner courses with organizational partnerships' - startOffset: 952 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=952 - endOffset: 1058 -- name: 'Access Pathways: Enroll without prior Omdena membership; pathway to projects' - startOffset: 1058 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1058 - endOffset: 1248 -- name: 'Instructor Pipeline: Open applications and project-based recruitment' - startOffset: 1248 - url: 
https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1248 - endOffset: 1349 -- name: 'Course Engagement: Live sessions, selection process, graduation rates' - startOffset: 1349 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1349 - endOffset: 1458 -- name: 'Selection & Motivation: Prereqs, availability, and incentive programs' - startOffset: 1458 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1458 - endOffset: 1600 -- name: 'Roles & Responsibilities: Teaching focus vs community management' - startOffset: 1600 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1600 - endOffset: 1788 -- name: 'Leadership Development: Network-building and taking initiative' - startOffset: 1788 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1788 - endOffset: 1983 -- name: 'Community Growth Strategy: Start small and scale (AI Wonder Girl example)' - startOffset: 1983 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=1983 - endOffset: 2246 -- name: 'Communities for Career Building: Skill discovery and rapid learning' - startOffset: 2246 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2246 - endOffset: 2400 -- name: 'Empowering Sub-communities: Regional chapters, branding, ethics' - startOffset: 2400 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2400 - endOffset: 2570 -- name: 'Boosting Attendance: Clear takeaways and live event value' - startOffset: 2570 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2570 - endOffset: 2793 -- name: 'Curriculum Design: Basic, intermediate, and advanced course tiers' - startOffset: 2793 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2793 - endOffset: 2900 -- name: 'Talent Market Dynamics: Standing out amid data science competition' - startOffset: 2900 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=2900 - endOffset: 3056 -- name: 'Hiring Integrity & Tools: Originality, plagiarism, and responsible ChatGPT - use' - startOffset: 3056 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3056 - endOffset: 3147 -- 
name: 'How to Apply: Becoming an Omdena Academy instructor (Omdena.com/Omdena-Academy)' - startOffset: 3147 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3147 - endOffset: 3203 -- name: 'Monetization Options: Volunteer teaching vs selling courses on platforms' - startOffset: 3203 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3203 - endOffset: 3289 -- name: 'Access & Scholarship Resources: Courses, GitHub projects, and women‑focused - support' - startOffset: 3289 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3289 - endOffset: 3399 -- name: 'Recommended Readings: AI ethics newsletter and curated resources' - startOffset: 3399 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3399 - endOffset: 3466 -- name: Episode Wrap-Up & Closing Remarks - startOffset: 3466 - url: https://www.youtube.com/watch?v=7SLd5V7z3xQ&t=3466 - endOffset: 3423 ---- +context: 'Context: This episode follows Erum Afzal and Omdena Academy’s evolution—how + global, project-based AI collaborations and community organizing were systematized + into accessible, tiered courses and local chapters to teach practical, ethical AI + skills. + Core theme: The unifying idea is that democratizing real-world AI expertise requires + a community-first, project-to-course approach—turning collaborative problem-solving + into structured learning pathways, open instructor pipelines, regional sub-communities, + and integrity-focused practices so diverse learners can rapidly gain practical skills, + leadership opportunities, and ethical career pathways in AI.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/erum-afzal-64827b24/){:target="_blank"} diff --git a/_podcast/s01e02-processes.md b/_podcast/crisp-dm.md similarity index 95% rename from _podcast/s01e02-processes.md rename to _podcast/crisp-dm.md index 324b776e..d359eb7f 100644 --- a/_podcast/s01e02-processes.md +++ b/_podcast/crisp-dm.md @@ -1,19 +1,11 @@ --- -title: 'CRISP-DM Methodology for Data Science Projects: Business Understanding, Data - Preparation, Modeling, Evaluation & Deployment' -short: Processes in a Data Science Project -guests: -- alexeygrigorev -image: images/podcast/s01e02-processes.jpg -description: Learn the CRISP-DM methodology for managing data science projects. Step-by-step - guide covering business understanding, data preparation, modeling, evaluation, and - deployment. -keywords: CRISP-DM, data science process, machine learning methodology, data science - project management, ML project lifecycle, data science workflow, A/B testing, model - deployment, data science best practices, ML model evaluation, cross-functional data - teams +title: "CRISP-DM Methodology for Data Science Projects: Business Understanding, Data Preparation, Modeling, Evaluation & Deployment" +short: "Processes in a Data Science Project" season: 1 episode: 2 +guests: +- alexeygrigorev +image: images/podcast/crisp-dm.jpg ids: youtube: SesVTDklFYQ anchor: Processes-in-a-Data-Science-Project---Alexey-Grigorev-encdlg @@ -22,7 +14,17 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Processes-in-a-Data-Science-Project---Alexey-Grigorev-encdlg spotify: TODO apple: TODO -dateadded: '2021-02-23' + +description: "Learn the CRISP-DM methodology for managing data science projects. 
Step-by-step guide covering business understanding, data preparation, modeling, evaluation, and deployment" +topics: +- data science +- machine learning +- project management +dateadded: 2021-02-23 + + + +keywords: CRISP-DM, data science process, machine learning methodology, data science project management, ML project lifecycle, data science workflow, A/B testing, model deployment, data science best practices, ML model evaluation, cross-functional data teams --- The topic today is the processes in a data science project. We want to understand how cross-functional teams work together to ship real value. We'll use a concrete example (auto-categorizing marketplace listings) and walk through CRISP-DM step by step. diff --git a/_podcast/s12e03-data-centric-ai.md b/_podcast/data-centric-ai.md similarity index 97% rename from _podcast/s12e03-data-centric-ai.md rename to _podcast/data-centric-ai.md index af422bc0..8dedec92 100644 --- a/_podcast/s12e03-data-centric-ai.md +++ b/_podcast/data-centric-ai.md @@ -1,19 +1,154 @@ --- +title: "Data-Centric AI: Improve Label Quality & Edit Datasets to Boost Model Performance" +short: "Data-Centric AI" +season: 12 episode: 3 guests: - marysiawinkels +image: images/podcast/data-centric.jpg ids: anchor: Data-Centric-AI---Marysia-Winkels-e1shctn youtube: t3HDdVWQzNM -image: images/podcast/s12e03-data-centric-ai.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Centric-AI---Marysia-Winkels-e1shctn apple: https://podcasts.apple.com/us/podcast/data-centric-ai-marysia-winkels/id1541710331?i=1000592911172 spotify: https://open.spotify.com/episode/6q1yago5iyMt8OmCX1abG3?si=-OaRAwjaRfOfyQ7_QZEbBw youtube: https://www.youtube.com/watch?v=t3HDdVWQzNM -season: 12 -short: Data-Centric AI -title: 'Data-Centric AI: Improve Label Quality & Edit Datasets to Boost Model Performance' + +description: "Discover Data-Centric AI tactics to improve label quality and edit datasets to boost model performance, practical workflows, relabeling, 
augmentation tips" +topics: +- machine learning +- data science +- MLOps +- tools +- data governance +intro: "How much can improving label quality and editing your dataset actually boost model performance? In this episode, Marysia Winkels — Lead Data Scientist at GoDataDriven with a Master’s in Artificial Intelligence and a focus on data-efficient deep learning, and co-organizer of PyData Amsterdam/Global — walks through a practical, data-centric approach to that question.

We cover why shifting from “more data” to “better data” matters, especially for transfer learning and fine-tuning, and contrast model-centric vs data-centric workflows. Marysia breaks down a data-centric competition that used a fixed ResNet with an editable dataset, strategies for targeted relabeling using model confidence and embeddings, lightweight data versioning and low-tech tooling (Google Sheets + scripts), and when to use synthetic augmentation versus manual fixes. You’ll also hear about validation-split integrity, detecting dataset gaps with UMAP, acceptance criteria for real-world contexts, shadow-mode rollouts, and the trade-offs of automating dataset repairs.

Listen to learn concrete workflows and heuristics to prioritize impactful data fixes, improve label quality, and make dataset edits that measurably increase model performance. Find additional resources at marysia.nl and PyData" +dateadded: 2023-01-07 + +duration: PT00H57M34S + +quotableClips: +- name: Podcast Introduction + startOffset: 86 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=86 + endOffset: 123 +- name: AI education & geometric deep learning in medical imaging + startOffset: 123 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=123 + endOffset: 184 +- name: Data science education and course development + startOffset: 184 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=184 + endOffset: 291 +- name: Building a community of practice and improving product maturity + startOffset: 291 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=291 + endOffset: 324 +- name: 'Data-Centric AI: shifting focus from Big Data to Good Data' + startOffset: 324 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=324 + endOffset: 354 +- name: Model-centric vs data-centric approaches; challenges with unstructured data + startOffset: 354 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=354 + endOffset: 628 +- name: 'Transfer learning & fine-tuning: why label quality matters more now' + startOffset: 628 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=628 + endOffset: 825 +- name: 'Data-centric competition case: fixed ResNet model with editable dataset' + startOffset: 825 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=825 + endOffset: 905 +- name: 'Competition lessons: accessibility, strategy, and innovation award' + startOffset: 905 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=905 + endOffset: 1064 +- name: Strategic data augmentation vs brute-force data collection + startOffset: 1064 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1064 + endOffset: 1126 +- name: 'Mindset shift: treating datasets as editable artifacts' + startOffset: 1126 
+ url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1126 + endOffset: 1164 +- name: Validation split adjustments and maintaining fair model comparisons + startOffset: 1164 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1164 + endOffset: 1345 +- name: Iterating on both data and model; prioritizing impactful data fixes + startOffset: 1345 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1345 + endOffset: 1382 +- name: 'Tooling spectrum: labeling, synthetic data, and data versioning' + startOffset: 1382 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1382 + endOffset: 1404 +- name: 'Practical workflows: lightweight versioning and easy data edits' + startOffset: 1404 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1404 + endOffset: 1586 +- name: 'Low-tech iteration: Google Sheets labeling plus automation scripts' + startOffset: 1586 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1586 + endOffset: 1675 +- name: Targeted relabeling using model confidence and image embeddings + startOffset: 1675 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1675 + endOffset: 1942 +- name: 'Curated resources: Haiti Research and WhyData tool directories' + startOffset: 1942 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1942 + endOffset: 1996 +- name: 'Iterative loop: baseline model, error analysis, and SME validation' + startOffset: 1996 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1996 + endOffset: 2124 +- name: 'Beyond cleaning: representativeness, bias, and dataset completeness' + startOffset: 2124 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2124 + endOffset: 2174 +- name: Detecting dataset gaps with embeddings and UMAP (penguin example) + startOffset: 2174 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2174 + endOffset: 2386 +- name: 'Defining real-world contexts: lighting, angles, and edge cases' + startOffset: 2386 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2386 + endOffset: 2507 +- name: 'Acceptance criteria: 
deciding when dataset quality is sufficient' + startOffset: 2507 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2507 + endOffset: 2653 +- name: 'Production feedback loops: collecting user feedback post-deployment' + startOffset: 2653 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2653 + endOffset: 2812 +- name: 'Shadow mode rollout: passive deployment for safe feedback collection' + startOffset: 2812 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2812 + endOffset: 2949 +- name: 'Scarce or low-quality data: feasibility, manual fixes, and limits' + startOffset: 2949 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2949 + endOffset: 3045 +- name: Automating dataset repairs vs manual editing trade-offs + startOffset: 3045 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3045 + endOffset: 3056 +- name: 'PyData involvement: organizing meetups, tutorials, and global events' + startOffset: 3056 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3056 + endOffset: 3361 +- name: 'PyData vs PyCon: data focus, language inclusivity, and NumFOCUS support' + startOffset: 3361 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3361 + endOffset: 3504 +- name: 'Contact & resources: marysia.nl, LinkedIn, and PyData engagement' + startOffset: 3504 + url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3504 + endOffset: 3454 + transcript: - header: Podcast Introduction - line: This week, we'll talk about data-centric AI. We have a special guest today, @@ -1243,147 +1378,6 @@ transcript: sec: 3540 time: '59:00' who: Alexey -description: Discover Data-Centric AI tactics to improve label quality and edit datasets - to boost model performance, practical workflows, relabeling, augmentation tips. -intro: How much can improving label quality and editing your dataset actually boost - model performance? 
In this episode, Marysia Winkels — Lead Data Scientist at GoDataDriven - with a Master’s in Artificial Intelligence and a focus on data-efficient deep learning, - and co-organizer of PyData Amsterdam/Global — walks through a practical, data-centric - approach to that question.

We cover why shifting from “more data” to “better - data” matters, especially for transfer learning and fine-tuning, and contrast model-centric - vs data-centric workflows. Marysia breaks down a data-centric competition that used - a fixed ResNet with an editable dataset, strategies for targeted relabeling using - model confidence and embeddings, lightweight data versioning and low-tech tooling - (Google Sheets + scripts), and when to use synthetic augmentation versus manual - fixes. You’ll also hear about validation-split integrity, detecting dataset gaps - with UMAP, acceptance criteria for real-world contexts, shadow-mode rollouts, and - the trade-offs of automating dataset repairs.

Listen to learn concrete - workflows and heuristics to prioritize impactful data fixes, improve label quality, - and make dataset edits that measurably increase model performance. Find additional - resources at marysia.nl and PyData. -dateadded: '2023-01-07' -duration: PT00H57M34S -quotableClips: -- name: Podcast Introduction - startOffset: 86 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=86 - endOffset: 123 -- name: AI education & geometric deep learning in medical imaging - startOffset: 123 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=123 - endOffset: 184 -- name: Data science education and course development - startOffset: 184 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=184 - endOffset: 291 -- name: Building a community of practice and improving product maturity - startOffset: 291 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=291 - endOffset: 324 -- name: 'Data-Centric AI: shifting focus from Big Data to Good Data' - startOffset: 324 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=324 - endOffset: 354 -- name: Model-centric vs data-centric approaches; challenges with unstructured data - startOffset: 354 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=354 - endOffset: 628 -- name: 'Transfer learning & fine-tuning: why label quality matters more now' - startOffset: 628 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=628 - endOffset: 825 -- name: 'Data-centric competition case: fixed ResNet model with editable dataset' - startOffset: 825 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=825 - endOffset: 905 -- name: 'Competition lessons: accessibility, strategy, and innovation award' - startOffset: 905 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=905 - endOffset: 1064 -- name: Strategic data augmentation vs brute-force data collection - startOffset: 1064 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1064 - endOffset: 1126 -- name: 'Mindset shift: treating datasets as editable artifacts' - startOffset: 
1126 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1126 - endOffset: 1164 -- name: Validation split adjustments and maintaining fair model comparisons - startOffset: 1164 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1164 - endOffset: 1345 -- name: Iterating on both data and model; prioritizing impactful data fixes - startOffset: 1345 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1345 - endOffset: 1382 -- name: 'Tooling spectrum: labeling, synthetic data, and data versioning' - startOffset: 1382 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1382 - endOffset: 1404 -- name: 'Practical workflows: lightweight versioning and easy data edits' - startOffset: 1404 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1404 - endOffset: 1586 -- name: 'Low-tech iteration: Google Sheets labeling plus automation scripts' - startOffset: 1586 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1586 - endOffset: 1675 -- name: Targeted relabeling using model confidence and image embeddings - startOffset: 1675 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1675 - endOffset: 1942 -- name: 'Curated resources: Haiti Research and WhyData tool directories' - startOffset: 1942 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1942 - endOffset: 1996 -- name: 'Iterative loop: baseline model, error analysis, and SME validation' - startOffset: 1996 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=1996 - endOffset: 2124 -- name: 'Beyond cleaning: representativeness, bias, and dataset completeness' - startOffset: 2124 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2124 - endOffset: 2174 -- name: Detecting dataset gaps with embeddings and UMAP (penguin example) - startOffset: 2174 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2174 - endOffset: 2386 -- name: 'Defining real-world contexts: lighting, angles, and edge cases' - startOffset: 2386 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2386 - endOffset: 2507 -- name: 'Acceptance 
criteria: deciding when dataset quality is sufficient' - startOffset: 2507 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2507 - endOffset: 2653 -- name: 'Production feedback loops: collecting user feedback post-deployment' - startOffset: 2653 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2653 - endOffset: 2812 -- name: 'Shadow mode rollout: passive deployment for safe feedback collection' - startOffset: 2812 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2812 - endOffset: 2949 -- name: 'Scarce or low-quality data: feasibility, manual fixes, and limits' - startOffset: 2949 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=2949 - endOffset: 3045 -- name: Automating dataset repairs vs manual editing trade-offs - startOffset: 3045 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3045 - endOffset: 3056 -- name: 'PyData involvement: organizing meetups, tutorials, and global events' - startOffset: 3056 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3056 - endOffset: 3361 -- name: 'PyData vs PyCon: data focus, language inclusivity, and NumFOCUS support' - startOffset: 3361 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3361 - endOffset: 3504 -- name: 'Contact & resources: marysia.nl, LinkedIn, and PyData engagement' - startOffset: 3504 - url: https://www.youtube.com/watch?v=t3HDdVWQzNM&t=3504 - endOffset: 3454 --- Links: diff --git a/_podcast/s13e04-starting-consultancy-in-data-space.md b/_podcast/data-consulting-business-pricing-and-client-acquisition.md similarity index 97% rename from _podcast/s13e04-starting-consultancy-in-data-space.md rename to _podcast/data-consulting-business-pricing-and-client-acquisition.md index 377ddeea..c1350592 100644 --- a/_podcast/s13e04-starting-consultancy-in-data-space.md +++ b/_podcast/data-consulting-business-pricing-and-client-acquisition.md @@ -1,29 +1,129 @@ --- +title: "Build a Data Consulting Business: Customer Validation, User Interviews & Pricing Strategy" +short: "Starting a Consultancy in 
the Data Space" +season: 13 episode: 4 guests: - aleksanderkruszelnicki -date: 2025-11-07 -topics: -- consulting -- entrepreneurship -- freelance -- data strategy -- Business Development -- Career Growth -- Startups +image: images/podcast/data-consulting-business-pricing-and-client-acquisition.jpg ids: anchor: ow/datatalksclub/episodes/Starting-a-Consultancy-in-the-Data-Space---Aleksander-Kruszelnicki-e203c8g youtube: rh_pE35m3vE -image: images/podcast/s13e04-starting-consultancy-in-data-space.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Starting-a-Consultancy-in-the-Data-Space---Aleksander-Kruszelnicki-e203c8g apple: https://podcasts.apple.com/us/podcast/starting-a-consultancy-in-the-data-space/id1541710331?i=1000604682286 spotify: https://open.spotify.com/episode/2Y0mKRHq6wVfr25HJ5Ji3Y?si=kUkmMW2AT6-FeRd6SpXWlg youtube: https://www.youtube.com/watch?v=rh_pE35m3vE -season: 13 -short: Starting a Consultancy in the Data Space -title: 'Build a Data Consulting Business: Customer Validation, User Interviews & Pricing - Strategy' + +description: "Learn data consulting: customer validation, user interviews and pricing strategy to validate ideas, win clients, set value-based rates & scale your practice." +intro: "How do you validate customers, run effective user interviews, and set pricing to build a sustainable data consulting business? In this episode, Aleksander Kruszelnicki — ex-Delivery Hero product manager turned co-founder of leukos, a boutique data analytics agency in Berlin — walks through the practical steps he took shifting from product ideas to a consulting model after early startup failures.

We cover market and technical limits of “data stack as a service,” first-customer stories, customer validation techniques for pre-product ideas, and a repeatable user interview strategy (questions, cadence, roles, and note-taking). Aleksander explains why value often sits in data modeling over infrastructure, the decision to pivot to hands-on consulting, and team composition (PM + engineer). You’ll also hear tactical guidance on client acquisition (network-first outreach), positioning for European customers and VCs, messaging examples for revenue/marketing optimization, marketing mix (networking, content, LinkedIn), pricing frameworks and rate setting, contract models (day rates vs project pricing), and practical legal/admin steps for registering a consultancy in Germany.

Listen to get actionable methods for customer validation, user interviews, pricing strategy, and building a data consulting business that captures real client value" +topics: +- consulting +- entrepreneurship +- freelance +- data strategy +- business development +- career growth +- startups +dateadded: 2023-03-19 +date: 2025-11-07 + +duration: PT01H16S + +quotableClips: +- name: Podcast Introduction + startOffset: 70 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=70 + endOffset: 107 +- name: Career Journey & Archaeology Origin Story + startOffset: 107 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=107 + endOffset: 256 +- name: 'Data Stack as a Service: Market and Technical Limits' + startOffset: 256 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=256 + endOffset: 436 +- name: 'Transition to Consulting: Early Projects and First Customer' + startOffset: 436 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=436 + endOffset: 548 +- name: Customer Validation Techniques for Pre-Product Ideas + startOffset: 548 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=548 + endOffset: 773 +- name: 'User Interview Strategy: Questions, Frequency, and Evidence' + startOffset: 773 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=773 + endOffset: 955 +- name: 'Conducting Interviews: Pair Roles and Note-Taking' + startOffset: 955 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=955 + endOffset: 1067 +- name: 'Team Composition: PM + Engineer Partnership' + startOffset: 1067 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1067 + endOffset: 1081 +- name: 'Lessons from a Failed Product: Premature Build and Market Size' + startOffset: 1081 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1081 + endOffset: 1299 +- name: 'Value Realization: Data Modeling vs Infrastructure' + startOffset: 1299 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1299 + endOffset: 1362 +- name: 'Pivot Decision: Choosing Consulting to Capture Value' + startOffset: 1362 + 
url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1362 + endOffset: 1545 +- name: 'Consulting Approach: Hands-On Implementation and Accountability' + startOffset: 1545 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1545 + endOffset: 1679 +- name: 'Client Acquisition: Network-First Outreach' + startOffset: 1679 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1679 + endOffset: 1817 +- name: 'Positioning Services: Target Customers and Timing' + startOffset: 1817 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1817 + endOffset: 1919 +- name: 'Geographic Strategy: Europe Focus and VC Introductions' + startOffset: 1919 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1919 + endOffset: 2223 +- name: 'Messaging Example: Revenue and Marketing Optimization Offers' + startOffset: 2223 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2223 + endOffset: 2450 +- name: 'Marketing Mix: Networking, Content, and LinkedIn' + startOffset: 2450 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2450 + endOffset: 2719 +- name: 'Pricing Framework: Value-Based Benchmarking' + startOffset: 2719 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2719 + endOffset: 2958 +- name: 'Rate Setting: Starting Rates, Maximums and Minimums' + startOffset: 2958 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2958 + endOffset: 3158 +- name: 'Contract Models: Day Rates vs Project Pricing and Incentives' + startOffset: 3158 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3158 + endOffset: 3478 +- name: 'Legal & Administrative: Registering a Consultancy in Germany' + startOffset: 3478 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3478 + endOffset: 3569 +- name: 'Recommended Reading: Decision-Making and Interviewing Books' + startOffset: 3569 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3569 + endOffset: 3686 +- name: Closing Remarks and Episode Wrap-Up + startOffset: 3686 + url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3686 + endOffset: 3616 + 
transcript: - header: Podcast Introduction - header: Podcast Introduction @@ -1115,120 +1215,6 @@ transcript: sec: 3686 time: '1:01:26' who: Alexey -intro: How do you validate customers, run effective user interviews, and set pricing - to build a sustainable data consulting business? In this episode, Aleksander Kruszelnicki - — ex-Delivery Hero product manager turned co-founder of leukos, a boutique data - analytics agency in Berlin — walks through the practical steps he took shifting - from product ideas to a consulting model after early startup failures.

- We cover market and technical limits of “data stack as a service,” first-customer - stories, customer validation techniques for pre-product ideas, and a repeatable - user interview strategy (questions, cadence, roles, and note-taking). Aleksander - explains why value often sits in data modeling over infrastructure, the decision - to pivot to hands-on consulting, and team composition (PM + engineer). You’ll also - hear tactical guidance on client acquisition (network-first outreach), positioning - for European customers and VCs, messaging examples for revenue/marketing optimization, - marketing mix (networking, content, LinkedIn), pricing frameworks and rate setting, - contract models (day rates vs project pricing), and practical legal/admin steps - for registering a consultancy in Germany.

Listen to get actionable methods - for customer validation, user interviews, pricing strategy, and building a data - consulting business that captures real client value. -description: 'Learn data consulting: customer validation, user interviews and pricing - strategy to validate ideas, win clients, set value-based rates & scale your practice.' -dateadded: '2023-03-19' -duration: PT01H16S -quotableClips: -- name: Podcast Introduction - startOffset: 70 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=70 - endOffset: 107 -- name: Career Journey & Archaeology Origin Story - startOffset: 107 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=107 - endOffset: 256 -- name: 'Data Stack as a Service: Market and Technical Limits' - startOffset: 256 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=256 - endOffset: 436 -- name: 'Transition to Consulting: Early Projects and First Customer' - startOffset: 436 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=436 - endOffset: 548 -- name: Customer Validation Techniques for Pre-Product Ideas - startOffset: 548 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=548 - endOffset: 773 -- name: 'User Interview Strategy: Questions, Frequency, and Evidence' - startOffset: 773 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=773 - endOffset: 955 -- name: 'Conducting Interviews: Pair Roles and Note-Taking' - startOffset: 955 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=955 - endOffset: 1067 -- name: 'Team Composition: PM + Engineer Partnership' - startOffset: 1067 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1067 - endOffset: 1081 -- name: 'Lessons from a Failed Product: Premature Build and Market Size' - startOffset: 1081 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1081 - endOffset: 1299 -- name: 'Value Realization: Data Modeling vs Infrastructure' - startOffset: 1299 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1299 - endOffset: 1362 -- name: 'Pivot Decision: Choosing Consulting to 
Capture Value' - startOffset: 1362 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1362 - endOffset: 1545 -- name: 'Consulting Approach: Hands-On Implementation and Accountability' - startOffset: 1545 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1545 - endOffset: 1679 -- name: 'Client Acquisition: Network-First Outreach' - startOffset: 1679 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1679 - endOffset: 1817 -- name: 'Positioning Services: Target Customers and Timing' - startOffset: 1817 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1817 - endOffset: 1919 -- name: 'Geographic Strategy: Europe Focus and VC Introductions' - startOffset: 1919 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=1919 - endOffset: 2223 -- name: 'Messaging Example: Revenue and Marketing Optimization Offers' - startOffset: 2223 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2223 - endOffset: 2450 -- name: 'Marketing Mix: Networking, Content, and LinkedIn' - startOffset: 2450 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2450 - endOffset: 2719 -- name: 'Pricing Framework: Value-Based Benchmarking' - startOffset: 2719 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2719 - endOffset: 2958 -- name: 'Rate Setting: Starting Rates, Maximums and Minimums' - startOffset: 2958 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=2958 - endOffset: 3158 -- name: 'Contract Models: Day Rates vs Project Pricing and Incentives' - startOffset: 3158 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3158 - endOffset: 3478 -- name: 'Legal & Administrative: Registering a Consultancy in Germany' - startOffset: 3478 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3478 - endOffset: 3569 -- name: 'Recommended Reading: Decision-Making and Interviewing Books' - startOffset: 3569 - url: https://www.youtube.com/watch?v=rh_pE35m3vE&t=3569 - endOffset: 3686 -- name: Closing Remarks and Episode Wrap-Up - startOffset: 3686 - url: 
https://www.youtube.com/watch?v=rh_pE35m3vE&t=3686 - endOffset: 3616 --- Links: diff --git a/_podcast/s08e08-teaching-data-engineers.md b/_podcast/data-engineering-career-path-and-skills.md similarity index 97% rename from _podcast/s08e08-teaching-data-engineers.md rename to _podcast/data-engineering-career-path-and-skills.md index 0bc9599a..88e74269 100644 --- a/_podcast/s08e08-teaching-data-engineers.md +++ b/_podcast/data-engineering-career-path-and-skills.md @@ -1,40 +1,148 @@ --- +title: "Build a Data Engineering Career: Bootcamp Curriculum, SQL Mastery & Interview Prep" +short: "Teaching Data Engineers" +season: 8 episode: 8 guests: - jeffkatz -intro: How do you build a data engineering career from zero — what should you learn, - how do you master SQL, and how do you pass the interviews? In this episode, Jeff - Katz — former lawyer turned developer, founder of Jigsaw Labs, and current ML engineer - at AppFolio — walks through practical paths into data engineering and how to design - bootcamp curriculum that actually leads to hires.

We cover curriculum development - and pedagogy (active learning, conceptual-first lessons, reinforcement cycles), - core skills to prioritize (Python, SQL, cloud fundamentals), and why junior-focused - programs drop Spark/Kafka/Kubernetes early. Jeff details analytics engineering tools - (DBT, Snowflake, Mode, Fivetran), backend and ETL practices (Flask, codebase navigation, - testing), data modeling (OLTP vs OLAP), and SQL mastery (window functions, medium - LeetCode problems). You’ll also hear about admissions and screening, mid-program - internships for real experience, interview stages (screening calls, SQL tests, on-site - expectations), and tactics for transitioning from data analyst to data engineer. -

Listen for actionable guidance on building a bootcamp-ready portfolio, - targeted interview prep, and the concrete curriculum choices that employers value - in data engineering hires. +image: images/podcast/data-engineering-career-path-and-skills.jpg ids: anchor: Teaching-Data-Engineers---Jeff-Katz-e1iaoru youtube: dFo10l8B6Go -image: images/podcast/s08e08-teaching-data-engineers.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Teaching-Data-Engineers---Jeff-Katz-e1iaoru apple: https://podcasts.apple.com/us/podcast/teaching-data-engineers-jeff-katz/id1541710331?i=1000561145955 spotify: https://open.spotify.com/episode/0Fo6Y62xaWPy7C24eZKfJw?si=lnjgqHUiRdGiZNxE76QMYQ youtube: https://www.youtube.com/watch?v=dFo10l8B6Go -season: 8 -short: Teaching Data Engineers -title: 'Build a Data Engineering Career: Bootcamp Curriculum, SQL Mastery & Interview - Prep' + +description: "Master data engineering and SQL with a bootcamp curriculum: employer-validated projects, cloud basics, SQL window functions & interview prep for junior roles." +intro: "How do you build a data engineering career from zero — what should you learn, how do you master SQL, and how do you pass the interviews? In this episode, Jeff Katz — former lawyer turned developer, founder of Jigsaw Labs, and current ML engineer at AppFolio — walks through practical paths into data engineering and how to design bootcamp curriculum that actually leads to hires.

We cover curriculum development and pedagogy (active learning, conceptual-first lessons, reinforcement cycles), core skills to prioritize (Python, SQL, cloud fundamentals), and why junior-focused programs drop Spark/Kafka/Kubernetes early. Jeff details analytics engineering tools (DBT, Snowflake, Mode, Fivetran), backend and ETL practices (Flask, codebase navigation, testing), data modeling (OLTP vs OLAP), and SQL mastery (window functions, medium LeetCode problems). You’ll also hear about admissions and screening, mid-program internships for real experience, interview stages (screening calls, SQL tests, on-site expectations), and tactics for transitioning from data analyst to data engineer.

Listen for actionable guidance on building a bootcamp-ready portfolio, targeted interview prep, and the concrete curriculum choices that employers value in data engineering hires" topics: - data engineering - education - career growth +dateadded: 2022-05-16 + +duration: PT01H31S + +quotableClips: +- name: Episode Overview & Guest Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=0 + endOffset: 80 +- name: 'Guest Background: Lawyer → Developer → Educator' + startOffset: 80 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=80 + endOffset: 236 +- name: Active Learning & Continuous Student Feedback (teaching methods) + startOffset: 236 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=236 + endOffset: 392 +- name: 'Education as Social Impact: Training, Refugees, Last-mile' + startOffset: 392 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=392 + endOffset: 522 +- name: 'Early Bootcamps: General Assembly and Flatiron School Origins' + startOffset: 522 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=522 + endOffset: 598 +- name: 'Curriculum Development: Market Research & Employer Validation' + startOffset: 598 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=598 + endOffset: 704 +- name: 'Lesson Structure: Syllabi, Labs, Reinforcement Cycles' + startOffset: 704 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=704 + endOffset: 870 +- name: 'Pedagogy: Conceptual Understanding Before Implementation' + startOffset: 870 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=870 + endOffset: 924 +- name: 'Market Shift: Why Data Science Moved Toward Data Engineering' + startOffset: 924 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=924 + endOffset: 1018 +- name: 'Building a School: Affordability, Part-time Model, Career Services' + startOffset: 1018 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1018 + endOffset: 1218 +- name: 'Lowering Barriers: Workshops, Part-time Pathways, Admissions' + startOffset: 1218 + 
url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1218 + endOffset: 1415 +- name: 'Data Engineering Core Skills: Python, SQL, Cloud Fundamentals' + startOffset: 1415 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1415 + endOffset: 1600 +- name: 'Ensuring Hires: Admissions Criteria, Curriculum-Employer Fit, Follow-up' + startOffset: 1600 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1600 + endOffset: 1661 +- name: 'Mid-Program Internships: Employer Projects for Real Experience' + startOffset: 1661 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1661 + endOffset: 1832 +- name: 'Applicant Screening: Technical Interview & Learning Agility' + startOffset: 1832 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1832 + endOffset: 1985 +- name: 'Interview Practice: Apply Early, Learn from Rejection' + startOffset: 1985 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1985 + endOffset: 2178 +- name: 'Analytics Engineering Module: DBT, Snowflake, Mode, Fivetran' + startOffset: 2178 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2178 + endOffset: 2261 +- name: 'Backend Engineering Module: Flask, ETL, Codebase Navigation, Testing' + startOffset: 2261 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2261 + endOffset: 2285 +- name: 'Curriculum Prioritization: Dropping Spark/Kafka/Kubernetes for Juniors' + startOffset: 2285 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2285 + endOffset: 2442 +- name: 'Transition Path: Data Analyst → Data Engineer (backend & cloud focus)' + startOffset: 2442 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2442 + endOffset: 2661 +- name: 'SQL Mastery: Window Functions & Medium LeetCode SQL Problems' + startOffset: 2661 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2661 + endOffset: 2714 +- name: 'Data Modeling Practice: OLTP vs OLAP and Sample Databases' + startOffset: 2714 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2714 + endOffset: 2880 +- name: 'Interview Stages: Screening Calls, 
SQL Tests, On-site Expectations' + startOffset: 2880 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2880 + endOffset: 2992 +- name: 'How to Start Teaching: Pick a Beginner Topic & Teach One Person' + startOffset: 2992 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2992 + endOffset: 3116 +- name: 'Delivery Tactics: In-Person vs Online Engagement and Sequencing' + startOffset: 3116 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3116 + endOffset: 3294 +- name: 'Running a Small School: Curriculum Volume and Time Management' + startOffset: 3294 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3294 + endOffset: 3406 +- name: 'Teaching Fundamentals vs Shiny Tech: 85% Python/SQL, 15% tools' + startOffset: 3406 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3406 + endOffset: 3571 +- name: 'Outcomes & Next Cohort: JigsawLabs Results and Start Date' + startOffset: 3571 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3571 + endOffset: 3621 +- name: 'Contact & Follow-up: Jeff Katz, Webinar on Getting Data Engineering Jobs' + startOffset: 3621 + url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3621 + endOffset: 3631 + transcript: - header: Episode Overview & Guest Introduction - header: 'Guest Background: Lawyer → Developer → Educator' @@ -1298,127 +1406,6 @@ transcript: sec: 3711 time: '1:01:51' who: Jeff -description: 'Master data engineering and SQL with a bootcamp curriculum: employer-validated - projects, cloud basics, SQL window functions & interview prep for junior roles.' 
-dateadded: '2022-05-16' -duration: PT01H31S -quotableClips: -- name: Episode Overview & Guest Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=0 - endOffset: 80 -- name: 'Guest Background: Lawyer → Developer → Educator' - startOffset: 80 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=80 - endOffset: 236 -- name: Active Learning & Continuous Student Feedback (teaching methods) - startOffset: 236 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=236 - endOffset: 392 -- name: 'Education as Social Impact: Training, Refugees, Last-mile' - startOffset: 392 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=392 - endOffset: 522 -- name: 'Early Bootcamps: General Assembly and Flatiron School Origins' - startOffset: 522 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=522 - endOffset: 598 -- name: 'Curriculum Development: Market Research & Employer Validation' - startOffset: 598 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=598 - endOffset: 704 -- name: 'Lesson Structure: Syllabi, Labs, Reinforcement Cycles' - startOffset: 704 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=704 - endOffset: 870 -- name: 'Pedagogy: Conceptual Understanding Before Implementation' - startOffset: 870 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=870 - endOffset: 924 -- name: 'Market Shift: Why Data Science Moved Toward Data Engineering' - startOffset: 924 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=924 - endOffset: 1018 -- name: 'Building a School: Affordability, Part-time Model, Career Services' - startOffset: 1018 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1018 - endOffset: 1218 -- name: 'Lowering Barriers: Workshops, Part-time Pathways, Admissions' - startOffset: 1218 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1218 - endOffset: 1415 -- name: 'Data Engineering Core Skills: Python, SQL, Cloud Fundamentals' - startOffset: 1415 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1415 - endOffset: 
1600 -- name: 'Ensuring Hires: Admissions Criteria, Curriculum-Employer Fit, Follow-up' - startOffset: 1600 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1600 - endOffset: 1661 -- name: 'Mid-Program Internships: Employer Projects for Real Experience' - startOffset: 1661 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1661 - endOffset: 1832 -- name: 'Applicant Screening: Technical Interview & Learning Agility' - startOffset: 1832 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1832 - endOffset: 1985 -- name: 'Interview Practice: Apply Early, Learn from Rejection' - startOffset: 1985 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=1985 - endOffset: 2178 -- name: 'Analytics Engineering Module: DBT, Snowflake, Mode, Fivetran' - startOffset: 2178 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2178 - endOffset: 2261 -- name: 'Backend Engineering Module: Flask, ETL, Codebase Navigation, Testing' - startOffset: 2261 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2261 - endOffset: 2285 -- name: 'Curriculum Prioritization: Dropping Spark/Kafka/Kubernetes for Juniors' - startOffset: 2285 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2285 - endOffset: 2442 -- name: 'Transition Path: Data Analyst → Data Engineer (backend & cloud focus)' - startOffset: 2442 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2442 - endOffset: 2661 -- name: 'SQL Mastery: Window Functions & Medium LeetCode SQL Problems' - startOffset: 2661 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2661 - endOffset: 2714 -- name: 'Data Modeling Practice: OLTP vs OLAP and Sample Databases' - startOffset: 2714 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2714 - endOffset: 2880 -- name: 'Interview Stages: Screening Calls, SQL Tests, On-site Expectations' - startOffset: 2880 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=2880 - endOffset: 2992 -- name: 'How to Start Teaching: Pick a Beginner Topic & Teach One Person' - startOffset: 2992 - url: 
https://www.youtube.com/watch?v=dFo10l8B6Go&t=2992 - endOffset: 3116 -- name: 'Delivery Tactics: In-Person vs Online Engagement and Sequencing' - startOffset: 3116 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3116 - endOffset: 3294 -- name: 'Running a Small School: Curriculum Volume and Time Management' - startOffset: 3294 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3294 - endOffset: 3406 -- name: 'Teaching Fundamentals vs Shiny Tech: 85% Python/SQL, 15% tools' - startOffset: 3406 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3406 - endOffset: 3571 -- name: 'Outcomes & Next Cohort: JigsawLabs Results and Start Date' - startOffset: 3571 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3571 - endOffset: 3621 -- name: 'Contact & Follow-up: Jeff Katz, Webinar on Getting Data Engineering Jobs' - startOffset: 3621 - url: https://www.youtube.com/watch?v=dFo10l8B6Go&t=3621 - endOffset: 3631 --- Links: diff --git a/_podcast/s07e07-becoming-a-data-engineering-manager.md b/_podcast/data-engineering-leadership-and-modern-data-platforms.md similarity index 97% rename from _podcast/s07e07-becoming-a-data-engineering-manager.md rename to _podcast/data-engineering-leadership-and-modern-data-platforms.md index 7d8db751..d4131efc 100644 --- a/_podcast/s07e07-becoming-a-data-engineering-manager.md +++ b/_podcast/data-engineering-leadership-and-modern-data-platforms.md @@ -1,42 +1,133 @@ --- +title: "Data Engineering Leadership: Scale ETL to ELT, Build Robust Data Platforms & Teams" +short: "Becoming a Data Engineering Manager" +season: 7 episode: 7 guests: - 16rahuljain -description: Learn to scale ETL to ELT and build resilient data platforms—gain leadership - skills, stakeholder management, data quality metrics and hiring tips. -intro: 'How do you lead a data engineering team to scale ETL into ELT, build a robust - data platform, and maintain data quality as you grow? 
In this episode, Rahul Jain - — a data engineering manager at Siemens with 12+ years in data and three years in - management — walks through that transition from ETL developer to IoT data platform - lead and what leadership looks like in practice.

We cover practical topics - like migrating ETL to ELT architectures, data lake and data lineage design, and - end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul - discusses stakeholder management, prioritization, hands-on technical credibility, - balancing individual contributor work with people management, and onboarding strategies - to build trust and delegate effectively. He shares approaches for measuring success - (data culture, consumers served, data quality), detecting data reconciliation issues, - GDPR tactics like dynamic data masking and role‑based access, and how to evaluate - new tools (example: Prefect). Hiring, interview screening, and essential skills - (SQL, Python, CI/CD, cloud) are also explored.

Listen to gain concrete - leadership and technical guidance for scaling data platforms, improving throughput, - and enabling your team to deliver reliable, compliant data products.' -topics: -- data engineering -- career growth -- career switch +image: images/podcast/data-engineering-leadership-and-modern-data-platforms.jpg ids: anchor: Becoming-a-Data-Engineering-Manager---Rahul-Jain-e1f5nvf youtube: FljnbUQ796w -image: images/podcast/s07e07-becoming-a-data-engineering-manager.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-Engineering-Manager---Rahul-Jain-e1f5nvf apple: https://podcasts.apple.com/us/podcast/becoming-a-data-engineering-manager-rahul-jain/id1541710331?i=1000552953646 spotify: https://open.spotify.com/episode/4nWP18woLTt4a7Wm0CQwhM youtube: https://www.youtube.com/watch?v=FljnbUQ796w -season: 7 -short: Becoming a Data Engineering Manager -title: 'Data Engineering Leadership: Scale ETL to ELT, Build Robust Data Platforms - & Teams' + +description: "Learn to scale ETL to ELT and build resilient data platforms—gain leadership skills, stakeholder management, data quality metrics and hiring tips" +intro: "How do you lead a data engineering team to scale ETL into ELT, build a robust data platform, and maintain data quality as you grow? In this episode, Rahul Jain — a data engineering manager at Siemens with 12+ years in data and three years in management — walks through that transition from ETL developer to IoT data platform lead and what leadership looks like in practice.

We cover practical topics like migrating ETL to ELT architectures, data lake and data lineage design, and end-to-end pipeline patterns (ingestion, central hub, exposure, monitoring). Rahul discusses stakeholder management, prioritization, hands-on technical credibility, balancing individual contributor work with people management, and onboarding strategies to build trust and delegate effectively. He shares approaches for measuring success (data culture, consumers served, data quality), detecting data reconciliation issues, GDPR tactics like dynamic data masking and role-based access, and how to evaluate new tools (example: Prefect). Hiring, interview screening, and essential skills (SQL, Python, CI/CD, cloud) are also explored.

Listen to gain concrete leadership and technical guidance for scaling data platforms, improving throughput, and enabling your team to deliver reliable, compliant data products." +topics: +- data engineering +- career growth +- career switch +dateadded: 2022-03-06 + +duration: PT00H59M31S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=0 + endOffset: 116 +- name: 'Rahul''s Career Path: From ETL Developer to IoT Data Platform Lead' + startOffset: 116 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=116 + endOffset: 212 +- name: ETL Foundations to Big Data and Open Source Tooling + startOffset: 212 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=212 + endOffset: 292 +- name: 'Data Engineering Leadership: Stakeholder Management & Prioritization' + startOffset: 292 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=292 + endOffset: 447 +- name: 'Technical Credibility: Hands-on Management and Code-Level Involvement' + startOffset: 447 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=447 + endOffset: 534 +- name: 'Time Allocation: Balancing Individual Contributor Work with People Management' + startOffset: 534 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=534 + endOffset: 669 +- name: 'Transition into Management: Business Acumen and Seeing the Bigger Picture' + startOffset: 669 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=669 + endOffset: 795 +- name: 'Core Manager Traits: Empathy, Situational Awareness, and Quality Standards' + startOffset: 795 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=795 + endOffset: 894 +- name: 'Continuous Learning: Evaluating New Tools and Prototypes (example: Prefect)' + startOffset: 894 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=894 + endOffset: 992 +- name: 'Onboarding Challenges: Building Trust, Prioritization, and Delegation' + startOffset: 992 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=992 + endOffset: 1395 +- 
name: 'Expectation Framework: Non-Negotiable Deliverables vs. Stretch (Aspirational) + Goals' + startOffset: 1395 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1395 + endOffset: 1504 +- name: 'Measuring Success: Data Culture, Consumers Served, and Data Quality Metrics' + startOffset: 1504 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1504 + endOffset: 1684 +- name: 'Data Reconciliation: Detecting Losses Between Sources and Targets' + startOffset: 1684 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1684 + endOffset: 1741 +- name: 'GDPR Strategies: Dynamic Data Masking and Role-Based Access Control' + startOffset: 1741 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1741 + endOffset: 1850 +- name: 'Modeling at Scale: Moving from ETL to ELT, Data Lake, and Data Lineage' + startOffset: 1850 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1850 + endOffset: 2019 +- name: 'Manager Transition Advice: Prioritize Business Impact and Enable Team Growth' + startOffset: 2019 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2019 + endOffset: 2138 +- name: 'Sustaining Relevance: Automate Monotony and Improve Throughput' + startOffset: 2138 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2138 + endOffset: 2316 +- name: 'Essential Data Engineering Skills: SQL, Python, CI/CD, Cloud, and Ownership' + startOffset: 2316 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2316 + endOffset: 2460 +- name: 'Interview Screening: Communicating Projects Clearly in Five Minutes' + startOffset: 2460 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2460 + endOffset: 2688 +- name: 'Hiring Assessment: Hypotheticals, Leadership Traits, and Future Potential' + startOffset: 2688 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2688 + endOffset: 2833 +- name: 'Top Hires: Due Diligence, Cultural Fit, and Assertiveness' + startOffset: 2833 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2833 + endOffset: 2975 +- name: 'Filtering Buzzwords: Ask for 
Context, Alternatives, and Real Use Cases' + startOffset: 2975 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2975 + endOffset: 3274 +- name: 'Advice for Students: Master DBMS, SQL, and Fundamentals Over Specific Tools' + startOffset: 3274 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3274 + endOffset: 3449 +- name: 'End-to-End Data Pipeline Overview: Ingestion, Central Hub, Exposure, Monitoring' + startOffset: 3449 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3449 + endOffset: 3599 +- name: Closing Remarks and Connect with Rahul on LinkedIn + startOffset: 3599 + url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3599 + endOffset: 3571 + transcript: - header: Podcast Introduction - line: This week, we'll talk about becoming a data engineering manager. We have a @@ -440,7 +531,7 @@ transcript: sec: 1375 time: '22:55' who: Alexey -- header: 'Expectation Framework: Non‑Negotiable Deliverables vs. Stretch (Aspirational) +- header: 'Expectation Framework: Non-Negotiable Deliverables vs. 
Stretch (Aspirational) Goals' - line: Yeah, it took quite some time to build the framework to set this because the nature of businesses changes very dynamically and you will have the requirements @@ -1105,108 +1196,4 @@ transcript: sec: 3650 time: '1:00:50' who: Rahul -dateadded: '2022-03-06' -duration: PT00H59M31S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=0 - endOffset: 116 -- name: 'Rahul''s Career Path: From ETL Developer to IoT Data Platform Lead' - startOffset: 116 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=116 - endOffset: 212 -- name: ETL Foundations to Big Data and Open Source Tooling - startOffset: 212 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=212 - endOffset: 292 -- name: 'Data Engineering Leadership: Stakeholder Management & Prioritization' - startOffset: 292 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=292 - endOffset: 447 -- name: 'Technical Credibility: Hands-on Management and Code-Level Involvement' - startOffset: 447 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=447 - endOffset: 534 -- name: 'Time Allocation: Balancing Individual Contributor Work with People Management' - startOffset: 534 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=534 - endOffset: 669 -- name: 'Transition into Management: Business Acumen and Seeing the Bigger Picture' - startOffset: 669 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=669 - endOffset: 795 -- name: 'Core Manager Traits: Empathy, Situational Awareness, and Quality Standards' - startOffset: 795 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=795 - endOffset: 894 -- name: 'Continuous Learning: Evaluating New Tools and Prototypes (example: Prefect)' - startOffset: 894 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=894 - endOffset: 992 -- name: 'Onboarding Challenges: Building Trust, Prioritization, and Delegation' - startOffset: 992 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=992 
- endOffset: 1395 -- name: 'Expectation Framework: Non‑Negotiable Deliverables vs. Stretch (Aspirational) - Goals' - startOffset: 1395 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1395 - endOffset: 1504 -- name: 'Measuring Success: Data Culture, Consumers Served, and Data Quality Metrics' - startOffset: 1504 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1504 - endOffset: 1684 -- name: 'Data Reconciliation: Detecting Losses Between Sources and Targets' - startOffset: 1684 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1684 - endOffset: 1741 -- name: 'GDPR Strategies: Dynamic Data Masking and Role-Based Access Control' - startOffset: 1741 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1741 - endOffset: 1850 -- name: 'Modeling at Scale: Moving from ETL to ELT, Data Lake, and Data Lineage' - startOffset: 1850 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=1850 - endOffset: 2019 -- name: 'Manager Transition Advice: Prioritize Business Impact and Enable Team Growth' - startOffset: 2019 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2019 - endOffset: 2138 -- name: 'Sustaining Relevance: Automate Monotony and Improve Throughput' - startOffset: 2138 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2138 - endOffset: 2316 -- name: 'Essential Data Engineering Skills: SQL, Python, CI/CD, Cloud, and Ownership' - startOffset: 2316 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2316 - endOffset: 2460 -- name: 'Interview Screening: Communicating Projects Clearly in Five Minutes' - startOffset: 2460 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2460 - endOffset: 2688 -- name: 'Hiring Assessment: Hypotheticals, Leadership Traits, and Future Potential' - startOffset: 2688 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2688 - endOffset: 2833 -- name: 'Top Hires: Due Diligence, Cultural Fit, and Assertiveness' - startOffset: 2833 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2833 - endOffset: 2975 -- name: 'Filtering 
Buzzwords: Ask for Context, Alternatives, and Real Use Cases' - startOffset: 2975 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=2975 - endOffset: 3274 -- name: 'Advice for Students: Master DBMS, SQL, and Fundamentals Over Specific Tools' - startOffset: 3274 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3274 - endOffset: 3449 -- name: 'End-to-End Data Pipeline Overview: Ingestion, Central Hub, Exposure, Monitoring' - startOffset: 3449 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3449 - endOffset: 3599 -- name: Closing Remarks and Connect with Rahul on LinkedIn - startOffset: 3599 - url: https://www.youtube.com/watch?v=FljnbUQ796w&t=3599 - endOffset: 3571 --- diff --git a/_podcast/s05e02-data-engineering-acronyms.md b/_podcast/data-engineering-tools-modern-data-stack.md similarity index 97% rename from _podcast/s05e02-data-engineering-acronyms.md rename to _podcast/data-engineering-tools-modern-data-stack.md index c7617bb5..671da9e7 100644 --- a/_podcast/s05e02-data-engineering-acronyms.md +++ b/_podcast/data-engineering-tools-modern-data-stack.md @@ -1,12 +1,11 @@ --- -title: 'ETL vs ELT & Data Lake vs Warehouse: Airbyte, dbt, CDC for Modern Data Engineering' -short: Making Sense of Data Engineering Acronyms and Buzzwords -guests: -- nataliekwong -image: images/podcast/s05e02-data-engineering-acronyms.jpg +title: "ETL vs ELT & Data Lake vs Warehouse: Airbyte, dbt, CDC for Modern Data Engineering" +short: "Making Sense of Data Engineering Acronyms and Buzzwords" season: 5 -date: 2025-11-07 episode: 2 +guests: +- nataliekwong +image: images/podcast/data-engineering-tools-modern-data-stack.jpg ids: youtube: t9Z1S3OYnJU anchor: Making-Sense-of-Data-Engineering-Acronyms-and-Buzzwords---Natalie-Kwong-e177303 @@ -15,6 +14,135 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Making-Sense-of-Data-Engineering-Acronyms-and-Buzzwords---Natalie-Kwong-e177303 spotify: https://open.spotify.com/episode/1AvtwdcAXGGjdJ7fl0Hsuw apple: 
https://podcasts.apple.com/us/podcast/making-sense-of-data-engineering-acronyms-and/id1541710331?i=1000534990760 + +description: "Discover ETL vs ELT, data lake vs data warehouse with Airbyte and dbt—learn CDC, orchestration, and governance to design reliable, fast modern data pipelines" +intro: "How do you decide between ETL and ELT, or when to keep a data lake versus a warehouse—and where do tools like Airbyte, dbt, and CDC fit into a modern data stack? In this episode, Natalie Kwong, Growth Product Manager at Airbyte with prior analytics and ops roles at Harness, KeepTruckin, and AppDynamics, pulls from hands-on experience scaling analytics teams and systems to unpack these trade-offs.

We break down core concepts—ETL (traditional extract-transform-load) vs ELT (load then transform), the rise of the analytics engineer, and why ELT favors analyst autonomy with dbt. Natalie explains Airbyte's role as a connector/ingestion layer, CDC for row-level change syncing, and orchestration with Airflow. We also cover data lake vs data warehouse purposes, preventing data swamps through governance, schema evolution, operational reverse data flows, and when hybrid architectures make sense.

If you're designing a modern data platform or refining pipelines, this episode offers practical guidance on ETL vs ELT decisions, choosing lakes vs warehouses, leveraging Airbyte and dbt, and operational considerations like data quality, orchestration, and cleanup practices" +topics: +- data engineering +- tools +dateadded: 2021-09-11 +date: 2025-11-07 + +duration: PT00H59M55S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=0 + endOffset: 94 +- name: 'Episode Overview: Decoding Data Engineering Acronyms' + startOffset: 94 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=94 + endOffset: 118 +- name: 'Guest Career Journey: From Marketing Ops to Analytics & Growth' + startOffset: 118 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=118 + endOffset: 199 +- name: 'Airbyte Overview: ELT Focus and Connector Purpose' + startOffset: 199 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=199 + endOffset: 226 +- name: 'ETL Explained: Extract, Transform, Load (Traditional Model)' + startOffset: 226 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=226 + endOffset: 397 +- name: 'ETL Use Case: Calculating Customer Acquisition Cost' + startOffset: 397 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=397 + endOffset: 477 +- name: 'ELT Advantages: Flexibility, Speed, and Analyst Autonomy' + startOffset: 477 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=477 + endOffset: 600 +- name: 'Transformations in Practice: From Type Casting to Complex SQL Joins' + startOffset: 600 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=600 + endOffset: 759 +- name: 'Analytics Engineer Emergence: Empowering Analysts with DBT & SQL' + startOffset: 759 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=759 + endOffset: 930 +- name: 'Data Marts vs. 
Warehouses: Purpose, Layers, and Consumption' + startOffset: 930 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=930 + endOffset: 1075 +- name: 'Ingestion Layer: Raw Data Storage, Sanity, and Guardrails' + startOffset: 1075 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1075 + endOffset: 1127 +- name: 'Bringing Transforms Into the Warehouse: ELT vs Legacy Workflows' + startOffset: 1127 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1127 + endOffset: 1190 +- name: 'Data Lakes: Unstructured Storage for Files, Logs, and Media' + startOffset: 1190 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1190 + endOffset: 1282 +- name: 'Data Quality: Preventing Data Swamps Through Governance' + startOffset: 1282 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1282 + endOffset: 1464 +- name: 'Warehouse Ingestion vs. Data Lake: Trade-offs and Convergence' + startOffset: 1464 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1464 + endOffset: 1659 +- name: 'Architecture Decision: When to Maintain a Lake, a Warehouse, or Both' + startOffset: 1659 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1659 + endOffset: 1859 +- name: 'Orchestration: Airflow’s Role in Scheduling and Running Pipelines' + startOffset: 1859 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1859 + endOffset: 1891 +- name: 'Airbyte’s Role in the Stack: Reliable E-L and DBT Integration' + startOffset: 1891 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1891 + endOffset: 2025 +- name: 'Modern Analytics Stack: Best-of-Breed Tools and Typical Components' + startOffset: 2025 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2025 + endOffset: 2142 +- name: 'Operational Reverse Data Flows: Pushing Warehouse Tables Back to Sources' + startOffset: 2142 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2142 + endOffset: 2346 +- name: 'Low-Code/No-Code Tools: Evolving Data Engineering Roles, Not Replacing Them' + startOffset: 2346 + url: 
https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2346 + endOffset: 2490 +- name: 'ETL’s Continued Relevance: Large Enterprises and Complex Staging Needs' + startOffset: 2490 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2490 + endOffset: 2582 +- name: 'Managing Unused Data: Team Ownership and Regular Cleanup Practices' + startOffset: 2582 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2582 + endOffset: 2625 +- name: 'Open Source Strategy: Why Airbyte Is Open and the Cloud Offering Model' + startOffset: 2625 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2625 + endOffset: 2759 +- name: 'CDC Explained: Capturing and Syncing Only Row-Level Changes' + startOffset: 2759 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2759 + endOffset: 2906 +- name: 'Open-Source Risks: Competition and Licensing (Elasticsearch Example)' + startOffset: 2906 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2906 + endOffset: 2938 +- name: 'Schema Evolution: Handling Slowly Changing Attributes' + startOffset: 2938 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2938 + endOffset: 2972 +- name: 'Licensing Considerations: MIT, Cloud Products, and Future Choices' + startOffset: 2972 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2972 + endOffset: 3642 +- name: 'Episode Wrap-Up: Final Thoughts, Hiring News, and Contact Information' + startOffset: 3642 + url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=3642 + endOffset: 3595 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Decoding Data Engineering Acronyms' @@ -848,7 +976,7 @@ transcript: sec: 1872 time: '31:12' who: Natalie -- header: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' +- header: 'Airbyte’s Role in the Stack: Reliable E-L and DBT Integration' - line: I think you mentioned at the beginning what Airbyte does – it's about transformation, right? It's about ingesting and then putting it into a data warehouse. 
Maybe now we can try to make sense from all these buzzwords. We know what the transformation @@ -856,7 +984,7 @@ transcript: about putting something into a data warehouse. Then a data warehouse is basically the database that we use for all these analytical purposes. So yeah, maybe you can tell us now what Airbyte does? -- header: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' +- header: 'Airbyte’s Role in the Stack: Reliable E-L and DBT Integration' - line: I think you mentioned at the beginning what Airbyte does – it's about transformation, right? It's about ingesting and then putting it into a data warehouse. Maybe now we can try to make sense from all these buzzwords. We know what the transformation @@ -909,14 +1037,14 @@ transcript: sec: 2013 time: '33:33' who: Natalie -- header: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' +- header: 'Modern Analytics Stack: Best-of-Breed Tools and Typical Components' - line: Yeah. So speaking of this modern stack, I've heard this term many times and actually we have a talk about this quite soon. It's about this modern stack for analytics. Actually the talk we have is “modern data stack for analytics engineering.” I don't know if there are different stacks for analytics and for analytics engineering – probably they’re the same. So, what is it? Can you tell us a bit about it? Which tools are a part of this stack? Why do we even talk about it? Why is it a thing? -- header: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' +- header: 'Modern Analytics Stack: Best-of-Breed Tools and Typical Components' - line: Yeah. So speaking of this modern stack, I've heard this term many times and actually we have a talk about this quite soon. It's about this modern stack for analytics. 
Actually the talk we have is “modern data stack for analytics engineering.” @@ -1049,7 +1177,7 @@ transcript: sec: 2316 time: '38:36' who: Natalie -- header: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing +- header: 'Low-Code/No-Code Tools: Evolving Data Engineering Roles, Not Replacing Them' - line: 'To make sure I understood the whole picture: we have some of these tools like Google AdWords – all these systems, like Google AdWords, or Facebook Ads, @@ -1057,7 +1185,7 @@ transcript: our data warehouse or ingest. We import and then we do something and then we export back, right? Or using the terminology we just learned, we first extract, then do something, and then we do this reverse extract, and then put that back.' -- header: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing +- header: 'Low-Code/No-Code Tools: Evolving Data Engineering Roles, Not Replacing Them' - line: 'To make sure I understood the whole picture: we have some of these tools like Google AdWords – all these systems, like Google AdWords, or Facebook Ads, @@ -1283,13 +1411,13 @@ transcript: sec: 2846 time: '47:26' who: Alexey -- header: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' +- header: 'Open-Source Risks: Competition and Licensing (Elasticsearch Example)' - line: Yeah, exactly. It’s essentially a performance consideration. It also allows you to capture deleted rows. So that's another benefit as well. I think that we don't offer it on all of our data warehouse sources yet. But we are actively working on building out CDC capabilities for all the sources that essentially allow for that. -- header: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' +- header: 'Open-Source Risks: Competition and Licensing (Elasticsearch Example)' - line: Yeah, exactly. It’s essentially a performance consideration. It also allows you to capture deleted rows. So that's another benefit as well. 
I think that we don't offer it on all of our data warehouse sources yet. But we are actively working @@ -1574,7 +1702,7 @@ transcript: sec: 3636 time: '1:00:36' who: Alexey -- header: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' +- header: 'Episode Wrap-Up: Final Thoughts, Hiring News, and Contact Information' - line: It was such a pleasure to be on this, talking about these acronyms. I hope it helped some of your listeners get more clarity. Airbyte – check us out. We are also hiring on a lot of different fronts. Not just on the engineering front, @@ -1582,7 +1710,7 @@ transcript: gets listed on our company docs page – very public. If you want to contribute back or check us out, you can do that very easily. All the information is on our website. -- header: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' +- header: 'Episode Wrap-Up: Final Thoughts, Hiring News, and Contact Information' - line: It was such a pleasure to be on this, talking about these acronyms. I hope it helped some of your listeners get more clarity. Airbyte – check us out. We are also hiring on a lot of different fronts. Not just on the engineering front, @@ -1607,142 +1735,6 @@ transcript: sec: 3689 time: '1:01:29' who: Alexey -intro: How do you decide between ETL and ELT, or when to keep a data lake versus a - warehouse—and where do tools like Airbyte, dbt, and CDC fit into a modern data stack? - In this episode, Natalie Kwong, Growth Product Manager at Airbyte with prior analytics - and ops roles at Harness, KeepTruckin, and AppDynamics, pulls from hands-on experience - scaling analytics teams and systems to unpack these trade-offs.

We break - down core concepts—ETL (traditional extract-transform-load) vs ELT (load then transform), - the rise of the analytics engineer, and why ELT favors analyst autonomy with dbt. - Natalie explains Airbyte's role as a connector/ingestion layer, CDC for row-level - change syncing, and orchestration with Airflow. We also cover data lake vs data - warehouse purposes, preventing data swamps through governance, schema evolution, - operational reverse data flows, and when hybrid architectures make sense.

- If you're designing a modern data platform or refining pipelines, this episode offers - practical guidance on ETL vs ELT decisions, choosing lakes vs warehouses, leveraging - Airbyte and dbt, and operational considerations like data quality, orchestration, - and cleanup practices. -description: Discover ETL vs ELT, data lake vs data warehouse with Airbyte and dbt—learn - CDC, orchestration, and governance to design reliable, fast modern data pipelines. -dateadded: '2021-09-11' -duration: PT00H59M55S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=0 - endOffset: 94 -- name: 'Episode Overview: Decoding Data Engineering Acronyms' - startOffset: 94 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=94 - endOffset: 118 -- name: 'Guest Career Journey: From Marketing Ops to Analytics & Growth' - startOffset: 118 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=118 - endOffset: 199 -- name: 'Airbyte Overview: ELT Focus and Connector Purpose' - startOffset: 199 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=199 - endOffset: 226 -- name: 'ETL Explained: Extract, Transform, Load (Traditional Model)' - startOffset: 226 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=226 - endOffset: 397 -- name: 'ETL Use Case: Calculating Customer Acquisition Cost' - startOffset: 397 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=397 - endOffset: 477 -- name: 'ELT Advantages: Flexibility, Speed, and Analyst Autonomy' - startOffset: 477 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=477 - endOffset: 600 -- name: 'Transformations in Practice: From Type Casting to Complex SQL Joins' - startOffset: 600 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=600 - endOffset: 759 -- name: 'Analytics Engineer Emergence: Empowering Analysts with DBT & SQL' - startOffset: 759 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=759 - endOffset: 930 -- name: 'Data Marts vs. 
Warehouses: Purpose, Layers, and Consumption' - startOffset: 930 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=930 - endOffset: 1075 -- name: 'Ingestion Layer: Raw Data Storage, Sanity, and Guardrails' - startOffset: 1075 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1075 - endOffset: 1127 -- name: 'Bringing Transforms Into the Warehouse: ELT vs Legacy Workflows' - startOffset: 1127 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1127 - endOffset: 1190 -- name: 'Data Lakes: Unstructured Storage for Files, Logs, and Media' - startOffset: 1190 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1190 - endOffset: 1282 -- name: 'Data Quality: Preventing Data Swamps Through Governance' - startOffset: 1282 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1282 - endOffset: 1464 -- name: 'Warehouse Ingestion vs. Data Lake: Trade-offs and Convergence' - startOffset: 1464 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1464 - endOffset: 1659 -- name: 'Architecture Decision: When to Maintain a Lake, a Warehouse, or Both' - startOffset: 1659 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1659 - endOffset: 1859 -- name: 'Orchestration: Airflow’s Role in Scheduling and Running Pipelines' - startOffset: 1859 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1859 - endOffset: 1891 -- name: 'Airbyte’s Role in the Stack: Reliable E‑L and DBT Integration' - startOffset: 1891 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=1891 - endOffset: 2025 -- name: 'Modern Analytics Stack: Best‑of‑Breed Tools and Typical Components' - startOffset: 2025 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2025 - endOffset: 2142 -- name: 'Operational Reverse Data Flows: Pushing Warehouse Tables Back to Sources' - startOffset: 2142 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2142 - endOffset: 2346 -- name: 'Low‑Code/No‑Code Tools: Evolving Data Engineering Roles, Not Replacing Them' - startOffset: 2346 - url: 
https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2346 - endOffset: 2490 -- name: 'ETL’s Continued Relevance: Large Enterprises and Complex Staging Needs' - startOffset: 2490 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2490 - endOffset: 2582 -- name: 'Managing Unused Data: Team Ownership and Regular Cleanup Practices' - startOffset: 2582 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2582 - endOffset: 2625 -- name: 'Open Source Strategy: Why Airbyte Is Open and the Cloud Offering Model' - startOffset: 2625 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2625 - endOffset: 2759 -- name: 'CDC Explained: Capturing and Syncing Only Row-Level Changes' - startOffset: 2759 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2759 - endOffset: 2906 -- name: 'Open‑Source Risks: Competition and Licensing (Elasticsearch Example)' - startOffset: 2906 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2906 - endOffset: 2938 -- name: 'Schema Evolution: Handling Slowly Changing Attributes' - startOffset: 2938 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2938 - endOffset: 2972 -- name: 'Licensing Considerations: MIT, Cloud Products, and Future Choices' - startOffset: 2972 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=2972 - endOffset: 3642 -- name: 'Episode Wrap‑Up: Final Thoughts, Hiring News, and Contact Information' - startOffset: 3642 - url: https://www.youtube.com/watch?v=t9Z1S3OYnJU&t=3642 - endOffset: 3595 --- Links: diff --git a/_podcast/s20e09-taking-your-freelance-career-to-next-level.md b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md similarity index 91% rename from _podcast/s20e09-taking-your-freelance-career-to-next-level.md rename to _podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md index 6b59556f..1ccc2b31 100644 --- a/_podcast/s20e09-taking-your-freelance-career-to-next-level.md +++ b/_podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.md 
@@ -1,28 +1,97 @@ --- +title: "Building a Sustainable Data Freelancing Career: Market Validation, Client Acquisition & Strategic Positioning" +short: "Taking your Freelance Career to the Next Level" +season: 20 episode: 9 guests: - dimitrivisnadi -date: 2025-11-07 -topics: -- Freelance -- Career Growth -- Consulting -- Personal Branding -- Entrepreneurship -- Remote Work -- Business Development +image: images/podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.jpg ids: anchor: datatalksclub/episodes/Can-You-Quit-Your-Job-and-Still-Succeed-as-a-Data-Freelancer-e360j7e youtube: S93V8RgwBig -image: images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Can-You-Quit-Your-Job-and-Still-Succeed-as-a-Data-Freelancer-e360j7e apple: https://podcasts.apple.com/us/podcast/can-you-quit-your-job-and-still-succeed-as-a-data-freelancer/id1541710331?i=1000718997257 spotify: https://open.spotify.com/episode/3BknrKqhLggx1G5ZbrfgFc youtube: https://www.youtube.com/watch?v=S93V8RgwBig -season: 20 -short: Taking your Freelance Career to the Next Level -title: 'Data Freelancer Playbook: Land Clients, Price Services & Use AI for Productivity' +description: "Master data freelancer tactics, pricing strategies and AI tools to land clients, price services confidently, and boost productivity for higher income" +intro: "How do you move from employed data pro to a sustainable data freelancer who consistently lands clients, prices services well, and uses AI to boost productivity? In this episode, Dimitri Visnadi — an independent data consultant focused on data strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles at HP and a Google-partnered firm, and holds a Masters in Business Analytics & Computer Science from UCL — walks through a practical playbook for data freelancers.

Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus problem-solving, and how to validate freelance viability with financial targets. He explains how to land initial clients through recruiters and LinkedIn, the idea behind a data-freelancer job board, market-driven specialization, and insights on rates, top skills and data management. You’ll hear about scaling choices (lifestyle business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course and community approaches for branding and marketing, subscription models and client relationship management, high-impact small analyses, pricing strategies (hourly vs packages), and transition planning.

Listen to get concrete guidance on landing clients, setting prices, structuring offers, and using AI tools to increase productivity as a freelance data consultant" +topics: +- Freelance +- Career Growth +- Consulting +- Personal Branding +- Entrepreneurship +- Remote Work +- Business Development +dateadded: 2025-07-28 +date: 2025-11-07 +duration: PT01H05M29S +quotableClips: +- name: Episode Opening & Dimitri’s Data Journey + startOffset: 0 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=0 + endOffset: 341 +- name: Job Tenure Trends & Freelancer Types + startOffset: 341 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=341 + endOffset: 650 +- name: Expertise vs Problem-Solving in Freelance Work + startOffset: 650 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=650 + endOffset: 853 +- name: 'Validating Freelance Viability: Financial Targets' + startOffset: 853 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=853 + endOffset: 968 +- name: 'Landing Initial Clients: Recruiters & LinkedIn' + startOffset: 968 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=968 + endOffset: 1053 +- name: Market Trends & Building a Data-Freelancer Job Board + startOffset: 1053 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1053 + endOffset: 1431 +- name: Market-Driven Specialization & Starting Paths + startOffset: 1431 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1431 + endOffset: 1508 +- name: 'Job Board Insights: Rates, Top Skills & "Data Management"' + startOffset: 1508 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1508 + endOffset: 1968 +- name: 'Lifestyle Business vs Agency: Scaling Choices' + startOffset: 1968 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1968 + endOffset: 2281 +- name: 'AI Tools for Productivity: Claude, ChatGPT, Cursor' + startOffset: 2281 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2281 + endOffset: 2730 +- name: 'Course & Community: Branding, Marketing, and Support' + startOffset: 2730 + url: 
https://www.youtube.com/watch?v=S93V8RgwBig&t=2730 + endOffset: 2913 +- name: Subscription Model & Client Relationship Management + startOffset: 2913 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2913 + endOffset: 3140 +- name: 'High-Impact Analytics: Small Analyses, Big Returns' + startOffset: 3140 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3140 + endOffset: 3407 +- name: 'Pricing Strategies: Hourly, Project Packages, and Transitioning' + startOffset: 3407 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3407 + endOffset: 3662 +- name: Notice Periods & Transition Planning for Freelancers + startOffset: 3662 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3662 + endOffset: 3929 +- name: Episode Wrap-up & Final Advice + startOffset: 3929 + url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3929 + endOffset: 3929 transcript: - header: Episode Opening & Dimitri’s Data Journey - header: Episode Opening & Dimitri’s Data Journey @@ -229,8 +298,8 @@ transcript: sec: 1469 time: '24:29' who: Alexey -- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' -- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' +- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' +- header: 'Job Board Insights: Rates, Top Skills & "Data Management"' - line: It really depends on the skills you have. If you're a data analyst, you likely won't become a software engineer overnight—it takes time to learn new skills. I could pull up numbers on how many software engineering roles get filtered out, @@ -590,94 +659,17 @@ transcript: sec: 3929 time: '1:05:29' who: Alexey -intro: How do you move from employed data pro to a sustainable data freelancer who - consistently lands clients, prices services well, and uses AI to boost productivity? 
- In this episode, Dimitri Visnadi — an independent data consultant focused on data - strategy who’s worked with Unilever, Ferrero, Heineken and Red Bull, held roles - at HP and a Google-partnered firm, and holds a Masters in Business Analytics & Computer - Science from UCL — walks through a practical playbook for data freelancers.

- Dimitri covers job-tenure trends and freelancer types, when to sell expertise versus - problem-solving, and how to validate freelance viability with financial targets. - He explains how to land initial clients through recruiters and LinkedIn, the idea - behind a data-freelancer job board, market-driven specialization, and insights on - rates, top skills and data management. You’ll hear about scaling choices (lifestyle - business vs agency), AI tools for productivity (Claude, ChatGPT, Cursor), course - and community approaches for branding and marketing, subscription models and client - relationship management, high-impact small analyses, pricing strategies (hourly - vs packages), and transition planning.

Listen to get concrete guidance - on landing clients, setting prices, structuring offers, and using AI tools to increase - productivity as a freelance data consultant. -description: Master data freelancer tactics, pricing strategies and AI tools to land - clients, price services confidently, and boost productivity for higher income. -dateadded: '2025-07-28' -duration: PT01H05M29S -quotableClips: -- name: Episode Opening & Dimitri’s Data Journey - startOffset: 0 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=0 - endOffset: 341 -- name: Job Tenure Trends & Freelancer Types - startOffset: 341 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=341 - endOffset: 650 -- name: Expertise vs Problem-Solving in Freelance Work - startOffset: 650 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=650 - endOffset: 853 -- name: 'Validating Freelance Viability: Financial Targets' - startOffset: 853 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=853 - endOffset: 968 -- name: 'Landing Initial Clients: Recruiters & LinkedIn' - startOffset: 968 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=968 - endOffset: 1053 -- name: Market Trends & Building a Data-Freelancer Job Board - startOffset: 1053 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1053 - endOffset: 1431 -- name: Market-Driven Specialization & Starting Paths - startOffset: 1431 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1431 - endOffset: 1508 -- name: 'Job Board Insights: Rates, Top Skills & "Data Management"' - startOffset: 1508 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1508 - endOffset: 1968 -- name: 'Lifestyle Business vs Agency: Scaling Choices' - startOffset: 1968 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=1968 - endOffset: 2281 -- name: 'AI Tools for Productivity: Claude, ChatGPT, Cursor' - startOffset: 2281 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2281 - endOffset: 2730 -- name: 'Course & Community: Branding, Marketing, and Support' - startOffset: 
2730 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2730 - endOffset: 2913 -- name: Subscription Model & Client Relationship Management - startOffset: 2913 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=2913 - endOffset: 3140 -- name: 'High-Impact Analytics: Small Analyses, Big Returns' - startOffset: 3140 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3140 - endOffset: 3407 -- name: 'Pricing Strategies: Hourly, Project Packages, and Transitioning' - startOffset: 3407 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3407 - endOffset: 3662 -- name: Notice Periods & Transition Planning for Freelancers - startOffset: 3662 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3662 - endOffset: 3929 -- name: Episode Wrap-up & Final Advice - startOffset: 3929 - url: https://www.youtube.com/watch?v=S93V8RgwBig&t=3929 - endOffset: 3929 ---- +context: 'Context: Through Dimitri’s journey and practical segments on job data, client + acquisition, pricing, AI tools, and scaling, the episode maps the real-world mechanics + of going freelance in data. + Core narrative: The unifying idea is that a sustainable, scalable freelance data + career is built by starting with market demand—validate financial targets, specialize + around high-impact problems, productize repeatable analytics offerings, leverage + productivity tools (including AI) to deliver efficiently, and adopt deliberate pricing + and client-retention models (projects, subscriptions, or agency paths) so you can + reliably land clients, capture value, and grow on your own terms.' 
+--- Links: * [Previous podcast episode](https://datatalks.club/podcast/s16e09-become-data-freelancer.html){:target="_blank"} diff --git a/_podcast/s14e04-data-access-management.md b/_podcast/data-governance-data-access-management.md similarity index 97% rename from _podcast/s14e04-data-access-management.md rename to _podcast/data-governance-data-access-management.md index cc1c5103..3740b6fe 100644 --- a/_podcast/s14e04-data-access-management.md +++ b/_podcast/data-governance-data-access-management.md @@ -1,19 +1,150 @@ --- +title: "Data Governance & Data Access Management: Access Controls, Data Catalogs & Access-as-Code" +short: "Data Governance & Data Access Management" +season: 14 episode: 4 guests: - bartvandekerckhove +image: images/podcast/data-governance-data-access-management.jpg ids: anchor: ow/datatalksclub/episodes/Data-Access-Management---Bart-Vandekerckhove-e253r4u youtube: IiPOIiUy5b4 -image: images/podcast/s14e04-data-access-management.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Data-Access-Management---Bart-Vandekerckhove-e253r4u apple: https://podcasts.apple.com/us/podcast/data-access-management-bart-vandekerckhove/id1541710331?i=1000615456026 spotify: https://open.spotify.com/episode/5PDgK1FsGNtKAAyiXOppRs?si=QZDP8k38Q0e4LaZtl4lCMA youtube: https://www.youtube.com/watch?v=IiPOIiUy5b4 -season: 14 -short: Data Access Management -title: 'Data Access Management: Access Controls, Data Catalogs & Access-as-Code' + +description: "Master Data Access Management with data catalog, access controls & access-as-code to stop privilege creep, speed investigations and ensure compliance" +intro: "How do you scale data access management—from access controls and data catalogs to access-as-code—without slowing innovation? 
In this episode, Bart Vandekerckhove, co-founder and CEO at Raito and former PM of Privacy at Collibra, walks through practical approaches born from consulting with banks (BCBS 239) and tackling early data governance pain.

We explore what effective data governance looks like for building trust in data, the differences between data catalogs, dictionaries and lineage, and how cloud consolidation and Chinese walls shape access management. Bart covers ownership models (data teams, governance teams, data mesh), common skill gaps for data engineers, and core processes: access requests, approvals, reviews and revocation. You’ll hear actionable tactics for preventing privilege creep—time-bound access, revocation workflows—and guidance on GDPR, privacy vs security roles, and debugging with temporary access.

Later segments dive into DataOps patterns (active metadata, automated tagging), avoiding role explosion, and the rise of access-as-code with Terraform and IAM. Listeners will gain a clear, incremental strategy for implementing access controls, leveraging data catalogs, and evaluating build vs buy or open source options to scale data access management." +topics: +- data governance +dateadded: 2023-06-03 + +duration: PT00H55M54S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=0 + endOffset: 87 +- name: 'Episode Overview: Data Access Management & Guest Summary' + startOffset: 87 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=87 + endOffset: 143 +- name: Guest Introduction & Career Path + startOffset: 143 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=143 + endOffset: 206 +- name: 'Consulting Background: Banks, BCBS 239 and data trauma' + startOffset: 206 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=206 + endOffset: 280 +- name: 'Early Data Governance Pain: Manual tools and outdated lineage' + startOffset: 280 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=280 + endOffset: 320 +- name: 'Defining Data Governance: Building trust in data' + startOffset: 320 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=320 + endOffset: 412 +- name: 'Legacy Governance Problems: Top-down models and friction' + startOffset: 412 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=412 + endOffset: 538 +- name: 'Data Catalogs, Dictionaries & Lineage: Purpose and differences' + startOffset: 538 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=538 + endOffset: 680 +- name: 'Data Access Management Defined: Cloud consolidation and Chinese walls' + startOffset: 680 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=680 + endOffset: 814 +- name: 'Ownership Models: Data teams, governance teams, and data mesh' + startOffset: 814 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=814 + 
endOffset: 887 +- name: 'Data Engineers & Access Requests: Skill gaps and role mismatch' + startOffset: 887 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=887 + endOffset: 1038 +- name: 'Governance Skillset: Change management and DMBOK guidance' + startOffset: 1038 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1038 + endOffset: 1188 +- name: 'Maturing Access Management: Incremental improvement and scaling' + startOffset: 1188 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1188 + endOffset: 1310 +- name: 'Learning Resources: Books, Slack communities, and conferences' + startOffset: 1310 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1310 + endOffset: 1383 +- name: 'When to Invest: Size, maturity signals, and trust erosion' + startOffset: 1383 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1383 + endOffset: 1505 +- name: Start with Access Controls Early for Sensitive Data + startOffset: 1505 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1505 + endOffset: 1669 +- name: 'Core Processes: Access requests, approvals, reviews, revocation' + startOffset: 1669 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1669 + endOffset: 1776 +- name: 'Churn Use Case: Catalog discovery, purpose-based access requests' + startOffset: 1776 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1776 + endOffset: 1928 +- name: 'Privilege Creep & Best Practices: Time-bound access and revocation' + startOffset: 1928 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1928 + endOffset: 2002 +- name: 'Regulatory Context: GDPR, privacy awareness, and EU perspective' + startOffset: 2002 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2002 + endOffset: 2135 +- name: 'Debugging in Production: Temporary access and investigation workflows' + startOffset: 2135 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2135 + endOffset: 2239 +- name: 'Privacy vs Security Stakeholders: DPO needs and CISO responsibilities' + startOffset: 2239 + url: 
https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2239 + endOffset: 2540 +- name: 'Data Mesh & Sensitive Data: Federated governance, masking, filtering' + startOffset: 2540 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2540 + endOffset: 2695 +- name: 'Avoiding Role Explosion: Role inheritance, reviews, and alerts' + startOffset: 2695 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2695 + endOffset: 2802 +- name: 'Governance in DataOps: Active metadata, automated tagging, and pipelines' + startOffset: 2802 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2802 + endOffset: 3008 +- name: 'Access-as-Code Beginnings: Terraform, IAM and early patterns' + startOffset: 3008 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3008 + endOffset: 3230 +- name: 'Build vs Buy: Maintenance cost, connector updates, key-person risk' + startOffset: 3230 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3230 + endOffset: 3282 +- name: 'Gradual Adoption Strategy: Visibility-first onboarding and automation' + startOffset: 3282 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3282 + endOffset: 3356 +- name: 'Open Source Options: Raito CLI, Terraform patterns and limitations' + startOffset: 3356 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3356 + endOffset: 3396 +- name: Closing Remarks & Next Steps + startOffset: 3396 + url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3396 + endOffset: 3354 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Data Access Management & Guest Summary' @@ -1052,147 +1183,6 @@ transcript: sec: 3441 time: '57:21' who: Alexey -description: Master Data Access Management with data catalog, access controls & access-as-code - to stop privilege creep, speed investigations and ensure compliance. -intro: 'How do you scale data access management—from access controls and data catalogs - to access-as-code—without slowing innovation? 
In this episode, Bart Vandekerckhove, - co-founder and CEO at Raito and former PM of Privacy at Collibra, walks through - practical approaches born from consulting with banks (BCBS 239) and tackling early - data governance pain.

We explore what effective data governance looks like - for building trust in data, the differences between data catalogs, dictionaries - and lineage, and how cloud consolidation and Chinese walls shape access management. - Bart covers ownership models (data teams, governance teams, data mesh), common skill - gaps for data engineers, and core processes: access requests, approvals, reviews - and revocation. You’ll hear actionable tactics for preventing privilege creep—time-bound - access, revocation workflows—and guidance on GDPR, privacy vs security roles, and - debugging with temporary access.

Later segments dive into DataOps patterns - (active metadata, automated tagging), avoiding role explosion, and the rise of access-as-code - with Terraform and IAM. Listeners will gain a clear, incremental strategy for implementing - access controls, leveraging data catalogs, and evaluating build vs buy or open source - options to scale data access management.' -dateadded: '2023-06-03' -duration: PT00H55M54S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=0 - endOffset: 87 -- name: 'Episode Overview: Data Access Management & Guest Summary' - startOffset: 87 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=87 - endOffset: 143 -- name: Guest Introduction & Career Path - startOffset: 143 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=143 - endOffset: 206 -- name: 'Consulting Background: Banks, BCBS 239 and data trauma' - startOffset: 206 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=206 - endOffset: 280 -- name: 'Early Data Governance Pain: Manual tools and outdated lineage' - startOffset: 280 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=280 - endOffset: 320 -- name: 'Defining Data Governance: Building trust in data' - startOffset: 320 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=320 - endOffset: 412 -- name: 'Legacy Governance Problems: Top-down models and friction' - startOffset: 412 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=412 - endOffset: 538 -- name: 'Data Catalogs, Dictionaries & Lineage: Purpose and differences' - startOffset: 538 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=538 - endOffset: 680 -- name: 'Data Access Management Defined: Cloud consolidation and Chinese walls' - startOffset: 680 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=680 - endOffset: 814 -- name: 'Ownership Models: Data teams, governance teams, and data mesh' - startOffset: 814 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=814 - endOffset: 887 -- name: 'Data 
Engineers & Access Requests: Skill gaps and role mismatch' - startOffset: 887 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=887 - endOffset: 1038 -- name: 'Governance Skillset: Change management and DMBOK guidance' - startOffset: 1038 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1038 - endOffset: 1188 -- name: 'Maturing Access Management: Incremental improvement and scaling' - startOffset: 1188 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1188 - endOffset: 1310 -- name: 'Learning Resources: Books, Slack communities, and conferences' - startOffset: 1310 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1310 - endOffset: 1383 -- name: 'When to Invest: Size, maturity signals, and trust erosion' - startOffset: 1383 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1383 - endOffset: 1505 -- name: Start with Access Controls Early for Sensitive Data - startOffset: 1505 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1505 - endOffset: 1669 -- name: 'Core Processes: Access requests, approvals, reviews, revocation' - startOffset: 1669 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1669 - endOffset: 1776 -- name: 'Churn Use Case: Catalog discovery, purpose-based access requests' - startOffset: 1776 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1776 - endOffset: 1928 -- name: 'Privilege Creep & Best Practices: Time-bound access and revocation' - startOffset: 1928 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=1928 - endOffset: 2002 -- name: 'Regulatory Context: GDPR, privacy awareness, and EU perspective' - startOffset: 2002 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2002 - endOffset: 2135 -- name: 'Debugging in Production: Temporary access and investigation workflows' - startOffset: 2135 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2135 - endOffset: 2239 -- name: 'Privacy vs Security Stakeholders: DPO needs and CISO responsibilities' - startOffset: 2239 - url: 
https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2239 - endOffset: 2540 -- name: 'Data Mesh & Sensitive Data: Federated governance, masking, filtering' - startOffset: 2540 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2540 - endOffset: 2695 -- name: 'Avoiding Role Explosion: Role inheritance, reviews, and alerts' - startOffset: 2695 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2695 - endOffset: 2802 -- name: 'Governance in DataOps: Active metadata, automated tagging, and pipelines' - startOffset: 2802 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=2802 - endOffset: 3008 -- name: 'Access-as-Code Beginnings: Terraform, IAM and early patterns' - startOffset: 3008 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3008 - endOffset: 3230 -- name: 'Build vs Buy: Maintenance cost, connector updates, key-person risk' - startOffset: 3230 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3230 - endOffset: 3282 -- name: 'Gradual Adoption Strategy: Visibility-first onboarding and automation' - startOffset: 3282 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3282 - endOffset: 3356 -- name: 'Open Source Options: Raito CLI, Terraform patterns and limitations' - startOffset: 3356 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3356 - endOffset: 3396 -- name: Closing Remarks & Next Steps - startOffset: 3396 - url: https://www.youtube.com/watch?v=IiPOIiUy5b4&t=3396 - endOffset: 3354 --- Links: diff --git a/_podcast/s06e02-non-technical-interviews.md b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md similarity index 99% rename from _podcast/s06e02-non-technical-interviews.md rename to _podcast/data-interview-behavioral-and-portfolio-prep-guide.md index c27d8f07..ef379075 100644 --- a/_podcast/s06e02-non-technical-interviews.md +++ b/_podcast/data-interview-behavioral-and-portfolio-prep-guide.md @@ -1,13 +1,11 @@ --- -title: 'Ace Data Interviews: Behavioral STARs, Case Strategy, Portfolios & Cold Emails' -short: Ace Non-Technical Data 
Science Interviews -guests: -- nicksingh -image: images/podcast/s06e02-non-technical-interviews.jpg -description: 'Master behavioral interviews & prep to break into data roles: build - an impact portfolio, use STAR stories, nail case interviews and cold emails.' +title: "Ace Data Interviews: Behavioral STARs, Case Strategy, Portfolios & Cold Emails" +short: "Ace Non-Technical Data Science Interviews" season: 6 episode: 2 +guests: +- nicksingh +image: images/podcast/data-interview-behavioral-and-portfolio-prep-guide.jpg ids: youtube: tRdLVUKU7Bo anchor: Ace-Non-Technical-Data-Science-Interviews---Nick-Singh-e1a5qtd @@ -16,6 +14,124 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Ace-Non-Technical-Data-Science-Interviews---Nick-Singh-e1a5qtd spotify: https://open.spotify.com/episode/7tO8GmqAcFUUk4fLqxEXy1 apple: https://podcasts.apple.com/us/podcast/ace-non-technical-data-science-interviews-nick-singh/id1541710331?i=1000541631687 + +description: "Master behavioral interviews & prep to break into data roles: build an impact portfolio, use STAR stories, nail case interviews and cold emails." 
+topics: +- data science +- machine learning +- MLOps +- product management +- job search +dateadded: 2021-11-12 + +duration: PT01H01M38S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=0 + endOffset: 118 +- name: 'Guest Overview: Nick Singh’s career and book' + startOffset: 118 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=118 + endOffset: 257 +- name: 'Career Coaching Focus: Helping candidates break into data roles' + startOffset: 257 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=257 + endOffset: 311 +- name: 'Hiring Process Breakdown: Screens, assessments, and panel interviews' + startOffset: 311 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=311 + endOffset: 417 +- name: 'Industry Trends: Why multiple interview rounds are common' + startOffset: 417 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=417 + endOffset: 538 +- name: 'Behavioral Interviews: Purpose and what interviewers seek' + startOffset: 538 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=538 + endOffset: 800 +- name: 'Behavioral Prep Method: Grid planning and STAR storytelling' + startOffset: 800 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=800 + endOffset: 1127 +- name: 'Practiced Delivery: Preparation without sounding scripted' + startOffset: 1127 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1127 + endOffset: 1185 +- name: 'Handling Tricky Prompts: Common pitfalls and recoveries' + startOffset: 1185 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1185 + endOffset: 1513 +- name: 'Project Walkthroughs: Detailing work while showing ownership' + startOffset: 1513 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1513 + endOffset: 1670 +- name: 'Lead with Impact: Pyramid principle for concise results-first stories' + startOffset: 1670 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1670 + endOffset: 1866 +- name: 'Business Context: Translating technical work into product 
value' + startOffset: 1866 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1866 + endOffset: 2039 +- name: 'Controlling Pacing: Avoiding rambling and burying the lead' + startOffset: 2039 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2039 + endOffset: 2238 +- name: 'Technical Claims: Only present models you can defend' + startOffset: 2238 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2238 + endOffset: 2297 +- name: 'Favorite-Model Strategy: Choose familiar, project-backed techniques' + startOffset: 2297 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2297 + endOffset: 2382 +- name: 'Portfolio Impact: Quantifying non-enterprise projects' + startOffset: 2382 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2382 + endOffset: 2471 +- name: 'Senior vs. Junior: Differences in behavioral and case expectations' + startOffset: 2471 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2471 + endOffset: 2667 +- name: 'Case Interview Approach: Clarify goals before proposing solutions' + startOffset: 2667 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2667 + endOffset: 2730 +- name: 'Product-Sense Interviews: Metrics, assumptions, and brainstorming' + startOffset: 2730 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2730 + endOffset: 2974 +- name: 'Metric Identification: Researching unfamiliar domains effectively' + startOffset: 2974 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2974 + endOffset: 3191 +- name: 'Company Context: Using reports and product knowledge to prepare' + startOffset: 3191 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3191 + endOffset: 3295 +- name: 'Read Tech Blogs: Learning production and architecture from case studies' + startOffset: 3295 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3295 + endOffset: 3506 +- name: 'Outreach Strategy: Cold emailing hiring managers and recruiters' + startOffset: 3506 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3506 + endOffset: 3659 +- name: 'Cold Email 
Examples: Showcasing projects with links and visuals' + startOffset: 3659 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3659 + endOffset: 3736 +- name: 'Final Tips and Resources: Book recommendations and next steps' + startOffset: 3736 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3736 + endOffset: 3767 +- name: 'Contact & Follow-up: NickSingh.com and LinkedIn' + startOffset: 3767 + url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3767 + endOffset: 3698 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Nick Singh’s career and book' @@ -1904,111 +2020,4 @@ transcript: sec: 3816 time: '1:03:36' who: Nick -dateadded: '2021-11-12' -duration: PT01H01M38S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=0 - endOffset: 118 -- name: 'Guest Overview: Nick Singh’s career and book' - startOffset: 118 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=118 - endOffset: 257 -- name: 'Career Coaching Focus: Helping candidates break into data roles' - startOffset: 257 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=257 - endOffset: 311 -- name: 'Hiring Process Breakdown: Screens, assessments, and panel interviews' - startOffset: 311 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=311 - endOffset: 417 -- name: 'Industry Trends: Why multiple interview rounds are common' - startOffset: 417 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=417 - endOffset: 538 -- name: 'Behavioral Interviews: Purpose and what interviewers seek' - startOffset: 538 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=538 - endOffset: 800 -- name: 'Behavioral Prep Method: Grid planning and STAR storytelling' - startOffset: 800 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=800 - endOffset: 1127 -- name: 'Practiced Delivery: Preparation without sounding scripted' - startOffset: 1127 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1127 - endOffset: 1185 -- name: 'Handling 
Tricky Prompts: Common pitfalls and recoveries' - startOffset: 1185 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1185 - endOffset: 1513 -- name: 'Project Walkthroughs: Detailing work while showing ownership' - startOffset: 1513 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1513 - endOffset: 1670 -- name: 'Lead with Impact: Pyramid principle for concise results-first stories' - startOffset: 1670 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1670 - endOffset: 1866 -- name: 'Business Context: Translating technical work into product value' - startOffset: 1866 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=1866 - endOffset: 2039 -- name: 'Controlling Pacing: Avoiding rambling and burying the lead' - startOffset: 2039 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2039 - endOffset: 2238 -- name: 'Technical Claims: Only present models you can defend' - startOffset: 2238 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2238 - endOffset: 2297 -- name: 'Favorite-Model Strategy: Choose familiar, project-backed techniques' - startOffset: 2297 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2297 - endOffset: 2382 -- name: 'Portfolio Impact: Quantifying non-enterprise projects' - startOffset: 2382 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2382 - endOffset: 2471 -- name: 'Senior vs. 
Junior: Differences in behavioral and case expectations' - startOffset: 2471 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2471 - endOffset: 2667 -- name: 'Case Interview Approach: Clarify goals before proposing solutions' - startOffset: 2667 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2667 - endOffset: 2730 -- name: 'Product-Sense Interviews: Metrics, assumptions, and brainstorming' - startOffset: 2730 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2730 - endOffset: 2974 -- name: 'Metric Identification: Researching unfamiliar domains effectively' - startOffset: 2974 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=2974 - endOffset: 3191 -- name: 'Company Context: Using reports and product knowledge to prepare' - startOffset: 3191 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3191 - endOffset: 3295 -- name: 'Read Tech Blogs: Learning production and architecture from case studies' - startOffset: 3295 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3295 - endOffset: 3506 -- name: 'Outreach Strategy: Cold emailing hiring managers and recruiters' - startOffset: 3506 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3506 - endOffset: 3659 -- name: 'Cold Email Examples: Showcasing projects with links and visuals' - startOffset: 3659 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3659 - endOffset: 3736 -- name: 'Final Tips and Resources: Book recommendations and next steps' - startOffset: 3736 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3736 - endOffset: 3767 -- name: 'Contact & Follow-up: NickSingh.com and LinkedIn' - startOffset: 3767 - url: https://www.youtube.com/watch?v=tRdLVUKU7Bo&t=3767 - endOffset: 3698 --- diff --git a/_podcast/s11e08-technical-writing-and-data-journalism.md b/_podcast/data-journalism-python-visualization-storytelling.md similarity index 96% rename from _podcast/s11e08-technical-writing-and-data-journalism.md rename to _podcast/data-journalism-python-visualization-storytelling.md index 
21204a5f..87c5d052 100644 --- a/_podcast/s11e08-technical-writing-and-data-journalism.md +++ b/_podcast/data-journalism-python-visualization-storytelling.md @@ -1,20 +1,141 @@ --- +title: "Practical Data Journalism: Sourcing, Storytelling, Visualization & Tools (Python, Tableau)" +short: "Technical Writing and Data Journalism" +season: 11 episode: 8 guests: - angelicaloduca +image: images/podcast/data-journalism-python-visualization-storytelling.jpg ids: anchor: Technical-Writing-and-Data-Journalism---Angelica-Lo-Duca-e1r7j8k youtube: uO_lk12q02A -image: images/podcast/s11e08-technical-writing-and-data-journalism.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Technical-Writing-and-Data-Journalism---Angelica-Lo-Duca-e1r7j8k apple: https://podcasts.apple.com/us/podcast/technical-writing-and-data-journalism-angelica-lo-duca/id1541710331?i=1000587507530 spotify: https://open.spotify.com/episode/38b2Y9KgxSFlIHPZ3jqheK?si=SPiURO1bTamVKrKV_laVDQ youtube: https://www.youtube.com/watch?v=uO_lk12q02A -season: 11 -short: Technical Writing and Data Journalism -title: 'Practical Data Journalism: Sourcing, Storytelling, Visualization & Tools (Python, - Tableau)' + +description: "Discover data journalism: sourcing, storytelling & visualization with Python and Tableau—learn tools, workflows and publishing tips to craft compelling stories." +intro: "How do you transform raw data into compelling, trustworthy journalism that readers can understand and act upon? In this episode, Angelica Lo Duca—researcher at the Institute of Informatics and Telematics (CNR) and Data Journalism professor at the University of Pisa—shares practical frameworks for data journalism covering sourcing, storytelling, visualization, and essential tools like Python and Tableau.

Drawing from her journey through cryptography, web development, and data science, Angelica tackles real-world challenges: finding reliable small datasets on the web, working with query engines like Presto and Trino, and learning from investigative work like Washington Post projects. She distinguishes data journalism from data science, reveals teaching approaches for digital humanities students, and breaks down an effective writer's workflow: problem identification → solution development → clear results presentation, complete with code repositories and step-by-step clarity.

You'll gain concrete strategies for converting dense reports and survey PDFs into engaging narratives, visualization best practices (one concept per chart, choosing tables over confusing pie charts), tool selection guidance between Python scripting and Tableau, plus curated learning resources. Whether you're a journalist exploring data tools, a data professional interested in storytelling, or an educator teaching interdisciplinary skills, this episode delivers actionable methods for reliable sourcing, effective narrative construction, and clear data visualization that makes complex information accessible and impactful." +topics: +- data journalism +- data science +- data visualization +- tools +dateadded: 2022-11-26 + +duration: PT01H01M37S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=0 + endOffset: 113 +- name: 'Guest Introduction: Angelica Lo Duca, researcher & professor' + startOffset: 113 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=113 + endOffset: 149 +- name: 'Career Journey: Cryptography to Web Applications and Data Science' + startOffset: 149 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=149 + endOffset: 281 +- name: 'Data Engineering Research Interests: security and data integrity' + startOffset: 281 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=281 + endOffset: 357 +- name: 'Writing Portfolio: novels, technical articles, and Comet for Data Science' + startOffset: 357 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=357 + endOffset: 404 +- name: 'Query Engines: Presto, Trino, and real-world migrations' + startOffset: 404 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=404 + endOffset: 463 +- name: 'Defining Data Journalism: data-driven news vs. 
storytelling' + startOffset: 463 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=463 + endOffset: 481 +- name: 'Data Journalism vs Data Science: accuracy, methods, and scope' + startOffset: 481 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=481 + endOffset: 671 +- name: 'Investigative Examples: Washington Post and international projects' + startOffset: 671 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=671 + endOffset: 911 +- name: 'Data Sourcing Challenges: finding small, accurate datasets on the web' + startOffset: 911 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=911 + endOffset: 973 +- name: 'Teaching Shift: how Angelica started teaching data journalism' + startOffset: 973 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=973 + endOffset: 1129 +- name: 'Course Audience: digital humanities students and interdisciplinary skills' + startOffset: 1129 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1129 + endOffset: 1213 +- name: 'Tool Choices: Python scripting vs. 
Tableau for data journalism' + startOffset: 1213 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1213 + endOffset: 1283 +- name: 'Learning Resources: Coursera and recommended readings' + startOffset: 1283 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1283 + endOffset: 1475 +- name: 'Defining Technical Writing: how-to guides, clarity, and audience focus' + startOffset: 1475 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1475 + endOffset: 1759 +- name: 'From Reports to Stories: converting survey PDFs into narratives' + startOffset: 1759 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1759 + endOffset: 1945 +- name: 'Adding Context & Wisdom: framing data with meaning and calls to action' + startOffset: 1945 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1945 + endOffset: 2180 +- name: 'Visualization Guidelines: one concept per chart; tables when clearer' + startOffset: 2180 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2180 + endOffset: 2306 +- name: 'Visualization Pitfalls: why to avoid pie charts and confusing graphics' + startOffset: 2306 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2306 + endOffset: 2352 +- name: 'Article Length & Formats: short Medium posts and the Syntax Error publication' + startOffset: 2352 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2352 + endOffset: 2447 +- name: 'Article Workflow: problem → solution → result, with code repos' + startOffset: 2447 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2447 + endOffset: 2620 +- name: 'Topic Sourcing: personal problems, social media, and community signals' + startOffset: 2620 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2620 + endOffset: 2735 +- name: 'Path to a Book: publisher outreach and acquisition editor contact' + startOffset: 2735 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2735 + endOffset: 3019 +- name: 'Book Contract & Schedule: chapter timelines, pacing, and holidays' + startOffset: 3019 + url: 
https://www.youtube.com/watch?v=uO_lk12q02A&t=3019 + endOffset: 3257 +- name: 'Market Research & Audience: proposal, state-of-the-art, and level targeting' + startOffset: 3257 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3257 + endOffset: 3609 +- name: 'Editing & Reviews: reviewer feedback, overlapping revisions, and organization' + startOffset: 3609 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3609 + endOffset: 3743 +- name: 'Episode Wrap-Up: final questions and closing remarks' + startOffset: 3743 + url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3743 + endOffset: 3697 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Angelica Lo Duca, researcher & professor' @@ -984,136 +1105,6 @@ transcript: sec: 3810 time: '1:03:30' who: Alexey -description: 'Discover data journalism: sourcing, storytelling & visualization with - Python and Tableau—learn tools, workflows and publishing tips to craft compelling - stories.' -intro: 'How do you turn messy, hard-to-find data into clear, accountable journalism? - In this episode, Angelica Lo Duca — researcher at the Institute of Informatics and - Telematics (CNR) and Data Journalism professor at the University of Pisa — walks - through practical approaches to data journalism: sourcing, storytelling, visualization, - and tools like Python and Tableau. Drawing on a career from cryptography to web - apps and data science, Angelica covers data sourcing challenges (including finding - small, accurate web datasets), query engines and migrations (Presto, Trino), and - examples of investigative projects such as work referenced from the Washington Post. - She contrasts data journalism with data science, explains teaching strategies for - digital humanities students, and outlines a writer’s workflow: problem → solution - → result, with code repositories and how-to clarity. 
Expect concrete guidance on - converting reports and survey PDFs into narratives, visualization rules (one concept - per chart; prefer tables when clearer; avoid confusing pie charts), tool choices - between Python scripting and Tableau, and curated learning resources. Listen to - learn actionable methods for reliable data sourcing, effective data storytelling, - and clean data visualization you can apply to reporting projects.' -dateadded: '2022-11-26' -duration: PT01H01M37S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=0 - endOffset: 113 -- name: 'Guest Introduction: Angelica Lo Duca, researcher & professor' - startOffset: 113 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=113 - endOffset: 149 -- name: 'Career Journey: Cryptography to Web Applications and Data Science' - startOffset: 149 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=149 - endOffset: 281 -- name: 'Data Engineering Research Interests: security and data integrity' - startOffset: 281 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=281 - endOffset: 357 -- name: 'Writing Portfolio: novels, technical articles, and Comet for Data Science' - startOffset: 357 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=357 - endOffset: 404 -- name: 'Query Engines: Presto, Trino, and real-world migrations' - startOffset: 404 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=404 - endOffset: 463 -- name: 'Defining Data Journalism: data-driven news vs. 
storytelling' - startOffset: 463 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=463 - endOffset: 481 -- name: 'Data Journalism vs Data Science: accuracy, methods, and scope' - startOffset: 481 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=481 - endOffset: 671 -- name: 'Investigative Examples: Washington Post and international projects' - startOffset: 671 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=671 - endOffset: 911 -- name: 'Data Sourcing Challenges: finding small, accurate datasets on the web' - startOffset: 911 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=911 - endOffset: 973 -- name: 'Teaching Shift: how Angelica started teaching data journalism' - startOffset: 973 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=973 - endOffset: 1129 -- name: 'Course Audience: digital humanities students and interdisciplinary skills' - startOffset: 1129 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1129 - endOffset: 1213 -- name: 'Tool Choices: Python scripting vs. 
Tableau for data journalism' - startOffset: 1213 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1213 - endOffset: 1283 -- name: 'Learning Resources: Coursera and recommended readings' - startOffset: 1283 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1283 - endOffset: 1475 -- name: 'Defining Technical Writing: how-to guides, clarity, and audience focus' - startOffset: 1475 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1475 - endOffset: 1759 -- name: 'From Reports to Stories: converting survey PDFs into narratives' - startOffset: 1759 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1759 - endOffset: 1945 -- name: 'Adding Context & Wisdom: framing data with meaning and calls to action' - startOffset: 1945 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=1945 - endOffset: 2180 -- name: 'Visualization Guidelines: one concept per chart; tables when clearer' - startOffset: 2180 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2180 - endOffset: 2306 -- name: 'Visualization Pitfalls: why to avoid pie charts and confusing graphics' - startOffset: 2306 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2306 - endOffset: 2352 -- name: 'Article Length & Formats: short Medium posts and the Syntax Error publication' - startOffset: 2352 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2352 - endOffset: 2447 -- name: 'Article Workflow: problem → solution → result, with code repos' - startOffset: 2447 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2447 - endOffset: 2620 -- name: 'Topic Sourcing: personal problems, social media, and community signals' - startOffset: 2620 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2620 - endOffset: 2735 -- name: 'Path to a Book: publisher outreach and acquisition editor contact' - startOffset: 2735 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=2735 - endOffset: 3019 -- name: 'Book Contract & Schedule: chapter timelines, pacing, and holidays' - startOffset: 3019 - url: 
https://www.youtube.com/watch?v=uO_lk12q02A&t=3019 - endOffset: 3257 -- name: 'Market Research & Audience: proposal, state-of-the-art, and level targeting' - startOffset: 3257 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3257 - endOffset: 3609 -- name: 'Editing & Reviews: reviewer feedback, overlapping revisions, and organization' - startOffset: 3609 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3609 - endOffset: 3743 -- name: 'Episode Wrap-Up: final questions and closing remarks' - startOffset: 3743 - url: https://www.youtube.com/watch?v=uO_lk12q02A&t=3743 - endOffset: 3697 --- Links: diff --git a/_podcast/s18e01-inclusive-data-leadership-coaching.md b/_podcast/data-leadership-coaching.md similarity index 96% rename from _podcast/s18e01-inclusive-data-leadership-coaching.md rename to _podcast/data-leadership-coaching.md index ede42bf4..3a733c96 100644 --- a/_podcast/s18e01-inclusive-data-leadership-coaching.md +++ b/_podcast/data-leadership-coaching.md @@ -1,23 +1,134 @@ --- +title: "Data Leadership Coaching: Transition to Manager, Stakeholder Skills and Team Impact" +short: "Inclusive Data Leadership Coaching" +season: 18 episode: 1 guests: - terezaiofciu +image: images/podcast/data-leadership-coaching.jpg ids: - anchor: 'on has three major components, this includes the main AI framework which - is the “TermAIte”, the main database, and the mobile application. These three - components work together to process the data that is inputted into the system - by the user. The data includes the images of the wood and the different environmental - conditions readings: temperature, humidity, and wood moisture. 
' youtube: Z4vOTgzLkJQ -image: images/podcast/s18e01-inclusive-data-leadership-coaching.jpg links: apple: https://podcasts.apple.com/us/podcast/inclusive-data-leadership-coaching-tereza-iofciu/id1541710331?i=1000650865043 spotify: https://open.spotify.com/episode/3zVzlQ0NmAVCtaFQXbqvHE?si=sSZhU-KXRamv2x5YZCDxAg youtube: https://www.youtube.com/watch?v=Z4vOTgzLkJQ -season: 18 -short: Inclusive Data Leadership Coaching -title: 'Data Leadership Coaching: Transition to Manager, Feedback Skills & Influencing - Without Authority' +description: "Master data leadership coaching: transition to manager, build stakeholder skills, and boost team impact with feedback, visibility, and influence strategies." +topics: +- leadership +- career transition +- communication +- team building +- data strategy +intro: "How do you move from a strong individual contributor into a data leader who can influence stakeholders, grow team impact, and build inclusive practices? In this episode Tereza Iofciu—data science manager, data scientist, data engineer, product manager, coach and community organizer—walks through her transition from a PhD in computer science to leading teams and running data leadership coaching.

We cover the practical challenges of the manager transition, experiments that shaped her coaching approach, and ways to scale manager bandwidth using the “pizza” span-of-control metaphor. Tereza breaks down feedback skills, psychological safety, and routines for team feedback training, plus leadership learning through workshops and frameworks. You’ll hear actionable guidance on increasing impact and promotions, making foundational data work visible with product mindsets and KPIs, and influencing without authority by framing projects to stakeholders’ priorities. The conversation also explores cross-functional and inclusive leadership, self-promotion versus bragging, and concrete coaching formats like one-shot sessions, CV reviews, and Calendly-driven delivery.

Listen if you want practical data leadership coaching on managing the IC-to-manager shift, stakeholder skills, and boosting your team’s measurable impact." +dateadded: 2024-03-31 +duration: PT00H56M35S +quotableClips: +- name: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) + startOffset: 86 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=86 + endOffset: 161 +- name: 'Career Journey: From Computer Science PhD to Data Lead and Coach' + startOffset: 161 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=161 + endOffset: 224 +- name: 'Transition to Coaching: Stepping back from product responsibility' + startOffset: 224 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=224 + endOffset: 377 +- name: 'Career Shift: Challenges of moving from Individual Contributor to Lead' + startOffset: 377 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=377 + endOffset: 472 +- name: 'Early Coaching Experiments: Free sessions to learn real problems' + startOffset: 472 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=472 + endOffset: 555 +- name: 'Manager Bandwidth Limits: Need for independent problem-solving' + startOffset: 555 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=555 + endOffset: 689 +- name: 'Community Format: Python Pizza conference and newcomer talks' + startOffset: 689 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=689 + endOffset: 758 +- name: 'Team Span-of-Control: Pizza metaphor for sustainable management' + startOffset: 758 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=758 + endOffset: 854 +- name: 'Leadership Learning: Courses, awareness, and building feedback culture' + startOffset: 854 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=854 + endOffset: 1022 +- name: 'Leadership Training vs. 
Self-Study: Practical workshops and frameworks' + startOffset: 1022 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1022 + endOffset: 1183 +- name: 'Feedback Skills: Giving constructive feedback without hurting relationships' + startOffset: 1183 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1183 + endOffset: 1218 +- name: 'Team Feedback Training: Psychological safety and practiced routines' + startOffset: 1218 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1218 + endOffset: 1387 +- name: 'Coaching Focus: Increasing impact, promotions, and strategic mindset' + startOffset: 1387 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1387 + endOffset: 1472 +- name: 'Data Work Visibility: Foundation work, product mindset, and KPIs' + startOffset: 1472 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1472 + endOffset: 1710 +- name: 'Coaching Delivery: LinkedIn, Calendly, one-shot sessions, and CV reviews' + startOffset: 1710 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1710 + endOffset: 1959 +- name: 'Side Projects & Partnerships: PyPodcats, Shades & Contrast, Responsible AI' + startOffset: 1959 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1959 + endOffset: 2078 +- name: 'Coaching Approach: Blending coaching, mentoring, and practical advice' + startOffset: 2078 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2078 + endOffset: 2174 +- name: 'Self-Promotion vs. 
Bragging: CV culture and owning achievements' + startOffset: 2174 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2174 + endOffset: 2313 +- name: 'Personal Retrospectives: Tracking wins and the two-year rule for topics' + startOffset: 2313 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2313 + endOffset: 2618 +- name: 'Communication Overhead: Behind-the-scenes work for models and open source' + startOffset: 2618 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2618 + endOffset: 2760 +- name: 'Influencing Without Authority: Speaking different work languages & active + listening' + startOffset: 2760 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2760 + endOffset: 2960 +- name: 'Stakeholder Framing: Connecting projects to what''s important for others' + startOffset: 2960 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2960 + endOffset: 3023 +- name: 'Empathy in Practice: Role perspective-taking for better collaboration' + startOffset: 3023 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3023 + endOffset: 3230 +- name: 'Cross-Functional Leadership: Emotional intelligence and people care' + startOffset: 3230 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3230 + endOffset: 3264 +- name: 'Inclusive Leadership: Defining inclusion, avoiding exclusivity, and cultural + diversity' + startOffset: 3264 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3264 + endOffset: 3468 +- name: 'Closing & Contact: How to reach Tereza, Calendly and further resources' + startOffset: 3468 + url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3468 + endOffset: 3395 transcript: - header: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) @@ -1096,137 +1207,12 @@ transcript: sec: 3481 time: '58:01' who: Tereza -description: Learn data leadership, feedback skills and influencing without authority - to transition to manager, increase impact and lead cross-functional teams. 
-intro: How do you move from an individual contributor to an effective data leader - while coaching teams, giving constructive feedback, and influencing without formal - authority? In this episode, Tereza Iofciu—an experienced data practitioner who has - worked as a data scientist, data engineer, product manager, leads a coaching team, - and teaches data science at neuefische—walks through the practical challenges of - that transition. She shares her career journey from a computer science PhD to data - lead and coach, early coaching experiments, and why managers need teammates who - can solve problems independently.

Key topics include transition-to-manager - tactics, building feedback skills and psychological safety, designing sustainable - team span-of-control (the “pizza” metaphor), making foundational data work visible - with product-minded KPIs, and influencing without authority through stakeholder - framing, active listening, and empathy. Tereza also covers coaching delivery formats—one-shot - sessions, CV reviews, and community initiatives like PyLadies and conference newcomer - talks—and how to blend coaching, mentoring, and practical advice.

If you’re - stepping into a lead role or coaching data teams, listen for actionable frameworks, - feedback routines, and inclusive leadership practices to increase impact, visibility, - and promotion readiness. Closing notes include how to reach Tereza and schedule - time via Calendly. -dateadded: '2024-03-31' -duration: PT00H56M35S -quotableClips: -- name: Episode Introduction & Guest Re-introduction (Inclusive Data Leadership Coaching) - startOffset: 86 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=86 - endOffset: 161 -- name: 'Career Journey: From Computer Science PhD to Data Lead and Coach' - startOffset: 161 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=161 - endOffset: 224 -- name: 'Transition to Coaching: Stepping back from product responsibility' - startOffset: 224 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=224 - endOffset: 377 -- name: 'Career Shift: Challenges of moving from Individual Contributor to Lead' - startOffset: 377 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=377 - endOffset: 472 -- name: 'Early Coaching Experiments: Free sessions to learn real problems' - startOffset: 472 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=472 - endOffset: 555 -- name: 'Manager Bandwidth Limits: Need for independent problem-solving' - startOffset: 555 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=555 - endOffset: 689 -- name: 'Community Format: Python Pizza conference and newcomer talks' - startOffset: 689 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=689 - endOffset: 758 -- name: 'Team Span-of-Control: Pizza metaphor for sustainable management' - startOffset: 758 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=758 - endOffset: 854 -- name: 'Leadership Learning: Courses, awareness, and building feedback culture' - startOffset: 854 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=854 - endOffset: 1022 -- name: 'Leadership Training vs. 
Self-Study: Practical workshops and frameworks' - startOffset: 1022 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1022 - endOffset: 1183 -- name: 'Feedback Skills: Giving constructive feedback without hurting relationships' - startOffset: 1183 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1183 - endOffset: 1218 -- name: 'Team Feedback Training: Psychological safety and practiced routines' - startOffset: 1218 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1218 - endOffset: 1387 -- name: 'Coaching Focus: Increasing impact, promotions, and strategic mindset' - startOffset: 1387 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1387 - endOffset: 1472 -- name: 'Data Work Visibility: Foundation work, product mindset, and KPIs' - startOffset: 1472 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1472 - endOffset: 1710 -- name: 'Coaching Delivery: LinkedIn, Calendly, one-shot sessions, and CV reviews' - startOffset: 1710 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1710 - endOffset: 1959 -- name: 'Side Projects & Partnerships: PyPodcats, Shades & Contrast, Responsible AI' - startOffset: 1959 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=1959 - endOffset: 2078 -- name: 'Coaching Approach: Blending coaching, mentoring, and practical advice' - startOffset: 2078 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2078 - endOffset: 2174 -- name: 'Self-Promotion vs. 
Bragging: CV culture and owning achievements' - startOffset: 2174 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2174 - endOffset: 2313 -- name: 'Personal Retrospectives: Tracking wins and the two-year rule for topics' - startOffset: 2313 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2313 - endOffset: 2618 -- name: 'Communication Overhead: Behind-the-scenes work for models and open source' - startOffset: 2618 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2618 - endOffset: 2760 -- name: 'Influencing Without Authority: Speaking different work languages & active - listening' - startOffset: 2760 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2760 - endOffset: 2960 -- name: 'Stakeholder Framing: Connecting projects to what''s important for others' - startOffset: 2960 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=2960 - endOffset: 3023 -- name: 'Empathy in Practice: Role perspective-taking for better collaboration' - startOffset: 3023 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3023 - endOffset: 3230 -- name: 'Cross-Functional Leadership: Emotional intelligence and people care' - startOffset: 3230 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3230 - endOffset: 3264 -- name: 'Inclusive Leadership: Defining inclusion, avoiding exclusivity, and cultural - diversity' - startOffset: 3264 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3264 - endOffset: 3468 -- name: 'Closing & Contact: How to reach Tereza, Calendly and further resources' - startOffset: 3468 - url: https://www.youtube.com/watch?v=Z4vOTgzLkJQ&t=3468 - endOffset: 3395 +context: 'Helping technical professionals—especially data practitioners—become high-impact, + inclusive leaders by combining mindset shifts and practical routines: making invisible + technical work visible with a product/value lens, cultivating psychological safety + and feedback skills, managing sustainable team scope, and using empathetic communication + and stakeholder framing to influence 
across functions.' --- - Links: * [LinkedIn](https://www.linkedin.com/in/tereza-iofciu/){:target="_blank"} diff --git a/_podcast/s03e08-data-led-professional.md b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md similarity index 96% rename from _podcast/s03e08-data-led-professional.md rename to _podcast/data-led-growth-event-tracking-and-reverse-etl.md index 87d93ec0..b9c8c3fa 100644 --- a/_podcast/s03e08-data-led-professional.md +++ b/_podcast/data-led-growth-event-tracking-and-reverse-etl.md @@ -1,12 +1,11 @@ --- -title: 'How to Build a Data-Led Growth Stack: Event Tracking, Tracking Plans & Reverse - ETL' -short: Becoming a Data-led Professional -guests: -- arpitchoudhury -image: images/podcast/s03e08-data-led-professional.jpg +title: "How to Build a Data-Led Growth Stack: Event Tracking, Tracking Plans & Reverse ETL" +short: "Becoming a Data-led Professional" season: 3 episode: 8 +guests: +- arpitchoudhury +image: images/podcast/data-led-growth-event-tracking-and-reverse-etl.jpg ids: youtube: 8v5KpHWgyYw anchor: Becoming-a-Data-led-Professional---Arpit-Choudhury-e11mkgq @@ -15,6 +14,124 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-led-Professional---Arpit-Choudhury-e11mkgq spotify: https://open.spotify.com/episode/2hg3Gi3h5OfdedXENwZwnU apple: https://podcasts.apple.com/us/podcast/becoming-a-data-led-professional-arpit-choudhury/id1541710331?i=1000523422699 + +description: "Build a data-led growth stack with event tracking, tracking plans & Reverse ETL to activate product data for precise experimentation, personalization, and ops" +intro: "How do you design a data-led growth stack that reliably powers personalization, activation, and operational workflows? In this episode, Arpit Choudhury, founder of Data-led Academy, walks through the practical steps of building a data-led growth stack focused on event tracking, documented tracking plans, and reverse ETL.

Arpit — who runs Data-led Academy to teach data skills for non-technical and technical teams alike — breaks down the full data flow: collection (client- vs server-side events), storage (warehouses like Snowflake, BigQuery, Redshift), transformation (DBT), analysis (product analytics), and activation (reverse ETL to support, sales, and engagement tools). He covers how to create tracking plans and instrument events (signup, project created, invite, invoice), common tooling (Segment, RudderStack, MetaRouter, Freshpaint, AVO, Iteratively, TrackPlan), and reverse ETL platforms (Census, Hightouch, Grouparoo). You’ll also hear trade-offs around CDPs versus warehouse-centric approaches, buy vs build decisions, and the team roles and documentation practices needed to democratize data.

Listen to learn concrete patterns for event tracking, tracking-plan ownership, anomaly investigation, and activating product data to drive growth without sacrificing data quality." +topics: +- data engineering +- tools +dateadded: 2021-05-29 + +duration: PT01H21S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=0 + endOffset: 141 +- name: 'DataLed Academy: free learning, repository & podcast' + startOffset: 141 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=141 + endOffset: 306 +- name: 'Career trajectory: integrations, Integromat & community growth' + startOffset: 306 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=306 + endOffset: 441 +- name: 'Growth marketing: A/B testing, personalization & product data' + startOffset: 441 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=441 + endOffset: 586 +- name: 'Marketer tooling: visual queries and self-serve data access' + startOffset: 586 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=586 + endOffset: 645 +- name: 'Definition: data-led professional — source awareness & data skepticism' + startOffset: 645 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=645 + endOffset: 693 +- name: 'Data-led vs. 
data-driven: balancing data, intuition & bias' + startOffset: 693 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=693 + endOffset: 814 +- name: 'Tracking plan & instrumentation: documenting events, properties & ownership' + startOffset: 814 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=814 + endOffset: 1107 +- name: 'Anomaly investigation: tracing event origins and fake signups' + startOffset: 1107 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1107 + endOffset: 1247 +- name: 'Collaborative tracking tools: AVO, Iteratively, TrackPlan' + startOffset: 1247 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1247 + endOffset: 1370 +- name: 'Data flow overview: collection, storage, analysis and activation' + startOffset: 1370 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1370 + endOffset: 1483 +- name: 'Event examples for SaaS: signup, project created, invite, invoice' + startOffset: 1483 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1483 + endOffset: 1620 +- name: 'Client-side vs. 
server-side events: timing, accuracy and use cases' + startOffset: 1620 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1620 + endOffset: 1732 +- name: 'Data warehousing & transformation: warehouses, DBT and BI analysis' + startOffset: 1732 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1732 + endOffset: 1803 +- name: 'Data activation: sending event data to support, sales and engagement tools' + startOffset: 1803 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1803 + endOffset: 2021 +- name: 'Data collection platforms: Segment, RudderStack, MetaRouter, Freshpaint' + startOffset: 2021 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2021 + endOffset: 2127 +- name: 'Warehouse-centric analytics: Snowflake, BigQuery, Redshift & warehouse-first + tools' + startOffset: 2127 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2127 + endOffset: 2245 +- name: 'Reverse ETL / operational analytics: Census, HighTouch, Grouparoo' + startOffset: 2245 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2245 + endOffset: 2300 +- name: 'Customer Data Platforms (CDP): all-in-one trade-offs for marketers' + startOffset: 2300 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2300 + endOffset: 2490 +- name: 'Modern data stack for growth: CDI, product analytics, warehouse & reverse + ETL' + startOffset: 2490 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2490 + endOffset: 2630 +- name: 'Buy vs. 
build: cost, maintenance and open-source alternatives' + startOffset: 2630 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2630 + endOffset: 2773 +- name: 'Team composition: data engineer, analyst, analytics engineer & product ops' + startOffset: 2773 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2773 + endOffset: 3100 +- name: 'Data democratization: data literacy, documentation & self-serve analytics' + startOffset: 3100 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3100 + endOffset: 3228 +- name: 'Motivating documentation: culture, early habits & catalog tools' + startOffset: 3228 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3228 + endOffset: 3368 +- name: 'Product-led vs. data-led: activation events and personalized onboarding' + startOffset: 3368 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3368 + endOffset: 3629 +- name: 'Closing & resources: dataled.academy, newsletter and podcast episodes' + startOffset: 3629 + url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3629 + endOffset: 3621 + transcript: - header: Podcast Introduction - header: 'DataLed Academy: free learning, repository & podcast' @@ -897,131 +1014,4 @@ transcript: sec: 3762 time: '1:02:42' who: Alexey -description: Build a data-led growth stack with event tracking, tracking plans & Reverse - ETL to activate product data for precise experimentation, personalization, and ops. -intro: 'How do you design a data-led growth stack that reliably powers personalization, - activation, and operational workflows? In this episode, Arpit Choudhury, founder - of Data-led Academy, walks through the practical steps of building a data-led growth - stack focused on event tracking, documented tracking plans, and reverse ETL.

- Arpit — who runs Data-led Academy to teach data skills for non-technical and technical - teams alike — breaks down the full data flow: collection (client- vs server-side - events), storage (warehouses like Snowflake, BigQuery, Redshift), transformation - (DBT), analysis (product analytics), and activation (reverse ETL to support, sales, - and engagement tools). He covers how to create tracking plans and instrument events - (signup, project created, invite, invoice), common tooling (Segment, RudderStack, - MetaRouter, Freshpaint, AVO, Iteratively, TrackPlan), and reverse ETL platforms - (Census, Hightouch, Grouparoo). You’ll also hear trade-offs around CDPs versus warehouse-centric - approaches, buy vs build decisions, and the team roles and documentation practices - needed to democratize data.

Listen to learn concrete patterns for event - tracking, tracking-plan ownership, anomaly investigation, and activating product - data to drive growth without sacrificing data quality.' -dateadded: '2021-05-29' -duration: PT01H21S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=0 - endOffset: 141 -- name: 'DataLed Academy: free learning, repository & podcast' - startOffset: 141 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=141 - endOffset: 306 -- name: 'Career trajectory: integrations, Integromat & community growth' - startOffset: 306 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=306 - endOffset: 441 -- name: 'Growth marketing: A/B testing, personalization & product data' - startOffset: 441 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=441 - endOffset: 586 -- name: 'Marketer tooling: visual queries and self-serve data access' - startOffset: 586 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=586 - endOffset: 645 -- name: 'Definition: data-led professional — source awareness & data skepticism' - startOffset: 645 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=645 - endOffset: 693 -- name: 'Data-led vs. 
data-driven: balancing data, intuition & bias' - startOffset: 693 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=693 - endOffset: 814 -- name: 'Tracking plan & instrumentation: documenting events, properties & ownership' - startOffset: 814 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=814 - endOffset: 1107 -- name: 'Anomaly investigation: tracing event origins and fake signups' - startOffset: 1107 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1107 - endOffset: 1247 -- name: 'Collaborative tracking tools: AVO, Iteratively, TrackPlan' - startOffset: 1247 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1247 - endOffset: 1370 -- name: 'Data flow overview: collection, storage, analysis and activation' - startOffset: 1370 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1370 - endOffset: 1483 -- name: 'Event examples for SaaS: signup, project created, invite, invoice' - startOffset: 1483 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1483 - endOffset: 1620 -- name: 'Client-side vs. 
server-side events: timing, accuracy and use cases' - startOffset: 1620 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1620 - endOffset: 1732 -- name: 'Data warehousing & transformation: warehouses, DBT and BI analysis' - startOffset: 1732 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1732 - endOffset: 1803 -- name: 'Data activation: sending event data to support, sales and engagement tools' - startOffset: 1803 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=1803 - endOffset: 2021 -- name: 'Data collection platforms: Segment, RudderStack, MetaRouter, Freshpaint' - startOffset: 2021 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2021 - endOffset: 2127 -- name: 'Warehouse-centric analytics: Snowflake, BigQuery, Redshift & warehouse-first - tools' - startOffset: 2127 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2127 - endOffset: 2245 -- name: 'Reverse ETL / operational analytics: Census, HighTouch, Grouparoo' - startOffset: 2245 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2245 - endOffset: 2300 -- name: 'Customer Data Platforms (CDP): all-in-one trade-offs for marketers' - startOffset: 2300 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2300 - endOffset: 2490 -- name: 'Modern data stack for growth: CDI, product analytics, warehouse & reverse - ETL' - startOffset: 2490 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2490 - endOffset: 2630 -- name: 'Buy vs. 
build: cost, maintenance and open-source alternatives' - startOffset: 2630 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2630 - endOffset: 2773 -- name: 'Team composition: data engineer, analyst, analytics engineer & product ops' - startOffset: 2773 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=2773 - endOffset: 3100 -- name: 'Data democratization: data literacy, documentation & self-serve analytics' - startOffset: 3100 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3100 - endOffset: 3228 -- name: 'Motivating documentation: culture, early habits & catalog tools' - startOffset: 3228 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3228 - endOffset: 3368 -- name: 'Product-led vs. data-led: activation events and personalized onboarding' - startOffset: 3368 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3368 - endOffset: 3629 -- name: 'Closing & resources: dataled.academy, newsletter and podcast episodes' - startOffset: 3629 - url: https://www.youtube.com/watch?v=8v5KpHWgyYw&t=3629 - endOffset: 3621 --- diff --git a/_podcast/s10e06-data-mesh-101.md b/_podcast/data-mesh-architecture-decentralized-data-products.md similarity index 97% rename from _podcast/s10e06-data-mesh-101.md rename to _podcast/data-mesh-architecture-decentralized-data-products.md index f5c58553..31107916 100644 --- a/_podcast/s10e06-data-mesh-101.md +++ b/_podcast/data-mesh-architecture-decentralized-data-products.md @@ -1,20 +1,131 @@ --- +title: "Data Mesh Implementation: Build Decentralized Data Products, Contracts & Federated Governance" +short: "Data Mesh 101" +season: 10 episode: 6 guests: - zhamakdehghani +image: images/podcast/data-mesh-architecture-decentralized-data-products.jpg ids: anchor: Data-Mesh-101---Zhamak-Dehghani-e1n7vlk youtube: 346N_pCtYZU -image: images/podcast/s10e06-data-mesh-101.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Mesh-101---Zhamak-Dehghani-e1n7vlk apple: 
https://podcasts.apple.com/us/podcast/data-mesh-101-zhamak-dehghani/id1541710331?i=1000578193372 spotify: https://open.spotify.com/episode/5uX5sfRPvC9WAXOM9fRCup?si=FQYB7cpuSOyzq7022xU3Tg youtube: https://www.youtube.com/watch?v=346N_pCtYZU -season: 10 -short: Data Mesh 101 -title: 'Data Mesh Implementation: Build Decentralized Data Products, Contracts & Federated - Governance' + +description: "Discover Data Mesh strategies, data contracts and federated governance to build decentralized data products, improve data quality, and scale adoption" +intro: "How do you scale data architecture so teams deliver value without centralized bottlenecks? In this episode, Zhamak Dehghani — director of technology at Thoughtworks and founder of the Data Mesh concept — walks through practical steps for Data Mesh implementation: building decentralized data products, defining data contracts, and establishing federated governance.

We cover why enterprises face long pipelines to value and how a socio-technical, domain-oriented approach decouples pipelines with clear contracts and ownership. Zhamak explains the mesh-as-graph view, streaming examples of domain producers/consumers and schemas, and the maturity spectrum from tight warehouse schemas to loose coupling. You’ll hear about minimal guarantees and metadata for discoverability, decentralized interoperability (identity and auth), and how to define data product contracts (quality, SLAs, ownership).

The episode also digs into self-serve data platforms, platform federation with shared standards, governance primitives such as retention and automated validation, and an adoption roadmap including assessment, pilots, and executive buy-in. Listeners will gain concrete guidance on applying Data Mesh principles, designing data products and contracts, and operationalizing federated governance in their organizations." +topics: +- data mesh +- data engineering +dateadded: 2022-09-02 + +duration: PT00H59M55S + +quotableClips: +- name: Podcast Introduction + startOffset: 144 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=144 + endOffset: 159 +- name: Guest background & career path + startOffset: 159 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=159 + endOffset: 197 +- name: 'From firmware to distributed systems: career highlights' + startOffset: 197 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=197 + endOffset: 382 +- name: 'Consulting practice: building data platforms and products' + startOffset: 382 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=382 + endOffset: 455 +- name: 'Enterprise data friction: long pipelines to value' + startOffset: 455 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=455 + endOffset: 589 +- name: Data Mesh concept and core motivation + startOffset: 589 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=589 + endOffset: 596 +- name: 'Decentralized socio-technical approach: autonomy and interoperability' + startOffset: 596 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=596 + endOffset: 800 +- name: 'Architectural shift: decoupling pipelines and data contracts' + startOffset: 800 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=800 + endOffset: 895 +- name: 'Mesh as a graph: interconnectivity and value exchange' + startOffset: 895 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=895 + endOffset: 994 +- name: Domain-oriented ownership and team alignment + startOffset: 994 + url: 
https://www.youtube.com/watch?v=346N_pCtYZU&t=994 + endOffset: 1030 +- name: 'Streaming example: domain producers, consumers, and schemas' + startOffset: 1030 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1030 + endOffset: 1345 +- name: 'Maturity spectrum: warehouse schemas versus loose coupling' + startOffset: 1345 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1345 + endOffset: 1578 +- name: Optimizing for humans vs machines; federated queries and compute + startOffset: 1578 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1578 + endOffset: 1865 +- name: Minimal guarantees and metadata for discoverability + startOffset: 1865 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1865 + endOffset: 1924 +- name: 'Decentralized interoperability: standard seams, identity, and auth' + startOffset: 1924 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1924 + endOffset: 2076 +- name: 'Data as a product: consumer-first guarantees and KPIs' + startOffset: 2076 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2076 + endOffset: 2376 +- name: 'Data product contracts: quality, SLAs, and ownership decisions' + startOffset: 2376 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2376 + endOffset: 2518 +- name: 'Self-serve data platform: developer experience and abstractions' + startOffset: 2518 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2518 + endOffset: 2855 +- name: 'Platform federation: multiple platforms with shared standards' + startOffset: 2855 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2855 + endOffset: 2965 +- name: 'Federated governance: policies, automation, and enforcement' + startOffset: 2965 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2965 + endOffset: 3182 +- name: 'Governance primitives: retention, metadata, and automated validation' + startOffset: 3182 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3182 + endOffset: 3288 +- name: 'Core understanding: why, what, and how of Data Mesh' + startOffset: 3288 
+ url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3288 + endOffset: 3447 +- name: 'Adoption roadmap: assessment, pilots, and executive buy-in' + startOffset: 3447 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3447 + endOffset: 3603 +- name: 'Implementations and case studies: community resources' + startOffset: 3603 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3603 + endOffset: 3717 +- name: Episode wrap-up and closing remarks + startOffset: 3717 + url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3717 + endOffset: 3595 + transcript: - header: Podcast Introduction - line: This week we will talk about Data Mesh. We have a special guest today, Zhamak. @@ -1009,127 +1120,6 @@ transcript: sec: 3739 time: '1:02:19' who: Zhamak -description: Discover Data Mesh strategies, data contracts and federated governance - to build decentralized data products, improve data quality, and scale adoption. -intro: 'How do you scale data architecture so teams deliver value without centralized - bottlenecks? In this episode, Zhamak Dehghani — director of technology at Thoughtworks - and founder of the Data Mesh concept — walks through practical steps for Data Mesh - implementation: building decentralized data products, defining data contracts, and - establishing federated governance.

We cover why enterprises face long pipelines - to value and how a socio-technical, domain-oriented approach decouples pipelines - with clear contracts and ownership. Zhamak explains the mesh-as-graph view, streaming - examples of domain producers/consumers and schemas, and the maturity spectrum from - tight warehouse schemas to loose coupling. You’ll hear about minimal guarantees - and metadata for discoverability, decentralized interoperability (identity and auth), - and how to define data product contracts (quality, SLAs, ownership).

The - episode also digs into self-serve data platforms, platform federation with shared - standards, governance primitives such as retention and automated validation, and - an adoption roadmap including assessment, pilots, and executive buy-in. Listeners - will gain concrete guidance on applying Data Mesh principles, designing data products - and contracts, and operationalizing federated governance in their organizations.' -dateadded: '2022-09-02' -duration: PT00H59M55S -quotableClips: -- name: Podcast Introduction - startOffset: 144 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=144 - endOffset: 159 -- name: Guest background & career path - startOffset: 159 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=159 - endOffset: 197 -- name: 'From firmware to distributed systems: career highlights' - startOffset: 197 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=197 - endOffset: 382 -- name: 'Consulting practice: building data platforms and products' - startOffset: 382 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=382 - endOffset: 455 -- name: 'Enterprise data friction: long pipelines to value' - startOffset: 455 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=455 - endOffset: 589 -- name: Data Mesh concept and core motivation - startOffset: 589 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=589 - endOffset: 596 -- name: 'Decentralized socio-technical approach: autonomy and interoperability' - startOffset: 596 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=596 - endOffset: 800 -- name: 'Architectural shift: decoupling pipelines and data contracts' - startOffset: 800 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=800 - endOffset: 895 -- name: 'Mesh as a graph: interconnectivity and value exchange' - startOffset: 895 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=895 - endOffset: 994 -- name: Domain-oriented ownership and team alignment - startOffset: 994 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=994 - 
endOffset: 1030 -- name: 'Streaming example: domain producers, consumers, and schemas' - startOffset: 1030 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1030 - endOffset: 1345 -- name: 'Maturity spectrum: warehouse schemas versus loose coupling' - startOffset: 1345 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1345 - endOffset: 1578 -- name: Optimizing for humans vs machines; federated queries and compute - startOffset: 1578 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1578 - endOffset: 1865 -- name: Minimal guarantees and metadata for discoverability - startOffset: 1865 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1865 - endOffset: 1924 -- name: 'Decentralized interoperability: standard seams, identity, and auth' - startOffset: 1924 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=1924 - endOffset: 2076 -- name: 'Data as a product: consumer-first guarantees and KPIs' - startOffset: 2076 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2076 - endOffset: 2376 -- name: 'Data product contracts: quality, SLAs, and ownership decisions' - startOffset: 2376 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2376 - endOffset: 2518 -- name: 'Self-serve data platform: developer experience and abstractions' - startOffset: 2518 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2518 - endOffset: 2855 -- name: 'Platform federation: multiple platforms with shared standards' - startOffset: 2855 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2855 - endOffset: 2965 -- name: 'Federated governance: policies, automation, and enforcement' - startOffset: 2965 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=2965 - endOffset: 3182 -- name: 'Governance primitives: retention, metadata, and automated validation' - startOffset: 3182 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3182 - endOffset: 3288 -- name: 'Core understanding: why, what, and how of Data Mesh' - startOffset: 3288 - url: 
https://www.youtube.com/watch?v=346N_pCtYZU&t=3288 - endOffset: 3447 -- name: 'Adoption roadmap: assessment, pilots, and executive buy-in' - startOffset: 3447 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3447 - endOffset: 3603 -- name: 'Implementations and case studies: community resources' - startOffset: 3603 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3603 - endOffset: 3717 -- name: Episode wrap-up and closing remarks - startOffset: 3717 - url: https://www.youtube.com/watch?v=346N_pCtYZU&t=3717 - endOffset: 3595 --- Links: diff --git a/_podcast/s14e02-practical-data-privacy.md b/_podcast/data-privacy-engineering-gdpr-machine-learning.md similarity index 96% rename from _podcast/s14e02-practical-data-privacy.md rename to _podcast/data-privacy-engineering-gdpr-machine-learning.md index c1fe9696..b6e14d12 100644 --- a/_podcast/s14e02-practical-data-privacy.md +++ b/_podcast/data-privacy-engineering-gdpr-machine-learning.md @@ -1,20 +1,118 @@ --- +title: "Data Privacy Playbook: Differential Privacy, Federated Learning, PETs & Consent UX" +short: "Practical Data Privacy" +season: 14 episode: 2 guests: - katharinejarmul +image: images/podcast/data-privacy-engineering-gdpr-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/Practical-Data-Privacy---Katharine-Jarmul-e23u551 youtube: gbjoFfrm4iw -image: images/podcast/s14e02-practical-data-privacy.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Practical-Data-Privacy---Katharine-Jarmul-e23u551 apple: https://podcasts.apple.com/us/podcast/practical-data-privacy-katharine-jarmul/id1541710331?i=1000613701646 spotify: https://open.spotify.com/episode/137H2M9qU5lFqb4hLyMBvg?si=b0KXeubVSpa3bfsuZaS6pQ youtube: https://www.youtube.com/watch?v=gbjoFfrm4iw -season: 14 -short: Practical Data Privacy -title: 'Data Privacy Playbook: Differential Privacy, Federated Learning, PETs & Consent - UX' + +description: "Discover differential privacy, federated learning and PETs - 
privacy engineering, consent UX fixes and compliance to reduce re-identification risk" +intro: "How can teams build useful machine learning while respecting user privacy, compliance, and re-identification risk? In this episode, Katharine Jarmul — privacy activist and Principal Data Scientist at ThoughtWorks Germany — walks through a practical Data Privacy Playbook focused on differential privacy, federated learning, privacy-enhancing technologies (PETs) and consent UX.

Katharine draws on a career from data journalism and NLP to startup work at KI Protect and enterprise ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies for pseudonymisation, encrypted ML and federated architectures. We cover consent and opt-out UX, legal vs technical definitions of privacy, profiling and fingerprinting risks, and privacy-friendly personalization like session-based intent and ephemeral inference.

You’ll get concrete takeaways: why differential privacy matters (formal definition, use cases, Tumult and other libraries), common anonymization pitfalls (hashing, k-anonymity, Netflix lessons), how PETs fit into system design, and generative AI privacy considerations including retention and localized model deployment. Listeners leave with actionable guidance on privacy engineering, data minimization, consent design, and resources to continue learning." +topics: +- data governance +- data privacy +- machine learning +- federated learning +dateadded: 2023-05-20 + +duration: PT01H01M28S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=0 + endOffset: 100 +- name: 'Guest Introduction: Katharine Jarmul — privacy activist, ML engineer, ThoughtWorks, + book' + startOffset: 100 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=100 + endOffset: 152 +- name: 'Career Journey: data journalism, NLP, consulting, and machine learning' + startOffset: 152 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=152 + endOffset: 548 +- name: 'Startup Focus: KI Protect, pseudonymisation, encrypted & federated ML' + startOffset: 548 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=548 + endOffset: 693 +- name: 'Privacy Regulation Overview: GDPR, CCPA, CPRA and cookie consent defaults' + startOffset: 693 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=693 + endOffset: 875 +- name: 'Cookie Consent & Opt-Out UX: one-click rejects and user behavior' + startOffset: 875 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=875 + endOffset: 984 +- name: 'Defining Data Privacy: legal, social, and technical perspectives' + startOffset: 984 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=984 + endOffset: 1295 +- name: 'Practical Data Privacy (book): availability, previews, and giveaways' + startOffset: 1295 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1295 + endOffset: 1358 +- name: 'Bridging Legal & 
Technical Views: privacy risk, translation, and collaboration' + startOffset: 1358 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1358 + endOffset: 1512 +- name: 'User Profiling & Fingerprinting: browser history, apps, and re-identification + risks' + startOffset: 1512 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1512 + endOffset: 1815 +- name: 'Privacy-Friendly Personalization: session-based intent and ephemeral inference' + startOffset: 1815 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1815 + endOffset: 1988 +- name: 'Privacy Engineering & PETs: encrypted ML, federated learning, and architecture' + startOffset: 1988 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1988 + endOffset: 2109 +- name: 'Business Case for Privacy: risk management, regulation, and customer trust' + startOffset: 2109 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2109 + endOffset: 2450 +- name: 'Differential Privacy Explained: formal definition, use cases, and libraries + (Tumult)' + startOffset: 2450 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2450 + endOffset: 2708 +- name: 'Anonymization Pitfalls: hashing, k-anonymity, Netflix de-anonymization lessons' + startOffset: 2708 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2708 + endOffset: 2820 +- name: 'Designing for Privacy: consent, data minimization, and workflow practices' + startOffset: 2820 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2820 + endOffset: 3155 +- name: 'Generative AI & Privacy: ChatGPT incidents, consent, retention, and enterprise + options' + startOffset: 3155 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3155 + endOffset: 3569 +- name: 'Deploying Localized Models: Azure localization, fine-tuning, and ownership' + startOffset: 3569 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3569 + endOffset: 3675 +- name: 'Further Learning: Probably Private newsletter, notebooks, and differential + privacy resources' + startOffset: 3675 + url: 
https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3675 + endOffset: 3764 +- name: 'Episode Close: final notes, social links, and next steps' + startOffset: 3764 + url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3764 + endOffset: 3688 + transcript: - header: Episode Introduction - header: 'Guest Introduction: Katharine Jarmul — privacy activist, ML engineer, ThoughtWorks, @@ -305,7 +403,7 @@ transcript: sec: 872 time: '14:32' who: Alexey -- header: 'Cookie Consent & Opt‑Out UX: one‑click rejects and user behavior' +- header: 'Cookie Consent & Opt-Out UX: one-click rejects and user behavior' - line: Because I think I've seen how the collected data gets shared and used in advertising optimization and I'm not a big fan of personalized advertising myself. I find it to be annoying. So I'd rather not participate if I can opt out. But I think @@ -551,7 +649,7 @@ transcript: sec: 1511 time: '25:11' who: Katharine -- header: 'User Profiling & Fingerprinting: browser history, apps, and re‑identification +- header: 'User Profiling & Fingerprinting: browser history, apps, and re-identification risks' - line: We talked about this website – about data engineering, right? [Katharine agrees] So maybe you can use the online advertisements as an example. I visit a website @@ -636,7 +734,7 @@ transcript: sec: 1787 time: '29:47' who: Alexey -- header: 'Privacy‑Friendly Personalization: session‑based intent and ephemeral inference' +- header: 'Privacy-Friendly Personalization: session-based intent and ephemeral inference' - line: Yeah. One of the things that I'm excited to see is that I think there's a lot more thinking through intent-based recommendation, so “What is the intent of the user? 
And how do we improve?” The first time I noticed it was, I think, @@ -908,7 +1006,7 @@ transcript: sec: 2676 time: '44:36' who: Alexey -- header: 'Anonymization Pitfalls: hashing, k‑anonymity, Netflix de‑anonymization +- header: 'Anonymization Pitfalls: hashing, k-anonymity, Netflix de-anonymization lessons' - line: Those are kind of what I would call more “old school” methods of anonymization. People might have also heard about K-anonymity, which is another one of what I @@ -1198,7 +1296,7 @@ transcript: sec: 3525 time: '58:45' who: Alexey -- header: 'Deploying Localized Models: Azure localization, fine‑tuning, and ownership' +- header: 'Deploying Localized Models: Azure localization, fine-tuning, and ownership' - line: Yeah. But, again, I don't think the responsibility should ever lie on the user. You're doing exactly the way the product is designed to be used. It is not your fault that it's not thought through. How you could be like, “Oh, hey. For @@ -1268,112 +1366,6 @@ transcript: sec: 3788 time: '1:03:08' who: Katharine -description: Discover differential privacy, federated learning and PETs - privacy - engineering, consent UX fixes and compliance to reduce re-identification risk. -intro: 'How can teams build useful machine learning while respecting user privacy, - compliance, and re‑identification risk? In this episode, Katharine Jarmul — privacy - activist and Principal Data Scientist at ThoughtWorks Germany — walks through a - practical Data Privacy Playbook focused on differential privacy, federated learning, - privacy‑enhancing technologies (PETs) and consent UX.

Katharine draws on - a career from data journalism and NLP to startup work at KI Protect and enterprise - ML, explaining GDPR/CCPA/CPRA implications, cookie consent defaults, and strategies - for pseudonymisation, encrypted ML and federated architectures. We cover consent - and opt‑out UX, legal vs technical definitions of privacy, profiling and fingerprinting - risks, and privacy‑friendly personalization like session‑based intent and ephemeral - inference.

You’ll get concrete takeaways: why differential privacy matters - (formal definition, use cases, Tumult and other libraries), common anonymization - pitfalls (hashing, k‑anonymity, Netflix lessons), how PETs fit into system design, - and generative AI privacy considerations including retention and localized model - deployment. Listeners leave with actionable guidance on privacy engineering, data - minimization, consent design, and resources to continue learning.' -dateadded: '2023-05-20' -duration: PT01H01M28S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=0 - endOffset: 100 -- name: 'Guest Introduction: Katharine Jarmul — privacy activist, ML engineer, ThoughtWorks, - book' - startOffset: 100 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=100 - endOffset: 152 -- name: 'Career Journey: data journalism, NLP, consulting, and machine learning' - startOffset: 152 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=152 - endOffset: 548 -- name: 'Startup Focus: KI Protect, pseudonymisation, encrypted & federated ML' - startOffset: 548 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=548 - endOffset: 693 -- name: 'Privacy Regulation Overview: GDPR, CCPA, CPRA and cookie consent defaults' - startOffset: 693 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=693 - endOffset: 875 -- name: 'Cookie Consent & Opt‑Out UX: one‑click rejects and user behavior' - startOffset: 875 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=875 - endOffset: 984 -- name: 'Defining Data Privacy: legal, social, and technical perspectives' - startOffset: 984 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=984 - endOffset: 1295 -- name: 'Practical Data Privacy (book): availability, previews, and giveaways' - startOffset: 1295 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1295 - endOffset: 1358 -- name: 'Bridging Legal & Technical Views: privacy risk, translation, and collaboration' - startOffset: 1358 - 
url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1358 - endOffset: 1512 -- name: 'User Profiling & Fingerprinting: browser history, apps, and re‑identification - risks' - startOffset: 1512 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1512 - endOffset: 1815 -- name: 'Privacy‑Friendly Personalization: session‑based intent and ephemeral inference' - startOffset: 1815 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1815 - endOffset: 1988 -- name: 'Privacy Engineering & PETs: encrypted ML, federated learning, and architecture' - startOffset: 1988 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=1988 - endOffset: 2109 -- name: 'Business Case for Privacy: risk management, regulation, and customer trust' - startOffset: 2109 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2109 - endOffset: 2450 -- name: 'Differential Privacy Explained: formal definition, use cases, and libraries - (Tumult)' - startOffset: 2450 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2450 - endOffset: 2708 -- name: 'Anonymization Pitfalls: hashing, k‑anonymity, Netflix de‑anonymization lessons' - startOffset: 2708 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2708 - endOffset: 2820 -- name: 'Designing for Privacy: consent, data minimization, and workflow practices' - startOffset: 2820 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=2820 - endOffset: 3155 -- name: 'Generative AI & Privacy: ChatGPT incidents, consent, retention, and enterprise - options' - startOffset: 3155 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3155 - endOffset: 3569 -- name: 'Deploying Localized Models: Azure localization, fine‑tuning, and ownership' - startOffset: 3569 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3569 - endOffset: 3675 -- name: 'Further Learning: Probably Private newsletter, notebooks, and differential - privacy resources' - startOffset: 3675 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3675 - endOffset: 3764 -- name: 'Episode Close: final notes, 
social links, and next steps' - startOffset: 3764 - url: https://www.youtube.com/watch?v=gbjoFfrm4iw&t=3764 - endOffset: 3688 --- Links: diff --git a/_podcast/s12e02-business-skills-for-data-professionals.md b/_podcast/data-professionals-business-skills-in-saas.md similarity index 97% rename from _podcast/s12e02-business-skills-for-data-professionals.md rename to _podcast/data-professionals-business-skills-in-saas.md index 9a30f296..acb484df 100644 --- a/_podcast/s12e02-business-skills-for-data-professionals.md +++ b/_podcast/data-professionals-business-skills-in-saas.md @@ -1,20 +1,130 @@ --- +title: "Practical Skills for Data Professionals in SaaS: Bridging the Gap between Data and Business" +short: "Practical Skills for Data Professionals in SaaS" +season: 12 episode: 2 guests: - lorismarini +image: images/podcast/data-professionals-business-skills-in-saas.jpg ids: anchor: Business-Skills-for-Data-Professionals---Loris-Marini-e1s89hu youtube: xMYRUiTu960 -image: images/podcast/s12e02-business-skills-for-data-professionals.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Business-Skills-for-Data-Professionals---Loris-Marini-e1s89hu apple: https://podcasts.apple.com/us/podcast/business-skills-for-data-professionals-loris-marini/id1541710331?i=1000590422440 spotify: https://open.spotify.com/episode/5tw3qs1XHETDPYrxdEaVbK?si=QIclWOT_QhKhIGrcl-KQXg youtube: https://www.youtube.com/watch?v=xMYRUiTu960 -season: 12 -short: Business Skills for Data Professionals -title: 'Practical Data Science for SaaS: Deployments, Marketing Automation, Metrics - & Storytelling' + +description: "Discover practical data science for SaaS: deploy ML, build marketing automation, define metrics and reduce churn—stakeholder tactics, tooling, and storytelling insights." +topics: +- data science +- MLOps +- communication +- tools +- career transition +intro: "How do you move data science from experiments to measurable impact in a SaaS business? 
In this episode, Loris Marini — CEO and founder of Discovering Data and host of the Discovering Data podcast — walks through practical approaches to deploying models, building marketing automation, and turning metrics into persuasive stories.

Loris covers production challenges for model deployment in SaaS, a marketing automation use case (recommendations and reporting), and how applied research like reinforcement learning maps to real problems. We dig into semantic alignment — defining "customer" and core metrics — plus lead indicators, stickiness, churn, and causal thinking for product metrics. Loris also shares tactics for onboarding stakeholders: stakeholder mapping, CRM-style context capture, meeting immersion, and Notion-based note systems. He emphasizes pragmatic tools (Excel, pivots), prioritizing high-connectivity opportunities, and a conversation-first diagnostic before ML. Finally, learn data storytelling techniques, building trust through active listening and business literacy, and where to find further resources and community.

Listen to gain concrete strategies for model deployment, marketing automation, measurement, and communicating data-driven outcomes in SaaS." +dateadded: 2022-12-17 + +duration: PT01H15S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=0 + endOffset: 102 +- name: 'Guest Background: From Physics to Data Science' + startOffset: 102 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=102 + endOffset: 165 +- name: 'Early Data Role: Research Skills Applied in a Startup' + startOffset: 165 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=165 + endOffset: 291 +- name: 'Production Challenges: Deploying Models in a SaaS' + startOffset: 291 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=291 + endOffset: 378 +- name: 'Marketing Automation Use Case: Recommendations & Reporting' + startOffset: 378 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=378 + endOffset: 510 +- name: 'Applied Research: Reinforcement Learning to Practical Problems' + startOffset: 510 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=510 + endOffset: 739 +- name: 'Semantic Alignment: Defining "Customer" and Core Metrics' + startOffset: 739 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=739 + endOffset: 946 +- name: 'Lead Indicators & Stickiness: Churn and Causal Thinking' + startOffset: 946 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=946 + endOffset: 1080 +- name: 'Context & Semantics: Cross-Functional Meaning in Data' + startOffset: 1080 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1080 + endOffset: 1306 +- name: 'Data Storytelling: Marketing Techniques for Memorable Communication' + startOffset: 1306 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1306 + endOffset: 1553 +- name: 'Building Trust: Active Listening and Business Literacy' + startOffset: 1553 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1553 + endOffset: 1675 +- name: 'Onboarding Strategy: Stakeholder Mapping and 
Prioritization' + startOffset: 1675 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1675 + endOffset: 2120 +- name: 'Stakeholder CRM: Capturing Names, Roles, and Context' + startOffset: 2120 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2120 + endOffset: 2271 +- name: 'Meeting Immersion: Learning Business Language by Attendance' + startOffset: 2271 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2271 + endOffset: 2493 +- name: 'Note Systems: Using Notion to Track Meetings and Key Activities' + startOffset: 2493 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2493 + endOffset: 2622 +- name: 'Tooling & IP Considerations: Personal Knowledge vs Company Systems' + startOffset: 2622 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2622 + endOffset: 2713 +- name: 'Prioritization: Choosing Projects by Stakeholder Impact' + startOffset: 2713 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2713 + endOffset: 2830 +- name: 'Opportunity Selection: Finding High-Connectivity Data Projects' + startOffset: 2830 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2830 + endOffset: 3061 +- name: 'Pragmatism in Tools: Excel, Pivot Tables, and Rapid Experiments' + startOffset: 3061 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3061 + endOffset: 3188 +- name: 'Conversation First: Description and Diagnostic Before ML' + startOffset: 3188 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3188 + endOffset: 3373 +- name: 'Presenting Online: Podcasting, Pauses, and Audio Practices' + startOffset: 3373 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3373 + endOffset: 3515 +- name: 'Resources: Discovering Data Podcast for Business Skills' + startOffset: 3515 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3515 + endOffset: 3633 +- name: 'Community Building: Joining the Discovering Data Discord' + startOffset: 3633 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3633 + endOffset: 3683 +- name: Episode Wrap-Up and Contact Links + 
startOffset: 3683 + url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3683 + endOffset: 3615 + transcript: - header: Podcast Introduction - header: 'Guest Background: From Physics to Data Science' @@ -1122,125 +1232,6 @@ transcript: sec: 3717 time: '1:01:57' who: Loris -description: 'Discover practical data science for SaaS: deploy ML, build marketing - automation, define metrics and reduce churn—stakeholder tactics, tooling, and storytelling - insights.' -intro: 'How do you move data science from experiments to measurable impact in a SaaS - business? In this episode, Loris Marini — CEO and founder of Discovering Data and - host of the Discovering Data podcast — walks through practical approaches to deploying - models, building marketing automation, and turning metrics into persuasive stories. -

Drawing on a journey "from physics to data science," Loris covers production - challenges for model deployment in SaaS, a marketing automation use case (recommendations - and reporting), and how applied research like reinforcement learning maps to real - problems. We dig into semantic alignment — defining "customer" and core metrics - — plus lead indicators, stickiness, churn, and causal thinking for product metrics. - Loris also shares tactics for onboarding stakeholders: stakeholder mapping, CRM-style - context capture, meeting immersion, and Notion-based note systems. He emphasizes - pragmatic tools (Excel, pivots), prioritizing high-connectivity opportunities, and - a conversation-first diagnostic before ML. Finally, learn data storytelling techniques, - building trust through active listening and business literacy, and where to find - further resources and community.

Listen to gain concrete strategies for - model deployment, marketing automation, measurement, and communicating data-driven - outcomes in SaaS.' -dateadded: '2022-12-17' -duration: PT01H15S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=0 - endOffset: 102 -- name: 'Guest Background: From Physics to Data Science' - startOffset: 102 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=102 - endOffset: 165 -- name: 'Early Data Role: Research Skills Applied in a Startup' - startOffset: 165 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=165 - endOffset: 291 -- name: 'Production Challenges: Deploying Models in a SaaS' - startOffset: 291 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=291 - endOffset: 378 -- name: 'Marketing Automation Use Case: Recommendations & Reporting' - startOffset: 378 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=378 - endOffset: 510 -- name: 'Applied Research: Reinforcement Learning to Practical Problems' - startOffset: 510 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=510 - endOffset: 739 -- name: 'Semantic Alignment: Defining "Customer" and Core Metrics' - startOffset: 739 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=739 - endOffset: 946 -- name: 'Lead Indicators & Stickiness: Churn and Causal Thinking' - startOffset: 946 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=946 - endOffset: 1080 -- name: 'Context & Semantics: Cross-Functional Meaning in Data' - startOffset: 1080 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1080 - endOffset: 1306 -- name: 'Data Storytelling: Marketing Techniques for Memorable Communication' - startOffset: 1306 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1306 - endOffset: 1553 -- name: 'Building Trust: Active Listening and Business Literacy' - startOffset: 1553 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1553 - endOffset: 1675 -- name: 'Onboarding Strategy: Stakeholder Mapping and 
Prioritization' - startOffset: 1675 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=1675 - endOffset: 2120 -- name: 'Stakeholder CRM: Capturing Names, Roles, and Context' - startOffset: 2120 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2120 - endOffset: 2271 -- name: 'Meeting Immersion: Learning Business Language by Attendance' - startOffset: 2271 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2271 - endOffset: 2493 -- name: 'Note Systems: Using Notion to Track Meetings and Key Activities' - startOffset: 2493 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2493 - endOffset: 2622 -- name: 'Tooling & IP Considerations: Personal Knowledge vs Company Systems' - startOffset: 2622 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2622 - endOffset: 2713 -- name: 'Prioritization: Choosing Projects by Stakeholder Impact' - startOffset: 2713 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2713 - endOffset: 2830 -- name: 'Opportunity Selection: Finding High-Connectivity Data Projects' - startOffset: 2830 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=2830 - endOffset: 3061 -- name: 'Pragmatism in Tools: Excel, Pivot Tables, and Rapid Experiments' - startOffset: 3061 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3061 - endOffset: 3188 -- name: 'Conversation First: Description and Diagnostic Before ML' - startOffset: 3188 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3188 - endOffset: 3373 -- name: 'Presenting Online: Podcasting, Pauses, and Audio Practices' - startOffset: 3373 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3373 - endOffset: 3515 -- name: 'Resources: Discovering Data Podcast for Business Skills' - startOffset: 3515 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3515 - endOffset: 3633 -- name: 'Community Building: Joining the Discovering Data Discord' - startOffset: 3633 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3633 - endOffset: 3683 -- name: Episode Wrap-Up and Contact Links - 
startOffset: 3683 - url: https://www.youtube.com/watch?v=xMYRUiTu960&t=3683 - endOffset: 3615 --- Links: diff --git a/_podcast/s03e03-data-observability.md b/_podcast/data-quality-data-observability-data-reliability.md similarity index 97% rename from _podcast/s03e03-data-observability.md rename to _podcast/data-quality-data-observability-data-reliability.md index ce57ed06..79356839 100644 --- a/_podcast/s03e03-data-observability.md +++ b/_podcast/data-quality-data-observability-data-reliability.md @@ -1,12 +1,11 @@ --- -title: 'Data Observability Explained: 5 Pillars to Prevent Downtime, Drift & False - Positives' -short: 'Data Observability: The Next Frontier of Data Engineering' -guests: -- barrmoses -image: images/podcast/s03e03-data-observability.jpg +title: "Data Observability Explained: 5 Pillars to Prevent Downtime, Drift & False Positives" +short: "Data Observability: The Next Frontier of Data Engineering" season: 3 episode: 3 +guests: +- barrmoses +image: images/podcast/data-quality-data-observability-data-reliability.jpg ids: youtube: TrMG1SOqZkQ anchor: Data-Observability---Barr-Moses-evghmh @@ -15,6 +14,115 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Observability---Barr-Moses-evghmh spotify: https://open.spotify.com/episode/48QcLAw2I1apC1jeo8e1sd apple: https://podcasts.apple.com/us/podcast/data-observability-barr-moses/id1541710331?i=1000518351217 + +description: "Discover data observability, freshness, lineage and schema detection to prevent downtime, stop model drift and cut false positives in pipelines" +intro: "How do you prevent data downtime, drift, and false positives before they break analytics and models? In this episode, Barr Moses, CEO and co-founder of Monte Carlo and former VP of Customer Operations at Gainsight, walks through a practical framework for data observability grounded in real-world incidents and DevOps principles.

Barr explains why batch data needs different approaches than app monitoring and outlines the Five Pillars of Data Observability—freshness, volume, distribution, schema, and lineage. You’ll hear a schema-change case study, learn how silent failures and model drift occur, and how to move from monitoring to true observability for faster root cause analysis using correlation, logs, and lineage. The conversation covers accountability models (RACI), defining and automating data SLAs, operational runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria for end-to-end platforms versus point tools.

Listeners will get actionable guidance on reducing false positives, prioritizing pipeline fixes, implementing auto lineage, and applying anomaly detection with contextual alerts—practical steps to improve data quality, reliability, and observability across cloud-agnostic environments" +topics: +- MLOps +- data observability +dateadded: 2021-04-24 + +duration: PT01H01M50S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=0 + endOffset: 108 +- name: 'Guest Profile: Barr Moses — career, GainSight, Monte Carlo' + startOffset: 108 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=108 + endOffset: 275 +- name: 'Market Gap: Data downtime impact on analytics teams' + startOffset: 275 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=275 + endOffset: 416 +- name: 'Observability Origins: DevOps pillars (metrics, logs, traces)' + startOffset: 416 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=416 + endOffset: 589 +- name: 'Batch Data Challenges: Why data observability differs from app monitoring' + startOffset: 589 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=589 + endOffset: 820 +- name: 'Silent Failures: Invisible data quality incidents and model drift' + startOffset: 820 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=820 + endOffset: 998 +- name: 'Five Pillars of Data Observability: Freshness, Volume, Distribution, Schema, + Lineage' + startOffset: 998 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=998 + endOffset: 1150 +- name: 'Schema Change Case Study: Downstream breakage and missed notifications' + startOffset: 1150 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1150 + endOffset: 1317 +- name: 'Good Pipelines, Bad Data: Need for engineering and data observability' + startOffset: 1317 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1317 + endOffset: 1471 +- name: 'Monitoring vs Observability: Detection versus diagnosis' + startOffset: 1471 + url: 
https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1471 + endOffset: 1564 +- name: 'Root Cause Analysis: Correlation, logs, lineage for triage' + startOffset: 1564 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1564 + endOffset: 1740 +- name: 'Accountability Models: RACI for data ownership and communication' + startOffset: 1740 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1740 + endOffset: 2124 +- name: 'Data SLAs: Defining timeliness and prioritizing pipeline fixes' + startOffset: 2124 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2124 + endOffset: 2294 +- name: 'SLA Automation: Inferring thresholds from historical data' + startOffset: 2294 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2294 + endOffset: 2463 +- name: 'Operational Runbooks: Playbooks and remediation workflows' + startOffset: 2463 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2463 + endOffset: 2580 +- name: 'Maturity Curve: Reactive → Proactive → Automated → Scalable' + startOffset: 2580 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2580 + endOffset: 2820 +- name: 'Platform Criteria: End-to-end integration and reducing false positives' + startOffset: 2820 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2820 + endOffset: 2992 +- name: 'Open Source Landscape: Point tools versus holistic observability' + startOffset: 2992 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2992 + endOffset: 3052 +- name: 'Test-Driven Data Development: Tests, DBT checks, and limitations' + startOffset: 3052 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3052 + endOffset: 3263 +- name: 'Cloud Agnosticism: Integrations across AWS, GCP, Snowflake' + startOffset: 3263 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3263 + endOffset: 3417 +- name: 'Centralized Governance: Observability across distributed environments' + startOffset: 3417 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3417 + endOffset: 3531 +- name: 'Auto Lineage: Detecting upstream and downstream 
data impact' + startOffset: 3531 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3531 + endOffset: 3627 +- name: 'Anomalies vs Bad Data: Contextual alerts and reducing false positives' + startOffset: 3627 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3627 + endOffset: 3770 +- name: Closing Remarks & Contact Resources (Monte Carlo, links, Slack) + startOffset: 3770 + url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3770 + endOffset: 3710 + transcript: - header: Podcast Introduction - header: 'Guest Profile: Barr Moses — career, GainSight, Monte Carlo' @@ -976,123 +1084,6 @@ transcript: sec: 3818 time: '1:03:38' who: Barr -description: Discover data observability, freshness, lineage and schema detection - to prevent downtime, stop model drift and cut false positives in pipelines. -intro: How do you prevent data downtime, drift, and false positives before they break - analytics and models? In this episode, Barr Moses, CEO and co‑founder of Monte Carlo - and former VP of Customer Operations at Gainsight, walks through a practical framework - for data observability grounded in real-world incidents and DevOps principles.

- Barr explains why batch data needs different approaches than app monitoring and - outlines the Five Pillars of Data Observability—freshness, volume, distribution, - schema, and lineage. You’ll hear a schema‑change case study, learn how silent failures - and model drift occur, and how to move from monitoring to true observability for - faster root cause analysis using correlation, logs, and lineage. The conversation - covers accountability models (RACI), defining and automating data SLAs, operational - runbooks, maturity stages (reactive → proactive → automated → scalable), and criteria - for end‑to‑end platforms versus point tools.

Listeners will get actionable - guidance on reducing false positives, prioritizing pipeline fixes, implementing - auto lineage, and applying anomaly detection with contextual alerts—practical steps - to improve data quality, reliability, and observability across cloud‑agnostic environments. -dateadded: '2021-04-24' -duration: PT01H01M50S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=0 - endOffset: 108 -- name: 'Guest Profile: Barr Moses — career, GainSight, Monte Carlo' - startOffset: 108 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=108 - endOffset: 275 -- name: 'Market Gap: Data downtime impact on analytics teams' - startOffset: 275 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=275 - endOffset: 416 -- name: 'Observability Origins: DevOps pillars (metrics, logs, traces)' - startOffset: 416 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=416 - endOffset: 589 -- name: 'Batch Data Challenges: Why data observability differs from app monitoring' - startOffset: 589 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=589 - endOffset: 820 -- name: 'Silent Failures: Invisible data quality incidents and model drift' - startOffset: 820 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=820 - endOffset: 998 -- name: 'Five Pillars of Data Observability: Freshness, Volume, Distribution, Schema, - Lineage' - startOffset: 998 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=998 - endOffset: 1150 -- name: 'Schema Change Case Study: Downstream breakage and missed notifications' - startOffset: 1150 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1150 - endOffset: 1317 -- name: 'Good Pipelines, Bad Data: Need for engineering and data observability' - startOffset: 1317 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1317 - endOffset: 1471 -- name: 'Monitoring vs Observability: Detection versus diagnosis' - startOffset: 1471 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1471 
- endOffset: 1564 -- name: 'Root Cause Analysis: Correlation, logs, lineage for triage' - startOffset: 1564 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1564 - endOffset: 1740 -- name: 'Accountability Models: RACI for data ownership and communication' - startOffset: 1740 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=1740 - endOffset: 2124 -- name: 'Data SLAs: Defining timeliness and prioritizing pipeline fixes' - startOffset: 2124 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2124 - endOffset: 2294 -- name: 'SLA Automation: Inferring thresholds from historical data' - startOffset: 2294 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2294 - endOffset: 2463 -- name: 'Operational Runbooks: Playbooks and remediation workflows' - startOffset: 2463 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2463 - endOffset: 2580 -- name: 'Maturity Curve: Reactive → Proactive → Automated → Scalable' - startOffset: 2580 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2580 - endOffset: 2820 -- name: 'Platform Criteria: End-to-end integration and reducing false positives' - startOffset: 2820 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2820 - endOffset: 2992 -- name: 'Open Source Landscape: Point tools versus holistic observability' - startOffset: 2992 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=2992 - endOffset: 3052 -- name: 'Test-Driven Data Development: Tests, DBT checks, and limitations' - startOffset: 3052 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3052 - endOffset: 3263 -- name: 'Cloud Agnosticism: Integrations across AWS, GCP, Snowflake' - startOffset: 3263 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3263 - endOffset: 3417 -- name: 'Centralized Governance: Observability across distributed environments' - startOffset: 3417 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3417 - endOffset: 3531 -- name: 'Auto Lineage: Detecting upstream and downstream data impact' - startOffset: 3531 - url: 
https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3531 - endOffset: 3627 -- name: 'Anomalies vs Bad Data: Contextual alerts and reducing false positives' - startOffset: 3627 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3627 - endOffset: 3770 -- name: Closing Remarks & Contact Resources (Monte Carlo, links, Slack) - startOffset: 3770 - url: https://www.youtube.com/watch?v=TrMG1SOqZkQ&t=3770 - endOffset: 3710 --- Links: diff --git a/_podcast/s13e02-analytics-for-better-world.md b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md similarity index 97% rename from _podcast/s13e02-analytics-for-better-world.md rename to _podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md index 104cd6ee..515014bb 100644 --- a/_podcast/s13e02-analytics-for-better-world.md +++ b/_podcast/data-science-and-analytics-for-nonprofits-tech-for-good.md @@ -1,20 +1,142 @@ --- +title: "Analytics for Nonprofits: Build Data Maturity, Teams, Tools & Optimization Strategies" +short: "Analytics for a Better World" +season: 13 episode: 2 guests: - parvathykrishnan +image: images/podcast/data-science-and-analytics-for-nonprofits-tech-for-good.jpg ids: anchor: Analytics-for-a-Better-World---Parvathy-Krishnan-e1vo27h youtube: b6x5zZ3C6sQ -image: images/podcast/s13e02-analytics-for-better-world.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Analytics-for-a-Better-World---Parvathy-Krishnan-e1vo27h apple: https://podcasts.apple.com/us/podcast/analytics-for-a-better-world-parvathy-krishnan/id1541710331?i=1000602678901 spotify: https://open.spotify.com/episode/5Xiuu4jMBCMuwkokXbwhE2?si=nGRQrMUaRNa5EINbtJadBA youtube: https://www.youtube.com/watch?v=b6x5zZ3C6sQ -season: 13 -short: Analytics for a Better World -title: 'Analytics for Nonprofits: Build Data Maturity, Teams, Tools & Optimization - Strategies' + +description: "Learn nonprofit analytics and data maturity strategies to build teams, choose tools and optimize programs-practical roadmaps, case studies, and 
open resources" +intro: "How can nonprofits move from basic reporting to optimization using analytics while building the right teams, tools, and governance? In this episode, Parvathy Krishnan, CTO at Analytics for a Better World and professional doctorate in data science, walks through practical steps for building data maturity in the social sector. Drawing on discovery workshops, fellowship pilots (including a waste-collection optimization project in Nairobi), and partnerships with academic and industry groups, Parvathy explains how to assess needs, design maturity roadmaps, and prioritize short- and long-term goals.

Listen to learn how to structure nonprofit data teams (analysts, data scientists, engineers, and blended roles), select technology (KoboToolbox, PostgreSQL, dashboards, Python/R, cloud deployment), and implement process and governance practices including privacy, SOPs, and version control. The episode also covers curriculum progression—from descriptive to diagnostic, predictive, and optimization—academy programs for practitioners and executives, open resources on YouTube and GitHub, and real-world optimization use cases like healthcare access and COVID testing lab placement. Ideal for nonprofit leaders, data practitioners, and funders seeking actionable guidance on analytics for nonprofits, data maturity, and optimization strategies" +topics: +- nonprofit +- data maturity +- data science +- analytics +dateadded: 2023-03-04 + +duration: PT00H59M21S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=0 + endOffset: 70 +- name: 'Overview: Analytics for a Better World mission and guest intro' + startOffset: 70 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=70 + endOffset: 114 +- name: 'Career Path: From renewable energy to data science and CTO role' + startOffset: 114 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=114 + endOffset: 278 +- name: 'CTO Responsibilities: Connecting nonprofits with research and tech capacity' + startOffset: 278 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=278 + endOffset: 380 +- name: 'Discovery Workshops: Assessing nonprofit needs and data maturity' + startOffset: 380 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=380 + endOffset: 569 +- name: 'Fellowship Case Study: Waste-collection optimization pilot in Nairobi' + startOffset: 569 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=569 + endOffset: 753 +- name: 'Data Maturity Comparison: Nonprofit vs. 
private-sector analytics' + startOffset: 753 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=753 + endOffset: 923 +- name: 'Talent & Purpose: Motivating data professionals to join the public sector' + startOffset: 923 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=923 + endOffset: 1073 +- name: 'Academy Structure: Programs for practitioners, analytics translators, executives' + startOffset: 1073 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1073 + endOffset: 1214 +- name: 'Open Resources: YouTube lectures, GitHub, and open-source deliverables' + startOffset: 1214 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1214 + endOffset: 1346 +- name: 'Curriculum Focus: Descriptive → diagnostic → predictive → optimization' + startOffset: 1346 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1346 + endOffset: 1536 +- name: 'Audience Profile: MBA, business analytics, and technical students' + startOffset: 1536 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1536 + endOffset: 1699 +- name: 'Student Engagement: Thesis collaborations and researcher pathways' + startOffset: 1699 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1699 + endOffset: 1847 +- name: 'Maturity Roadmaps: Scans, short/long-term goals, and cost optimization' + startOffset: 1847 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1847 + endOffset: 2046 +- name: 'People Dimension: Roles for data collection, analysis, and app development' + startOffset: 2046 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2046 + endOffset: 2194 +- name: 'Process Dimension: Data governance, privacy, SOPs, and workflows' + startOffset: 2194 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2194 + endOffset: 2302 +- name: 'Technology Dimension: Centralized data, version control, and tech selection' + startOffset: 2302 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2302 + endOffset: 2368 +- name: 'Tool Recommendations: Dashboards, Python/R, and cloud deployment options' + 
startOffset: 2368 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2368 + endOffset: 2658 +- name: 'Data Platforms: KoboToolbox, PostgreSQL, and Digital Public Goods guidance' + startOffset: 2658 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2658 + endOffset: 2751 +- name: 'Team Profiles: Analysts, data scientists, engineers, and blended roles' + startOffset: 2751 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2751 + endOffset: 2955 +- name: 'Data Engineering Needs: Moving from research to deployed applications' + startOffset: 2955 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2955 + endOffset: 3006 +- name: 'Optimization Use Cases: Healthcare access and COVID testing lab placement' + startOffset: 3006 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3006 + endOffset: 3170 +- name: 'Partnerships & Staffing: Ortec, academic partners, and on-demand talent network' + startOffset: 3170 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3170 + endOffset: 3247 +- name: 'Organizational Model: Small core team and large extended research network' + startOffset: 3247 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3247 + endOffset: 3338 +- name: 'Becoming Data-Driven: Strategy plus investments in people, processes, technology' + startOffset: 3338 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3338 + endOffset: 3502 +- name: 'Recommended Reading & Daily Resources: Culture Map, 7 Habits, Towards Data + Science' + startOffset: 3502 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3502 + endOffset: 3600 +- name: Closing Remarks and links to Academy resources and contact info + startOffset: 3600 + url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3600 + endOffset: 3561 + transcript: - header: Podcast Introduction - header: 'Overview: Analytics for a Better World mission and guest intro' @@ -1061,137 +1183,6 @@ transcript: sec: 3631 time: '1:00:31' who: Alexey -description: Learn nonprofit analytics and data maturity strategies to 
build teams, - choose tools and optimize programs-practical roadmaps, case studies, and open resources. -intro: How can nonprofits move from basic reporting to optimization using analytics - while building the right teams, tools, and governance? In this episode, Parvathy - Krishnan, CTO at Analytics for a Better World and professional doctorate in data - science, walks through practical steps for building data maturity in the social - sector. Drawing on discovery workshops, fellowship pilots (including a waste-collection - optimization project in Nairobi), and partnerships with academic and industry groups, - Parvathy explains how to assess needs, design maturity roadmaps, and prioritize - short- and long-term goals.

Listen to learn how to structure nonprofit - data teams (analysts, data scientists, engineers, and blended roles), select technology - (KoboToolbox, PostgreSQL, dashboards, Python/R, cloud deployment), and implement - process and governance practices including privacy, SOPs, and version control. The - episode also covers curriculum progression—from descriptive to diagnostic, predictive, - and optimization—academy programs for practitioners and executives, open resources - on YouTube and GitHub, and real-world optimization use cases like healthcare access - and COVID testing lab placement. Ideal for nonprofit leaders, data practitioners, - and funders seeking actionable guidance on analytics for nonprofits, data maturity, - and optimization strategies. -dateadded: '2023-03-04' -duration: PT00H59M21S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=0 - endOffset: 70 -- name: 'Overview: Analytics for a Better World mission and guest intro' - startOffset: 70 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=70 - endOffset: 114 -- name: 'Career Path: From renewable energy to data science and CTO role' - startOffset: 114 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=114 - endOffset: 278 -- name: 'CTO Responsibilities: Connecting nonprofits with research and tech capacity' - startOffset: 278 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=278 - endOffset: 380 -- name: 'Discovery Workshops: Assessing nonprofit needs and data maturity' - startOffset: 380 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=380 - endOffset: 569 -- name: 'Fellowship Case Study: Waste-collection optimization pilot in Nairobi' - startOffset: 569 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=569 - endOffset: 753 -- name: 'Data Maturity Comparison: Nonprofit vs. 
private-sector analytics' - startOffset: 753 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=753 - endOffset: 923 -- name: 'Talent & Purpose: Motivating data professionals to join the public sector' - startOffset: 923 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=923 - endOffset: 1073 -- name: 'Academy Structure: Programs for practitioners, analytics translators, executives' - startOffset: 1073 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1073 - endOffset: 1214 -- name: 'Open Resources: YouTube lectures, GitHub, and open-source deliverables' - startOffset: 1214 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1214 - endOffset: 1346 -- name: 'Curriculum Focus: Descriptive → diagnostic → predictive → optimization' - startOffset: 1346 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1346 - endOffset: 1536 -- name: 'Audience Profile: MBA, business analytics, and technical students' - startOffset: 1536 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1536 - endOffset: 1699 -- name: 'Student Engagement: Thesis collaborations and researcher pathways' - startOffset: 1699 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1699 - endOffset: 1847 -- name: 'Maturity Roadmaps: Scans, short/long-term goals, and cost optimization' - startOffset: 1847 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=1847 - endOffset: 2046 -- name: 'People Dimension: Roles for data collection, analysis, and app development' - startOffset: 2046 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2046 - endOffset: 2194 -- name: 'Process Dimension: Data governance, privacy, SOPs, and workflows' - startOffset: 2194 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2194 - endOffset: 2302 -- name: 'Technology Dimension: Centralized data, version control, and tech selection' - startOffset: 2302 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2302 - endOffset: 2368 -- name: 'Tool Recommendations: Dashboards, Python/R, and cloud deployment options' - 
startOffset: 2368 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2368 - endOffset: 2658 -- name: 'Data Platforms: KoboToolbox, PostgreSQL, and Digital Public Goods guidance' - startOffset: 2658 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2658 - endOffset: 2751 -- name: 'Team Profiles: Analysts, data scientists, engineers, and blended roles' - startOffset: 2751 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2751 - endOffset: 2955 -- name: 'Data Engineering Needs: Moving from research to deployed applications' - startOffset: 2955 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=2955 - endOffset: 3006 -- name: 'Optimization Use Cases: Healthcare access and COVID testing lab placement' - startOffset: 3006 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3006 - endOffset: 3170 -- name: 'Partnerships & Staffing: Ortec, academic partners, and on-demand talent network' - startOffset: 3170 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3170 - endOffset: 3247 -- name: 'Organizational Model: Small core team and large extended research network' - startOffset: 3247 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3247 - endOffset: 3338 -- name: 'Becoming Data-Driven: Strategy plus investments in people, processes, technology' - startOffset: 3338 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3338 - endOffset: 3502 -- name: 'Recommended Reading & Daily Resources: Culture Map, 7 Habits, Towards Data - Science' - startOffset: 3502 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3502 - endOffset: 3600 -- name: Closing Remarks and links to Academy resources and contact info - startOffset: 3600 - url: https://www.youtube.com/watch?v=b6x5zZ3C6sQ&t=3600 - endOffset: 3561 --- Links: diff --git a/_podcast/s02e07-abc-data-science.md b/_podcast/data-science-career-abc-framework.md similarity index 97% rename from _podcast/s02e07-abc-data-science.md rename to _podcast/data-science-career-abc-framework.md index f2580cce..26b269de 100644 --- 
a/_podcast/s02e07-abc-data-science.md +++ b/_podcast/data-science-career-abc-framework.md @@ -1,12 +1,11 @@ --- -title: 'Data Science Career Guide: ABC Framework (Analyst, Builder, Consultant) & - Transition Tips' -short: The ABC’s of Data Science -guests: -- dannyma -image: images/podcast/s02e07-abc-data-science.jpg +title: "Data Science Career Guide: ABC Framework (Analyst, Builder, Consultant) & Transition Tips" +short: "The ABC’s of Data Science" season: 2 episode: 7 +guests: +- dannyma +image: images/podcast/data-science-career-abc-framework.jpg ids: youtube: HVQ0DZOQcts anchor: The-ABCs-of-Data-Science---Danny-Ma-er33oa @@ -15,6 +14,140 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-ABCs-of-Data-Science---Danny-Ma-er33oa spotify: https://open.spotify.com/episode/5T1Nm3HvrS9oIMH6C2AWcf apple: https://podcasts.apple.com/us/podcast/the-abcs-of-data-science-danny-ma/id1541710331?i=1000510794953 + +description: "Master the Data Science ABC Framework: Analyst, Builder, Consultant. Get SQL, Python, MLOps career tips, project roadmap, transition strategies to land roles." +intro: "How do you pick the right data science path—and actually make the transition? In this episode, Danny Ma, a recovering data scientist now focused on ML and data engineering, walks through his ABC Framework (Analyst, Builder, Consultant) and pragmatic steps for career moves. Danny, who runs the #DataWithDanny community (4,500+ members) and specializes in analytics, supervised ML, data architecture and digital customer experiments, traces his own shift from SQL/SAS/Excel workflows to Python, Kaggle projects and production systems.

We cover the ABC Framework origins and definitions: Type A (Analyst) — data exploration, visualization and storytelling; Type B (Builder) — ML engineering, MLOps and production mindset; Type C (Consultant/Leader) — stakeholder persuasion and strategy. Danny shares transition tactics: build projects first, learn theory as needed, core tools (Git, Docker, cloud), practicing engineering via mini-projects and mentorship, portfolio and referral strategies, and when advanced degrees matter. Tune in to get concrete guidance on skills to prioritize, how to gain production experience, and a clear roadmap from SQL → visualization → ML → deep learning to advance your data science career." +topics: +- career transition +- data science +- machine learning +- data analysis +dateadded: 2021-02-26 + +duration: PT01H24M57S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=0 + endOffset: 49 +- name: LinkedIn Memes & Creative Editing for Data Audiences + startOffset: 49 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=49 + endOffset: 213 +- name: 'Career Journey: Analytics to Data Science' + startOffset: 213 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=213 + endOffset: 296 +- name: Transition to Python, Kaggle & Self-Directed Learning + startOffset: 296 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=296 + endOffset: 392 +- name: 'Early Tools: SQL, SAS and Excel Workflows' + startOffset: 392 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=392 + endOffset: 499 +- name: 'Moving into Data Science: Team Integration at a Bank' + startOffset: 499 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=499 + endOffset: 546 +- name: 'Machine Learning Projects: Propensity Models & Experimentation' + startOffset: 546 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=546 + endOffset: 689 +- name: Origins of the ABC Framework for Data Science Roles + startOffset: 689 + url: 
https://www.youtube.com/watch?v=HVQ0DZOQcts&t=689 + endOffset: 738 +- name: 'Defining the Three Profiles: Analyst, Builder, Consultant' + startOffset: 738 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=738 + endOffset: 797 +- name: 'Type A (Analyst): Data Exploration, Visualization & Storytelling' + startOffset: 797 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=797 + endOffset: 961 +- name: 'Type A Backgrounds: Research, Statistics & Analyst Pathways' + startOffset: 961 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=961 + endOffset: 1100 +- name: 'Type A Skillset: Programming, Theory, Experiment Design' + startOffset: 1100 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1100 + endOffset: 1201 +- name: 'Learning Strategy: Build Projects First, Learn Theory When Needed' + startOffset: 1201 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1201 + endOffset: 1314 +- name: 'Curiosity Spectrum: Depth of Inquiry & Learning Motivation' + startOffset: 1314 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1314 + endOffset: 1553 +- name: 'Type B (Builder): ML Engineering, MLOps & Production Systems' + startOffset: 1553 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1553 + endOffset: 1706 +- name: Technical Debt, Production Mindset & Systemic Risk + startOffset: 1706 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1706 + endOffset: 1826 +- name: 'Pathway A→B: Gaining Production Experience & On-the-Job Pressure' + startOffset: 1826 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1826 + endOffset: 1992 +- name: 'Core Tools for Transition: Git, Docker, Cloud Platforms' + startOffset: 1992 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1992 + endOffset: 2206 +- name: 'Practicing Engineering Skills Outside Work: Mentors & Mini-Projects' + startOffset: 2206 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2206 + endOffset: 2558 +- name: 'Type C (Consultant/Leader): Stakeholder Persuasion & Strategy' + startOffset: 2558 + url: 
https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2558 + endOffset: 2929 +- name: 'Testing Leadership: Shifting from Hands-On to People Management' + startOffset: 2929 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2929 + endOffset: 3288 +- name: 'Building a Lean Data Science Team: Roles, Tech Lead & Data Lead' + startOffset: 3288 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3288 + endOffset: 3716 +- name: Domain Expertise vs Technical Specialization for Career Mobility + startOffset: 3716 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3716 + endOffset: 3851 +- name: 'Breaking In: Project Portfolios, Referrals & Application Strategy' + startOffset: 3851 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3851 + endOffset: 4042 +- name: 'Entry Choice: Analyst vs Builder — Trade-offs & Competitive Edge' + startOffset: 4042 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4042 + endOffset: 4346 +- name: 'Bootcamps & Intensives: Benefits, Limits & Realistic Expectations' + startOffset: 4346 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4346 + endOffset: 4477 +- name: 'Serious SQL Course: Curriculum, Case Studies & Apprenticeship Model' + startOffset: 4477 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4477 + endOffset: 4745 +- name: 'Data Science Roadmap: SQL → Visualization → ML → Deep Learning' + startOffset: 4745 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4745 + endOffset: 4984 +- name: 'Advanced Degrees: When Master''s/PhD Matter in Data Science Roles' + startOffset: 4984 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4984 + endOffset: 5121 +- name: Episode Wrap-up, Resources & Next Steps + startOffset: 5121 + url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=5121 + endOffset: 5097 + transcript: - header: Podcast Introduction - header: LinkedIn Memes & Creative Editing for Data Audiences @@ -1251,147 +1384,6 @@ transcript: sec: 5146 time: '1:25:46' who: Danny -description: 'Master the Data Science ABC Framework: 
Analyst, Builder, Consultant. - Get SQL, Python, MLOps career tips, project roadmap, transition strategies to land - roles.' -intro: 'How do you pick the right data science path—and actually make the transition? - In this episode, Danny Ma, a recovering data scientist now focused on ML and data - engineering, walks through his ABC Framework (Analyst, Builder, Consultant) and - pragmatic steps for career moves. Danny, who runs the #DataWithDanny community (4,500+ - members) and specializes in analytics, supervised ML, data architecture and digital - customer experiments, traces his own shift from SQL/SAS/Excel workflows to Python, - Kaggle projects and production systems.

We cover the ABC Framework origins - and definitions: Type A (Analyst) — data exploration, visualization and storytelling; - Type B (Builder) — ML engineering, MLOps and production mindset; Type C (Consultant/Leader) - — stakeholder persuasion and strategy. Danny shares transition tactics: build projects - first, learn theory as needed, core tools (Git, Docker, cloud), practicing engineering - via mini-projects and mentorship, portfolio and referral strategies, and when advanced - degrees matter. Tune in to get concrete guidance on skills to prioritize, how to - gain production experience, and a clear roadmap from SQL → visualization → ML → - deep learning to advance your data science career.' -dateadded: '2021-02-26' -duration: PT01H24M57S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=0 - endOffset: 49 -- name: LinkedIn Memes & Creative Editing for Data Audiences - startOffset: 49 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=49 - endOffset: 213 -- name: 'Career Journey: Analytics to Data Science' - startOffset: 213 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=213 - endOffset: 296 -- name: Transition to Python, Kaggle & Self-Directed Learning - startOffset: 296 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=296 - endOffset: 392 -- name: 'Early Tools: SQL, SAS and Excel Workflows' - startOffset: 392 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=392 - endOffset: 499 -- name: 'Moving into Data Science: Team Integration at a Bank' - startOffset: 499 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=499 - endOffset: 546 -- name: 'Machine Learning Projects: Propensity Models & Experimentation' - startOffset: 546 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=546 - endOffset: 689 -- name: Origins of the ABC Framework for Data Science Roles - startOffset: 689 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=689 - endOffset: 738 -- name: 'Defining the Three 
Profiles: Analyst, Builder, Consultant' - startOffset: 738 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=738 - endOffset: 797 -- name: 'Type A (Analyst): Data Exploration, Visualization & Storytelling' - startOffset: 797 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=797 - endOffset: 961 -- name: 'Type A Backgrounds: Research, Statistics & Analyst Pathways' - startOffset: 961 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=961 - endOffset: 1100 -- name: 'Type A Skillset: Programming, Theory, Experiment Design' - startOffset: 1100 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1100 - endOffset: 1201 -- name: 'Learning Strategy: Build Projects First, Learn Theory When Needed' - startOffset: 1201 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1201 - endOffset: 1314 -- name: 'Curiosity Spectrum: Depth of Inquiry & Learning Motivation' - startOffset: 1314 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1314 - endOffset: 1553 -- name: 'Type B (Builder): ML Engineering, MLOps & Production Systems' - startOffset: 1553 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1553 - endOffset: 1706 -- name: Technical Debt, Production Mindset & Systemic Risk - startOffset: 1706 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1706 - endOffset: 1826 -- name: 'Pathway A→B: Gaining Production Experience & On-the-Job Pressure' - startOffset: 1826 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1826 - endOffset: 1992 -- name: 'Core Tools for Transition: Git, Docker, Cloud Platforms' - startOffset: 1992 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=1992 - endOffset: 2206 -- name: 'Practicing Engineering Skills Outside Work: Mentors & Mini-Projects' - startOffset: 2206 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2206 - endOffset: 2558 -- name: 'Type C (Consultant/Leader): Stakeholder Persuasion & Strategy' - startOffset: 2558 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2558 - endOffset: 2929 -- name: 'Testing 
Leadership: Shifting from Hands-On to People Management' - startOffset: 2929 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=2929 - endOffset: 3288 -- name: 'Building a Lean Data Science Team: Roles, Tech Lead & Data Lead' - startOffset: 3288 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3288 - endOffset: 3716 -- name: Domain Expertise vs Technical Specialization for Career Mobility - startOffset: 3716 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3716 - endOffset: 3851 -- name: 'Breaking In: Project Portfolios, Referrals & Application Strategy' - startOffset: 3851 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=3851 - endOffset: 4042 -- name: 'Entry Choice: Analyst vs Builder — Trade-offs & Competitive Edge' - startOffset: 4042 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4042 - endOffset: 4346 -- name: 'Bootcamps & Intensives: Benefits, Limits & Realistic Expectations' - startOffset: 4346 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4346 - endOffset: 4477 -- name: 'Serious SQL Course: Curriculum, Case Studies & Apprenticeship Model' - startOffset: 4477 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4477 - endOffset: 4745 -- name: 'Data Science Roadmap: SQL → Visualization → ML → Deep Learning' - startOffset: 4745 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4745 - endOffset: 4984 -- name: 'Advanced Degrees: When Master''s/PhD Matter in Data Science Roles' - startOffset: 4984 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=4984 - endOffset: 5121 -- name: Episode Wrap-up, Resources & Next Steps - startOffset: 5121 - url: https://www.youtube.com/watch?v=HVQ0DZOQcts&t=5121 - endOffset: 5097 --- Links: diff --git a/_podcast/s03e09-what-data-scientists-dont-mention.md b/_podcast/data-science-failures-and-mlops-lessons.md similarity index 96% rename from _podcast/s03e09-what-data-scientists-dont-mention.md rename to _podcast/data-science-failures-and-mlops-lessons.md index 327b25da..b3f36837 100644 --- 
a/_podcast/s03e09-what-data-scientists-dont-mention.md +++ b/_podcast/data-science-failures-and-mlops-lessons.md @@ -1,12 +1,11 @@ --- -title: 'Turn Data Science Project Failures into Career Wins: Production Lessons, MLOps - Fixes & Framing Failures on LinkedIn' -short: What Data Scientists Don’t Mention in Their LinkedIn Profiles -guests: -- yurykashnitsky -image: images/podcast/s03e09-what-data-scientists-dont-mention.jpg +title: "Turn Data Science Project Failures into Career Wins: Production Lessons, MLOps Fixes & Framing Failures on LinkedIn" +short: "What Data Scientists Don’t Mention in Their LinkedIn Profiles" season: 3 episode: 9 +guests: +- yurykashnitsky +image: images/podcast/data-science-failures-and-mlops-lessons.jpg ids: youtube: c6dK1LWpv4g anchor: What-Data-Scientists-Dont-Mention-in-Their-LinkedIn-Profiles---Yury-Kashnitsky-e125jjl @@ -15,6 +14,136 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/What-Data-Scientists-Dont-Mention-in-Their-LinkedIn-Profiles---Yury-Kashnitsky-e125jjl spotify: https://open.spotify.com/episode/3KR6zErxqeDuQ2jo8NDvNx apple: https://podcasts.apple.com/us/podcast/what-data-scientists-dont-mention-in-their-linkedin/id1541710331?i=1000524260842 + +description: "Discover how to turn data science project failures into career wins: practical MLOps fixes, production lessons, LinkedIn framing tips to boost hiring outcomes." +intro: "How do you turn data science project failures into tangible career wins — and how should you talk about them on LinkedIn? In this episode, Yury Kashnitsky, Ph.D. in applied math, Kaggle Master and Senior ML Scientist at Elsevier who also leads the open course mlcourse.ai, walks through real production ML lessons and MLOps fixes learned across academia, startups and industry.

We dig into common data science pitfalls and a concrete case study (a BERT-based proofreading regression stopped early), stakeholder communication for when to kill a project, and the missing role of a data product manager. Yury breaks down engineering vs research trade-offs in deployment, production fixes like reducing re-ranking scope to meet latency, when gradient boosting beats CTR heuristics, and DevOps anti-patterns such as SSH deploys and no CI/CD. We also cover practical topics: data labeling cost/quality, going from notebooks to production, multilingual telco NLP, resume choices, interview questions about revenue-producing ML, and how to frame failed projects on LinkedIn with honesty and lessons learned.

Listen to get actionable MLOps and production-ML strategies, communication tactics for stakeholders, and guidance on reframing failures into career momentum." +topics: +- machine learning +- MLOps +- career growth +- communication +dateadded: 2021-06-06 + +duration: PT00H59M57S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=0 + endOffset: 90 +- name: 'Episode Theme: Failures and LinkedIn Omissions' + startOffset: 90 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=90 + endOffset: 152 +- name: 'Guest Opening: Background Snapshot' + startOffset: 152 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=152 + endOffset: 185 +- name: 'Career Journey: Aviation, Academia, and Transition to NLP' + startOffset: 185 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=185 + endOffset: 298 +- name: 'CV Choices: Omitting Hobbies and Personal Details' + startOffset: 298 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=298 + endOffset: 335 +- name: 'Project Failures Overview: Common Data Science Pitfalls' + startOffset: 335 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=335 + endOffset: 382 +- name: 'Case Study — Proofreading AI: BERT Regression and Early Termination' + startOffset: 382 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=382 + endOffset: 666 +- name: 'Stakeholder Communication: Making the Call to Stop a Project' + startOffset: 666 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=666 + endOffset: 691 +- name: 'Product Management Gap: Value of a Data Product Manager' + startOffset: 691 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=691 + endOffset: 1006 +- name: 'Customer Development: Rapid Validation vs Building ML First' + startOffset: 1006 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1006 + endOffset: 1080 +- name: 'Engineering vs Research: Deployment and Serving Constraints' + startOffset: 1080 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1080 + 
endOffset: 1144 +- name: 'Production Lesson: Gradient Boosting vs CTR Heuristic Baseline' + startOffset: 1144 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1144 + endOffset: 1525 +- name: 'Performance Fix: Re-ranking Scope Reduction to Meet Latency' + startOffset: 1525 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1525 + endOffset: 1556 +- name: 'DevOps Anti-patterns: SSH Deploys, No CI/CD and Technical Debt' + startOffset: 1556 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1556 + endOffset: 1691 +- name: 'From Notebooks to Production: BI, LTV Predictions, and MLOps Needs' + startOffset: 1691 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1691 + endOffset: 1844 +- name: 'Startup Anecdote: GPU Overstock, Bitcoin, and Sentiment Analysis' + startOffset: 1844 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1844 + endOffset: 2076 +- name: 'Data Labeling Reality: Cost, Quality, and Mechanical Turk' + startOffset: 2076 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2076 + endOffset: 2118 +- name: 'Resume Strategy: Omitting Short or Sensitive Startup Stints' + startOffset: 2118 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2118 + endOffset: 2172 +- name: 'Telco NLP: Multilingual Complaint Classification & Transfer Learning' + startOffset: 2172 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2172 + endOffset: 2394 +- name: 'Too Much Freedom: Research Time vs Impactful Production Work' + startOffset: 2394 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2394 + endOffset: 2467 +- name: 'Interview Tip: Ask About Active Revenue-Producing ML in Production' + startOffset: 2467 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2467 + endOffset: 2600 +- name: 'Digital Presence: GitHub, Open Courses, Talks and Hiring Impact' + startOffset: 2600 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2600 + endOffset: 2735 +- name: 'Work-Life Balance Hacks: Focus Time and Side Projects' + startOffset: 2735 + url: 
https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2735 + endOffset: 2907 +- name: 'Public Activity ROI: A/B Tests, Talks, and Career Opportunities' + startOffset: 2907 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2907 + endOffset: 2970 +- name: 'Framing Failed Projects on LinkedIn: Honesty and Lessons Learned' + startOffset: 2970 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2970 + endOffset: 3161 +- name: 'Business-Travel Boundaries: Perm Trips and Weekend Work Limits' + startOffset: 3161 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3161 + endOffset: 3516 +- name: 'Closing Thoughts: Embracing Failures and Building Resilience' + startOffset: 3516 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3516 + endOffset: 3624 +- name: 'Contact & Resources: Open Course and Social Links' + startOffset: 3624 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3624 + endOffset: 3687 +- name: Episode Outro + startOffset: 3687 + url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3687 + endOffset: 3597 + transcript: - header: Episode Introduction - header: 'Episode Theme: Failures and LinkedIn Omissions' @@ -807,144 +936,6 @@ transcript: sec: 3687 time: '1:01:27' who: Yury -description: 'Discover how to turn data science project failures into career wins: - practical MLOps fixes, production lessons, LinkedIn framing tips to boost hiring - outcomes.' -intro: 'How do you turn data science project failures into tangible career wins — - and how should you talk about them on LinkedIn? In this episode, Yury Kashnitsky, - Ph.D. in applied math, Kaggle Master and Senior ML Scientist at Elsevier who also - leads the open course mlcourse.ai, walks through real production ML lessons and - MLOps fixes learned across academia, startups and industry.

We dig into - common data science pitfalls and a concrete case study (a BERT-based proofreading - regression stopped early), stakeholder communication for when to kill a project, - and the missing role of a data product manager. Yury breaks down engineering vs - research trade-offs in deployment, production fixes like reducing re-ranking scope - to meet latency, when gradient boosting beats CTR heuristics, and DevOps anti-patterns - such as SSH deploys and no CI/CD. We also cover practical topics: data labeling - cost/quality, going from notebooks to production, multilingual telco NLP, resume - choices, interview questions about revenue-producing ML, and how to frame failed - projects on LinkedIn with honesty and lessons learned.

Listen to get actionable - MLOps and production-ML strategies, communication tactics for stakeholders, and - guidance on reframing failures into career momentum.' -dateadded: '2021-06-06' -duration: PT00H59M57S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=0 - endOffset: 90 -- name: 'Episode Theme: Failures and LinkedIn Omissions' - startOffset: 90 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=90 - endOffset: 152 -- name: 'Guest Opening: Background Snapshot' - startOffset: 152 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=152 - endOffset: 185 -- name: 'Career Journey: Aviation, Academia, and Transition to NLP' - startOffset: 185 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=185 - endOffset: 298 -- name: 'CV Choices: Omitting Hobbies and Personal Details' - startOffset: 298 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=298 - endOffset: 335 -- name: 'Project Failures Overview: Common Data Science Pitfalls' - startOffset: 335 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=335 - endOffset: 382 -- name: 'Case Study — Proofreading AI: BERT Regression and Early Termination' - startOffset: 382 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=382 - endOffset: 666 -- name: 'Stakeholder Communication: Making the Call to Stop a Project' - startOffset: 666 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=666 - endOffset: 691 -- name: 'Product Management Gap: Value of a Data Product Manager' - startOffset: 691 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=691 - endOffset: 1006 -- name: 'Customer Development: Rapid Validation vs Building ML First' - startOffset: 1006 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1006 - endOffset: 1080 -- name: 'Engineering vs Research: Deployment and Serving Constraints' - startOffset: 1080 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1080 - endOffset: 1144 -- name: 'Production Lesson: Gradient Boosting vs CTR 
Heuristic Baseline' - startOffset: 1144 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1144 - endOffset: 1525 -- name: 'Performance Fix: Re-ranking Scope Reduction to Meet Latency' - startOffset: 1525 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1525 - endOffset: 1556 -- name: 'DevOps Anti-patterns: SSH Deploys, No CI/CD and Technical Debt' - startOffset: 1556 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1556 - endOffset: 1691 -- name: 'From Notebooks to Production: BI, LTV Predictions, and MLOps Needs' - startOffset: 1691 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1691 - endOffset: 1844 -- name: 'Startup Anecdote: GPU Overstock, Bitcoin, and Sentiment Analysis' - startOffset: 1844 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=1844 - endOffset: 2076 -- name: 'Data Labeling Reality: Cost, Quality, and Mechanical Turk' - startOffset: 2076 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2076 - endOffset: 2118 -- name: 'Resume Strategy: Omitting Short or Sensitive Startup Stints' - startOffset: 2118 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2118 - endOffset: 2172 -- name: 'Telco NLP: Multilingual Complaint Classification & Transfer Learning' - startOffset: 2172 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2172 - endOffset: 2394 -- name: 'Too Much Freedom: Research Time vs Impactful Production Work' - startOffset: 2394 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2394 - endOffset: 2467 -- name: 'Interview Tip: Ask About Active Revenue-Producing ML in Production' - startOffset: 2467 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2467 - endOffset: 2600 -- name: 'Digital Presence: GitHub, Open Courses, Talks and Hiring Impact' - startOffset: 2600 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2600 - endOffset: 2735 -- name: 'Work-Life Balance Hacks: Focus Time and Side Projects' - startOffset: 2735 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2735 - endOffset: 2907 -- name: 'Public 
Activity ROI: A/B Tests, Talks, and Career Opportunities' - startOffset: 2907 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2907 - endOffset: 2970 -- name: 'Framing Failed Projects on LinkedIn: Honesty and Lessons Learned' - startOffset: 2970 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=2970 - endOffset: 3161 -- name: 'Business-Travel Boundaries: Perm Trips and Weekend Work Limits' - startOffset: 3161 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3161 - endOffset: 3516 -- name: 'Closing Thoughts: Embracing Failures and Building Resilience' - startOffset: 3516 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3516 - endOffset: 3624 -- name: 'Contact & Resources: Open Course and Social Links' - startOffset: 3624 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3624 - endOffset: 3687 -- name: Episode Outro - startOffset: 3687 - url: https://www.youtube.com/watch?v=c6dK1LWpv4g&t=3687 - endOffset: 3597 --- diff --git a/_podcast/s10e01-data-science-for-social-impact.md b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md similarity index 97% rename from _podcast/s10e01-data-science-for-social-impact.md rename to _podcast/data-science-for-public-policy-ethical-ai-social-impact.md index d1fda942..8ba849d4 100644 --- a/_podcast/s10e01-data-science-for-social-impact.md +++ b/_podcast/data-science-for-public-policy-ethical-ai-social-impact.md @@ -1,19 +1,130 @@ --- +title: "Data Science for Public Policy — Ethical AI, Climate Justice & Impact Projects" +short: "Data Science for Social Impact" +season: 10 episode: 1 guests: - christinecepelak +image: images/podcast/data-science-for-public-policy-ethical-ai-social-impact.jpg ids: anchor: Data-Science-for-Social-Impact---Christine-Cepelak-e1li47e youtube: xWC1HAfekRk -image: images/podcast/s10e01-data-science-for-social-impact.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Science-for-Social-Impact---Christine-Cepelak-e1li47e apple: 
https://podcasts.apple.com/us/podcast/data-science-for-social-impact-christine-cepelak/id1541710331?i=1000571591865 spotify: https://open.spotify.com/episode/7fzBhDrfVfylnBLCJGwUHC?si=3b03d59083804346 youtube: https://www.youtube.com/watch?v=xWC1HAfekRk -season: 10 -short: Data Science for Social Impact -title: Data Science for Public Policy — Ethical AI, Climate Justice & Impact Projects + +description: "Learn data science for public policy: ethical AI, climate justice & impact project strategies, career tips and actionable project design for social good." +intro: "How can data science meaningfully shape public policy without becoming a tech-first solution or creating new ethical harms? In this episode, Christine Cepelak, a writer and researcher of tech and social issues who’s studying Data Science for Public Policy and has years of experience managing social programs, walks through the practical realities of data science for public policy. We cover career paths and sector differences, a community organizing case study on electronics recycling, and real-world use cases like drone computer vision for refugee aid and rooftop sustainability. Christine digs into ethical AI concerns — including the EU AI Act and social scoring risks — plus project design for long-term impact, stakeholder collaboration with NGOs, and building data pipelines amid limited IT infrastructure. Listeners will also hear about public data gaps (recycling programs, corporate transparency), research applications such as satellite imagery for poverty estimation, and future priorities like climate justice and gender equality. 
Tune in to get concrete guidance on starting volunteer impact projects, where demand for impact data scientists lies, and how to design responsible, policy-driven data work." +topics: +- data science +- public policy +- ethical AI +- social impact +dateadded: 2022-07-30 + +duration: PT00H58M44S + +quotableClips: +- name: Episode Intro & Guest Christine Cepelak + startOffset: 0 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=0 + endOffset: 93 +- name: 'Career Journey: Program Management to Data Science for Public Policy' + startOffset: 93 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=93 + endOffset: 247 +- name: 'Private vs Public Sector: Differences for Data Work' + startOffset: 247 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=247 + endOffset: 325 +- name: 'Public Policy Defined: Laws, Governance & Social Impact' + startOffset: 325 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=325 + endOffset: 507 +- name: 'Community Organizing Case Study: Electronics Recycling Campaign' + startOffset: 507 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=507 + endOffset: 614 +- name: 'Policy vs Political Science: Theory, Practice & Implementation' + startOffset: 614 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=614 + endOffset: 730 +- name: 'Education Landscape: Data Science for Public Policy Programs & DSSG' + startOffset: 730 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=730 + endOffset: 862 +- name: 'Domain Nuances: Data Science for Social Impact vs Typical Industry Work' + startOffset: 862 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=862 + endOffset: 1032 +- name: 'Use Cases: Drone Computer Vision for Refugee Aid & Rooftop Sustainability' + startOffset: 1032 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1032 + endOffset: 1266 +- name: 'Ethics & Regulation: Ethical AI, EU AI Act and Social Scoring Risks' + startOffset: 1266 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1266 + endOffset: 1581 +- name: 'Project 
Design: Long-term Impact, Iteration & Avoiding Tech-First Solutions' + startOffset: 1581 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1581 + endOffset: 1686 +- name: 'Stakeholder Collaboration: NGOs, HR Use Cases & Mining Domain Knowledge' + startOffset: 1686 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1686 + endOffset: 1832 +- name: 'Data Challenges: Building Pipelines with Limited IT Infrastructure' + startOffset: 1832 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1832 + endOffset: 2209 +- name: 'Public Data Gaps: Recycling Programs, Corporate Transparency & Access Issues' + startOffset: 2209 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2209 + endOffset: 2367 +- name: 'Future Focus Areas: Climate Justice, Gender Equality & Responsible Tech' + startOffset: 2367 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2367 + endOffset: 2419 +- name: 'Gender Inequality Solutions: Salary Transparency & Inclusive Hiring Policies' + startOffset: 2419 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2419 + endOffset: 2618 +- name: 'Corporate Responsibility: CSR Data Needs & Demand for Impact Data Scientists' + startOffset: 2618 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2618 + endOffset: 2785 +- name: 'Public Sector Roles: Chief Data Scientist and Government Data Strategy' + startOffset: 2785 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2785 + endOffset: 2899 +- name: 'Getting Started: Volunteer Projects, SDGs & Finding a Cause' + startOffset: 2899 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2899 + endOffset: 3083 +- name: 'Project Marketplaces & Career Advice: DSSG Projects and 80,000 Hours' + startOffset: 3083 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3083 + endOffset: 3256 +- name: 'Research Applications: Satellite Imagery for Poverty Estimation & Census + Gaps' + startOffset: 3256 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3256 + endOffset: 3382 +- name: 'Community Discussion: Ethics in 
AI Coffee Chats and Emerging Debates' + startOffset: 3382 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3382 + endOffset: 3486 +- name: 'Connect with Christine: Website, LinkedIn & Twitter' + startOffset: 3486 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3486 + endOffset: 3503 +- name: Episode Wrap-up & Resource Links + startOffset: 3503 + url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3503 + endOffset: 3524 + transcript: - header: Episode Intro & Guest Christine Cepelak - line: This week, we'll talk about data science for social impact. We have a special @@ -1154,124 +1265,6 @@ transcript: sec: 3524 time: '58:44' who: Christine -description: 'Learn data science for public policy: ethical AI, climate justice & - impact project strategies, career tips and actionable project design for social - good.' -intro: How can data science meaningfully shape public policy without becoming a tech-first - solution or creating new ethical harms? In this episode, Christine Cepelak, a writer - and researcher of tech and social issues who’s studying Data Science for Public - Policy and has years of experience managing social programs, walks through the practical - realities of data science for public policy. We cover career paths and sector differences, - a community organizing case study on electronics recycling, and real-world use cases - like drone computer vision for refugee aid and rooftop sustainability. Christine - digs into ethical AI concerns — including the EU AI Act and social scoring risks - — plus project design for long-term impact, stakeholder collaboration with NGOs, - and building data pipelines amid limited IT infrastructure. Listeners will also - hear about public data gaps (recycling programs, corporate transparency), research - applications such as satellite imagery for poverty estimation, and future priorities - like climate justice and gender equality. 
Tune in to get concrete guidance on starting - volunteer impact projects, where demand for impact data scientists lies, and how - to design responsible, policy-driven data work. -dateadded: '2022-07-30' -duration: PT00H58M44S -quotableClips: -- name: Episode Intro & Guest Christine Cepelak - startOffset: 0 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=0 - endOffset: 93 -- name: 'Career Journey: Program Management to Data Science for Public Policy' - startOffset: 93 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=93 - endOffset: 247 -- name: 'Private vs Public Sector: Differences for Data Work' - startOffset: 247 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=247 - endOffset: 325 -- name: 'Public Policy Defined: Laws, Governance & Social Impact' - startOffset: 325 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=325 - endOffset: 507 -- name: 'Community Organizing Case Study: Electronics Recycling Campaign' - startOffset: 507 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=507 - endOffset: 614 -- name: 'Policy vs Political Science: Theory, Practice & Implementation' - startOffset: 614 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=614 - endOffset: 730 -- name: 'Education Landscape: Data Science for Public Policy Programs & DSSG' - startOffset: 730 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=730 - endOffset: 862 -- name: 'Domain Nuances: Data Science for Social Impact vs Typical Industry Work' - startOffset: 862 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=862 - endOffset: 1032 -- name: 'Use Cases: Drone Computer Vision for Refugee Aid & Rooftop Sustainability' - startOffset: 1032 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1032 - endOffset: 1266 -- name: 'Ethics & Regulation: Ethical AI, EU AI Act and Social Scoring Risks' - startOffset: 1266 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1266 - endOffset: 1581 -- name: 'Project Design: Long-term Impact, Iteration & Avoiding Tech-First Solutions' - 
startOffset: 1581 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1581 - endOffset: 1686 -- name: 'Stakeholder Collaboration: NGOs, HR Use Cases & Mining Domain Knowledge' - startOffset: 1686 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1686 - endOffset: 1832 -- name: 'Data Challenges: Building Pipelines with Limited IT Infrastructure' - startOffset: 1832 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=1832 - endOffset: 2209 -- name: 'Public Data Gaps: Recycling Programs, Corporate Transparency & Access Issues' - startOffset: 2209 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2209 - endOffset: 2367 -- name: 'Future Focus Areas: Climate Justice, Gender Equality & Responsible Tech' - startOffset: 2367 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2367 - endOffset: 2419 -- name: 'Gender Inequality Solutions: Salary Transparency & Inclusive Hiring Policies' - startOffset: 2419 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2419 - endOffset: 2618 -- name: 'Corporate Responsibility: CSR Data Needs & Demand for Impact Data Scientists' - startOffset: 2618 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2618 - endOffset: 2785 -- name: 'Public Sector Roles: Chief Data Scientist and Government Data Strategy' - startOffset: 2785 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2785 - endOffset: 2899 -- name: 'Getting Started: Volunteer Projects, SDGs & Finding a Cause' - startOffset: 2899 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=2899 - endOffset: 3083 -- name: 'Project Marketplaces & Career Advice: DSSG Projects and 80,000 Hours' - startOffset: 3083 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3083 - endOffset: 3256 -- name: 'Research Applications: Satellite Imagery for Poverty Estimation & Census - Gaps' - startOffset: 3256 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3256 - endOffset: 3382 -- name: 'Community Discussion: Ethics in AI Coffee Chats and Emerging Debates' - startOffset: 3382 - url: 
https://www.youtube.com/watch?v=xWC1HAfekRk&t=3382 - endOffset: 3486 -- name: 'Connect with Christine: Website, LinkedIn & Twitter' - startOffset: 3486 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3486 - endOffset: 3503 -- name: Episode Wrap-up & Resource Links - startOffset: 3503 - url: https://www.youtube.com/watch?v=xWC1HAfekRk&t=3503 - endOffset: 3524 --- Links: diff --git a/_podcast/s03e04-interviewing-300-data-scientists.md b/_podcast/data-science-interview-and-cv-guide.md similarity index 96% rename from _podcast/s03e04-interviewing-300-data-scientists.md rename to _podcast/data-science-interview-and-cv-guide.md index 45fc9a2d..1d6167d1 100644 --- a/_podcast/s03e04-interviewing-300-data-scientists.md +++ b/_podcast/data-science-interview-and-cv-guide.md @@ -1,14 +1,11 @@ --- -title: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews - & Negotiation' -short: 'Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews - & Negotiation' -guests: -- olegnovikov -image: images/podcast/s03e04-interviewing-300-data-scientists.jpg +title: "Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation" +short: "Data Science Interview Guide: CV Optimization, Take-Home Projects, Mock Interviews & Negotiation" season: 3 episode: 4 -date: 2025-11-07 +guests: +- olegnovikov +image: images/podcast/data-science-interview-and-cv-guide.jpg ids: youtube: AYi7b-8GPm4 anchor: What-I-Learned-After-Interviewing-300-Data-Scientists---Oleg-Novikov-e10ctbs @@ -17,15 +14,133 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/What-I-Learned-After-Interviewing-300-Data-Scientists---Oleg-Novikov-e10ctbs spotify: https://open.spotify.com/episode/406wN6xDkYPyLS8i9fUJL5 apple: https://podcasts.apple.com/us/podcast/what-i-learned-after-interviewing-300-data-scientists/id1541710331?i=1000520681105 + +description: "Master CV optimization, take-home projects and mock interviews to land 
data science offers—learn SQL/ML prep, negotiation tactics and measurable project impact" +intro: "How do you make your data science application stand out, ace take-home projects, and negotiate an offer without leaving money on the table? In this episode, Oleg Novikov — creator of NextRound and former data science manager at Uber with a background in data and software engineering — walks through a practical data science interview guide covering CV optimization, take-home projects, mock interviews, and negotiation.

We dig into career trajectory from engineering to product data science, building projects that differentiate your application, and concrete product work like forecasting and LTV. Oleg demonstrates NextRound's mock-interview chatbot and personalized feedback, explains common hiring funnels (recruiter screen → take-home → interviews), and contrasts product data scientist vs. machine learning engineer expectations. You'll hear specific advice on treating your CV as a landing page, highlighting personal contributions, crafting case-study narratives from business goals to evaluation metrics, and preparing for technical assessments (ML fundamentals, SQL window functions, coding). We also cover handling rejection, replying graciously, evaluating offers, negotiation tactics when your current salary is low, and practical steps for PhDs breaking into industry.

Listen for actionable steps to refine your data science resume, prioritize take-home ROI, and use mock interviews to iterate faster" +topics: +- data science +- software engineering +- machine learning +- career growth +- career transition +- job search +dateadded: 2021-05-07 +date: 2025-11-07 + +duration: PT01H08M38S + +quotableClips: +- name: Introduction & Episode Overview + startOffset: 76 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=76 + endOffset: 120 +- name: 'Career Path: Engineer → Recommenders → Data Science Management' + startOffset: 120 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=120 + endOffset: 162 +- name: 'Differentiating Application: Building a Project to Showcase Skills' + startOffset: 162 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=162 + endOffset: 319 +- name: 'Product Data Science at Uber: Forecasting & LTV Work' + startOffset: 319 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=319 + endOffset: 398 +- name: 'NextRound: Mock Interview Chatbot with Personalized Feedback' + startOffset: 398 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=398 + endOffset: 509 +- name: Why Companies Provide Generic Rejection Messages + startOffset: 509 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=509 + endOffset: 655 +- name: 'Designing Interview Scenarios: Common On-the-Job Dilemmas' + startOffset: 655 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=655 + endOffset: 804 +- name: 'Typical Hiring Funnel: Recruiter Screen → Take-Home → Interview Rounds' + startOffset: 804 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=804 + endOffset: 929 +- name: 'Role Spectrum: Product Data Scientist vs. 
Machine Learning Engineer' + startOffset: 929 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=929 + endOffset: 1033 +- name: 'Job Description Focus: Tailoring Your Application to the Role' + startOffset: 1033 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1033 + endOffset: 1108 +- name: 'CV Optimization: Treat Your CV as a Landing Page' + startOffset: 1108 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1108 + endOffset: 1551 +- name: 'CV Details: Highlight Personal Contribution and Remove Noise' + startOffset: 1551 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1551 + endOffset: 1671 +- name: 'Take-Home Projects: Time Investment and ROI Considerations' + startOffset: 1671 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1671 + endOffset: 1772 +- name: 'Behavioral Stories: Preparing Impactful Past-Project Narratives' + startOffset: 1772 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1772 + endOffset: 1923 +- name: 'Case Study Strategy: From Business Goals to Evaluation Metrics' + startOffset: 1923 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1923 + endOffset: 2198 +- name: 'Technical Assessments: ML Knowledge, SQL (Window Functions), and Coding' + startOffset: 2198 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2198 + endOffset: 2350 +- name: 'Handling Rejection: Ask for Feedback and Reapply Strategically' + startOffset: 2350 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2350 + endOffset: 2522 +- name: 'Offer Evaluation: Components, Market Comparison, and Negotiation' + startOffset: 2522 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2522 + endOffset: 2678 +- name: 'Personal Data on CV: Avoid Age, Photo, and Irrelevant Details' + startOffset: 2678 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2678 + endOffset: 2746 +- name: 'PhD to Industry: Cold-Start Projects, Synthetic Data, and Blogging' + startOffset: 2746 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2746 + endOffset: 2950 +- name: 
'Replying to Rejections: Be Gracious and Preserve Relationships' + startOffset: 2950 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2950 + endOffset: 3017 +- name: Negotiation Tactics When Current Salary Is Low + startOffset: 3017 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3017 + endOffset: 3189 +- name: 'Applying Despite Experience Gaps: When It Makes Sense to Try' + startOffset: 3189 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3189 + endOffset: 3317 +- name: 'ATS Reality: Parsing Myths vs. Human Screening' + startOffset: 3317 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3317 + endOffset: 3494 +- name: 'Key Lessons from Hundreds of Interviews: Avoid Bias & Iterate' + startOffset: 3494 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3494 + endOffset: 3864 +- name: 'Rethinking CV Format: Historical Constraints and Modern Design' + startOffset: 3864 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3864 + endOffset: 4166 +- name: Closing Remarks and NextRound Resources + startOffset: 4166 + url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=4166 + endOffset: 4118 + transcript: - header: Introduction & Episode Overview -- line: This week we will talk about the interview process, getting hired as a data - scientist — and not only data scientists. We have a special guest today — Oleg. - Oleg worked as a data science manager at Uber, where he built data science teams. - He also has experience building several startups in Europe. Recently he created - NextRound which is a free service for practicing interviews, receiving personalized - feedback, and learning materials. Welcome! -- header: Introduction & Episode Overview - line: This week we will talk about the interview process, getting hired as a data scientist — and not only data scientists. We have a special guest today — Oleg. Oleg worked as a data science manager at Uber, where he built data science teams. 
@@ -927,138 +1042,6 @@ transcript: sec: 4194 time: '1:09:54' who: Alexey -intro: How do you make your data science application stand out, ace take-home projects, - and negotiate an offer without leaving money on the table? In this episode, Oleg - Novikov — creator of NextRound and former data science manager at Uber with a background - in data and software engineering — walks through a practical data science interview - guide covering CV optimization, take-home projects, mock interviews, and negotiation. -

We dig into career trajectory from engineering to product data science, - building projects that differentiate your application, and concrete product work - like forecasting and LTV. Oleg demonstrates NextRound's mock-interview chatbot and - personalized feedback, explains common hiring funnels (recruiter screen → take-home - → interviews), and contrasts product data scientist vs. machine learning engineer - expectations. You'll hear specific advice on treating your CV as a landing page, - highlighting personal contributions, crafting case-study narratives from business - goals to evaluation metrics, and preparing for technical assessments (ML fundamentals, - SQL window functions, coding). We also cover handling rejection, replying graciously, - evaluating offers, negotiation tactics when your current salary is low, and practical - steps for PhDs breaking into industry.

Listen for actionable steps to refine - your data science resume, prioritize take-home ROI, and use mock interviews to iterate - faster. -description: Master CV optimization, take-home projects and mock interviews to land - data science offers—learn SQL/ML prep, negotiation tactics and measurable project - impact. -dateadded: '2021-05-07' -duration: PT01H08M38S -quotableClips: -- name: Introduction & Episode Overview - startOffset: 76 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=76 - endOffset: 120 -- name: 'Career Path: Engineer → Recommenders → Data Science Management' - startOffset: 120 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=120 - endOffset: 162 -- name: 'Differentiating Application: Building a Project to Showcase Skills' - startOffset: 162 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=162 - endOffset: 319 -- name: 'Product Data Science at Uber: Forecasting & LTV Work' - startOffset: 319 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=319 - endOffset: 398 -- name: 'NextRound: Mock Interview Chatbot with Personalized Feedback' - startOffset: 398 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=398 - endOffset: 509 -- name: Why Companies Provide Generic Rejection Messages - startOffset: 509 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=509 - endOffset: 655 -- name: 'Designing Interview Scenarios: Common On-the-Job Dilemmas' - startOffset: 655 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=655 - endOffset: 804 -- name: 'Typical Hiring Funnel: Recruiter Screen → Take-Home → Interview Rounds' - startOffset: 804 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=804 - endOffset: 929 -- name: 'Role Spectrum: Product Data Scientist vs. 
Machine Learning Engineer' - startOffset: 929 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=929 - endOffset: 1033 -- name: 'Job Description Focus: Tailoring Your Application to the Role' - startOffset: 1033 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1033 - endOffset: 1108 -- name: 'CV Optimization: Treat Your CV as a Landing Page' - startOffset: 1108 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1108 - endOffset: 1551 -- name: 'CV Details: Highlight Personal Contribution and Remove Noise' - startOffset: 1551 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1551 - endOffset: 1671 -- name: 'Take-Home Projects: Time Investment and ROI Considerations' - startOffset: 1671 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1671 - endOffset: 1772 -- name: 'Behavioral Stories: Preparing Impactful Past-Project Narratives' - startOffset: 1772 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1772 - endOffset: 1923 -- name: 'Case Study Strategy: From Business Goals to Evaluation Metrics' - startOffset: 1923 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=1923 - endOffset: 2198 -- name: 'Technical Assessments: ML Knowledge, SQL (Window Functions), and Coding' - startOffset: 2198 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2198 - endOffset: 2350 -- name: 'Handling Rejection: Ask for Feedback and Reapply Strategically' - startOffset: 2350 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2350 - endOffset: 2522 -- name: 'Offer Evaluation: Components, Market Comparison, and Negotiation' - startOffset: 2522 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2522 - endOffset: 2678 -- name: 'Personal Data on CV: Avoid Age, Photo, and Irrelevant Details' - startOffset: 2678 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2678 - endOffset: 2746 -- name: 'PhD to Industry: Cold-Start Projects, Synthetic Data, and Blogging' - startOffset: 2746 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2746 - endOffset: 2950 -- name: 
'Replying to Rejections: Be Gracious and Preserve Relationships' - startOffset: 2950 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=2950 - endOffset: 3017 -- name: Negotiation Tactics When Current Salary Is Low - startOffset: 3017 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3017 - endOffset: 3189 -- name: 'Applying Despite Experience Gaps: When It Makes Sense to Try' - startOffset: 3189 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3189 - endOffset: 3317 -- name: 'ATS Reality: Parsing Myths vs. Human Screening' - startOffset: 3317 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3317 - endOffset: 3494 -- name: 'Key Lessons from Hundreds of Interviews: Avoid Bias & Iterate' - startOffset: 3494 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3494 - endOffset: 3864 -- name: 'Rethinking CV Format: Historical Constraints and Modern Design' - startOffset: 3864 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=3864 - endOffset: 4166 -- name: Closing Remarks and NextRound Resources - startOffset: 4166 - url: https://www.youtube.com/watch?v=AYi7b-8GPm4&t=4166 - endOffset: 4118 --- Links: diff --git a/_podcast/s10e02-decoding-data-science-job-descriptions.md b/_podcast/data-science-job-red-flags-and-mismatched-roles.md similarity index 97% rename from _podcast/s10e02-decoding-data-science-job-descriptions.md rename to _podcast/data-science-job-red-flags-and-mismatched-roles.md index ec386eb5..047ef030 100644 --- a/_podcast/s10e02-decoding-data-science-job-descriptions.md +++ b/_podcast/data-science-job-red-flags-and-mismatched-roles.md @@ -1,20 +1,153 @@ --- +title: "Data Science Jobs: How to Spot Misleading Job Titles, Hiring Red Flags & Build Better Data Teams" +short: "How to Spot Misleading Job Titles, Hiring Red Flags & Build Better Data Teams" +season: 10 episode: 2 guests: - terezaiofciu -date: 2025-11-07 +image: images/podcast/data-science-job-red-flags-and-mismatched-roles.jpg ids: anchor: 
Decoding-Data-Science-Job-Descriptions---Tereza-Iofciu-e1m079l youtube: bqxBiIwtmX4 -image: images/podcast/s10e02-decoding-data-science-job-descriptions.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Decoding-Data-Science-Job-Descriptions---Tereza-Iofciu-e1m079l apple: https://podcasts.apple.com/us/podcast/decoding-data-science-job-descriptions-tereza-iofciu/id1541710331?i=1000575150345 spotify: https://open.spotify.com/episode/4v6h48B0c0Je8xLMo5zMs5?si=hcMUqpPPQYm2vrdi2py1UQ youtube: https://www.youtube.com/watch?v=bqxBiIwtmX4 -season: 10 -short: Decoding Data Science Job Descriptions -title: Spot Misleading Data Job Titles, Hiring Red Flags & Build Better Data Teams + +description: "Discover how to spot misleading job titles, hiring red flags and build stronger data teams-assess tech stacks, interview rigor, salary ranges and career fit" +intro: "How can you tell if a \"data scientist\" job is really a data engineering role — or a mismatched hire waiting to happen? In this episode, Tereza Iofciu, PhD and seasoned data practitioner, walks through practical ways to spot misleading data job titles, hiring red flags, and how to build clearer, healthier data teams. Tereza brings experience across data science manager, data scientist, data engineer and product manager roles, plus teaching and community leadership (neuefische, PyLadies Hamburg, PSF community award), grounding her advice in real hiring and team-building work.

We cover why companies rename roles, examples from Scala, Elasticsearch, ETL and Airflow stacks, and the costs of vague job descriptions. You’ll get a role-clarity checklist (team structure, objectives, responsibilities vs. tech lists), signals of data maturity, interview pitfalls (time-consuming take-home tasks, syntax-focused tests), red flags in descriptions (long tech lists, “rockstar” language), and tactics for researching employers (LinkedIn, team pages, conference talks). Also discussed: salary transparency, remote-work fit, retention and career ladders.

Listen to learn concrete signals and questions to evaluate job descriptions, interviews, and shape better data hiring and team design." +topics: +- data science +- data engineering +- career growth +- hiring +- data teams +- team building +- job search +dateadded: 2022-08-06 +date: 2025-11-07 + +duration: PT00H58M45S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=0 + endOffset: 101 +- name: 'Guest Bio: Tereza’s multidisciplinary data roles & community work' + startOffset: 101 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=101 + endOffset: 160 +- name: 'Academic Background: PhD, information retrieval, recommender systems' + startOffset: 160 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=160 + endOffset: 232 +- name: 'Industry Transition: XING to mytaxi/FREE NOW and evolving responsibilities' + startOffset: 232 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=232 + endOffset: 369 +- name: 'Technical Practices at XING: Scala, Elasticsearch, product-driven engineering' + startOffset: 369 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=369 + endOffset: 487 +- name: 'Building Data Infrastructure at mytaxi: ETL, Airflow and platform challenges' + startOffset: 487 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=487 + endOffset: 615 +- name: 'Job Titles vs. 
Reality: Renaming roles and shaping career narratives' + startOffset: 615 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=615 + endOffset: 667 +- name: 'Coaching Role: Neuefische bootcamp focus on product, teamwork and coaching' + startOffset: 667 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=667 + endOffset: 802 +- name: 'Teaching Challenges: PhDs, collaboration and professional skills' + startOffset: 802 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=802 + endOffset: 833 +- name: 'Hiring Misalignment: Company expectations versus candidate reality' + startOffset: 833 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=833 + endOffset: 985 +- name: 'Interview Practices: Take-home tasks and candidate time burden' + startOffset: 985 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=985 + endOffset: 1094 +- name: 'Candidate Preparedness: Defining goals and asking the right questions' + startOffset: 1094 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1094 + endOffset: 1206 +- name: 'Interpreting Job Titles: Spotting mislabeled data roles' + startOffset: 1206 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1206 + endOffset: 1310 +- name: 'Career-Stage Fit: Junior versus experienced candidate needs' + startOffset: 1310 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1310 + endOffset: 1381 +- name: 'Role Clarity Checklist: Team, objectives, responsibilities vs. 
tech lists' + startOffset: 1381 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1381 + endOffset: 1638 +- name: 'Data Team Signals: Presence of data engineering and analytics functions' + startOffset: 1638 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1638 + endOffset: 1820 +- name: 'Red Flags in Descriptions: Long tech lists and vague responsibilities' + startOffset: 1820 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1820 + endOffset: 1863 +- name: 'Language & Culture Signals: “Rockstar”, “ninja” and inclusivity cues' + startOffset: 1863 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1863 + endOffset: 2013 +- name: 'Interview Rigor Indicator: Bullet-point overload and syntax-focused tests' + startOffset: 2013 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2013 + endOffset: 2228 +- name: 'Salary Transparency: German norms and benefits of publishing ranges' + startOffset: 2228 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2228 + endOffset: 2331 +- name: 'Company Research Tactics: LinkedIn, team pages and conference presence' + startOffset: 2331 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2331 + endOffset: 2358 +- name: 'Colleagues & Role Models: Finding inspiring teammates and mentors' + startOffset: 2358 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2358 + endOffset: 2464 +- name: 'Retention & Career Ladders: Using LinkedIn to gauge internal mobility' + startOffset: 2464 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2464 + endOffset: 2625 +- name: 'Remote Work Fit: Assessing WFH policies and support structures' + startOffset: 2625 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2625 + endOffset: 2912 +- name: 'Data Maturity Model: Before, during, after data and hiring implications' + startOffset: 2912 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2912 + endOffset: 3024 +- name: 'Day-to-Day Expectations: Time allocation across maturity stages' + startOffset: 3024 + url: 
https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3024 + endOffset: 3381 +- name: 'Tech Stack Signals: Modern vs legacy tools and what they reveal' + startOffset: 3381 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3381 + endOffset: 3499 +- name: 'Community Visibility: Talks and knowledge sharing as healthy-team signals' + startOffset: 3499 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3499 + endOffset: 3607 +- name: 'Closing & Resources: Slides, talk links and final advice' + startOffset: 3607 + url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3607 + endOffset: 3525 + transcript: - header: Podcast Introduction - header: 'Guest Bio: Tereza’s multidisciplinary data roles & community work' @@ -1134,144 +1267,6 @@ transcript: sec: 3626 time: '1:00:26' who: Tereza -intro: 'How can you tell if a "data scientist" job is really a data engineering role - — or a mismatched hire waiting to happen? In this episode, Tereza Iofciu, PhD and - seasoned data practitioner, walks through practical ways to spot misleading data - job titles, hiring red flags, and how to build clearer, healthier data teams. Tereza - brings experience across data science manager, data scientist, data engineer and - product manager roles, plus teaching and community leadership (neuefische, PyLadies - Hamburg, PSF community award), grounding her advice in real hiring and team-building - work.

We cover why companies rename roles, examples from Scala, Elasticsearch, - ETL and Airflow stacks, and the costs of vague job descriptions. You’ll get a role-clarity - checklist (team structure, objectives, responsibilities vs. tech lists), signals - of data maturity, interview pitfalls (time-consuming take-home tasks, syntax-focused - tests), red flags in descriptions (long tech lists, “rockstar” language), and tactics - for researching employers (LinkedIn, team pages, conference talks). Also discussed: - salary transparency, remote-work fit, retention and career ladders.

Listen - to learn concrete signals and questions to evaluate job descriptions, interviews, - and shape better data hiring and team design.' -description: Discover how to spot misleading job titles, hiring red flags and build - stronger data teams-assess tech stacks, interview rigor, salary ranges and career - fit. -dateadded: '2022-08-06' -duration: PT00H58M45S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=0 - endOffset: 101 -- name: 'Guest Bio: Tereza’s multidisciplinary data roles & community work' - startOffset: 101 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=101 - endOffset: 160 -- name: 'Academic Background: PhD, information retrieval, recommender systems' - startOffset: 160 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=160 - endOffset: 232 -- name: 'Industry Transition: XING to mytaxi/FREE NOW and evolving responsibilities' - startOffset: 232 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=232 - endOffset: 369 -- name: 'Technical Practices at XING: Scala, Elasticsearch, product-driven engineering' - startOffset: 369 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=369 - endOffset: 487 -- name: 'Building Data Infrastructure at mytaxi: ETL, Airflow and platform challenges' - startOffset: 487 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=487 - endOffset: 615 -- name: 'Job Titles vs. 
Reality: Renaming roles and shaping career narratives' - startOffset: 615 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=615 - endOffset: 667 -- name: 'Coaching Role: Neuefische bootcamp focus on product, teamwork and coaching' - startOffset: 667 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=667 - endOffset: 802 -- name: 'Teaching Challenges: PhDs, collaboration and professional skills' - startOffset: 802 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=802 - endOffset: 833 -- name: 'Hiring Misalignment: Company expectations versus candidate reality' - startOffset: 833 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=833 - endOffset: 985 -- name: 'Interview Practices: Take-home tasks and candidate time burden' - startOffset: 985 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=985 - endOffset: 1094 -- name: 'Candidate Preparedness: Defining goals and asking the right questions' - startOffset: 1094 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1094 - endOffset: 1206 -- name: 'Interpreting Job Titles: Spotting mislabeled data roles' - startOffset: 1206 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1206 - endOffset: 1310 -- name: 'Career-Stage Fit: Junior versus experienced candidate needs' - startOffset: 1310 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1310 - endOffset: 1381 -- name: 'Role Clarity Checklist: Team, objectives, responsibilities vs. 
tech lists' - startOffset: 1381 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1381 - endOffset: 1638 -- name: 'Data Team Signals: Presence of data engineering and analytics functions' - startOffset: 1638 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1638 - endOffset: 1820 -- name: 'Red Flags in Descriptions: Long tech lists and vague responsibilities' - startOffset: 1820 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1820 - endOffset: 1863 -- name: 'Language & Culture Signals: “Rockstar”, “ninja” and inclusivity cues' - startOffset: 1863 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=1863 - endOffset: 2013 -- name: 'Interview Rigor Indicator: Bullet-point overload and syntax-focused tests' - startOffset: 2013 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2013 - endOffset: 2228 -- name: 'Salary Transparency: German norms and benefits of publishing ranges' - startOffset: 2228 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2228 - endOffset: 2331 -- name: 'Company Research Tactics: LinkedIn, team pages and conference presence' - startOffset: 2331 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2331 - endOffset: 2358 -- name: 'Colleagues & Role Models: Finding inspiring teammates and mentors' - startOffset: 2358 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2358 - endOffset: 2464 -- name: 'Retention & Career Ladders: Using LinkedIn to gauge internal mobility' - startOffset: 2464 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2464 - endOffset: 2625 -- name: 'Remote Work Fit: Assessing WFH policies and support structures' - startOffset: 2625 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2625 - endOffset: 2912 -- name: 'Data Maturity Model: Before, during, after data and hiring implications' - startOffset: 2912 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=2912 - endOffset: 3024 -- name: 'Day-to-Day Expectations: Time allocation across maturity stages' - startOffset: 3024 - url: 
https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3024 - endOffset: 3381 -- name: 'Tech Stack Signals: Modern vs legacy tools and what they reveal' - startOffset: 3381 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3381 - endOffset: 3499 -- name: 'Community Visibility: Talks and knowledge sharing as healthy-team signals' - startOffset: 3499 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3499 - endOffset: 3607 -- name: 'Closing & Resources: Slides, talk links and final advice' - startOffset: 3607 - url: https://www.youtube.com/watch?v=bqxBiIwtmX4&t=3607 - endOffset: 3525 --- Links: diff --git a/_podcast/s06e09-data-science-manager.md b/_podcast/data-science-leadership-hiring-mlops.md similarity index 95% rename from _podcast/s06e09-data-science-manager.md rename to _podcast/data-science-leadership-hiring-mlops.md index 717d1240..a9497be3 100644 --- a/_podcast/s06e09-data-science-manager.md +++ b/_podcast/data-science-leadership-hiring-mlops.md @@ -1,12 +1,11 @@ --- -title: 'Data Science Leadership: Product-First ML, Recommenders & RTB, MLOps, Hiring - & Mentoring' -short: Becoming a Data Science Manager -guests: -- marianosemelman -image: images/podcast/s06e09-data-science-manager.jpg +title: "Data Science Leadership: Product-First ML, Recommenders & RTB, MLOps, Hiring & Mentoring" +short: "Becoming a Data Science Manager" season: 6 episode: 9 +guests: +- marianosemelman +image: images/podcast/data-science-leadership-hiring-mlops.jpg ids: youtube: qOLR84-KHoY anchor: Becoming-a-Data-Science-Manager---Mariano-Semelman-e1cbrf7 @@ -15,6 +14,143 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-Science-Manager---Mariano-Semelman-e1cbrf7 spotify: https://open.spotify.com/episode/28Sy4owRwvSJRFTeKAamz2 apple: https://podcasts.apple.com/us/podcast/becoming-a-data-science-manager-mariano-semelman/id1541710331?i=1000547222296 + +description: "Discover data science leadership, recommender systems & MLOps tactics—hire, mentor and deploy models 
faster with practical frameworks and tips" +intro: "How do you lead a data science team that prioritizes product impact while building recommender systems, real-time bidding (RTB) solutions, and maintainable MLOps? In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 years of experience, walks through practical leadership decisions that bridge models and products.

Mariano describes his shift from software development to data science leadership, daily responsibilities (meetings, mentoring, planning), and how he structures teams of data scientists and ML engineers. Key topics include product-first ML, search and recommender systems, advertising and RTB campaign optimization, CRISP-DM in production, diagnosing overfitting and feature issues, and pragmatic deployment patterns like start simple, fail fast, and iterative experiments. He also shares onboarding tactics (30-60-90 plans), feedback techniques (\"ask permission, care, offer options\"), one-on-ones, handling departures, code reviews as a manager, delegation through senior engineers, and hiring/remediation practices.

Listen to learn concrete approaches for prioritizing modeling time, running experiments in production, improving MLOps and NLP practices, and mentoring engineers to deliver measurable product outcomes" +topics: +- data science +- machine learning +- MLOps +- leadership +- career growth +- team building +- hiring +dateadded: 2022-01-09 + +duration: PT01H05M19S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=0 + endOffset: 86 +- name: 'Guest Intro: Mariano Semelman, Head of Data Science at OLX' + startOffset: 86 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=86 + endOffset: 140 +- name: Passion for Product Applications in Data Science + startOffset: 140 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=140 + endOffset: 179 +- name: 'Career Journey: Software Dev to Data Science Leadership' + startOffset: 179 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=179 + endOffset: 345 +- name: 'Daily Responsibilities: Meetings, Mentoring & Planning' + startOffset: 345 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=345 + endOffset: 474 +- name: 'Team Composition: Data Scientists and ML Engineers' + startOffset: 474 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=474 + endOffset: 519 +- name: 'Transition to Management: First Team of Five' + startOffset: 519 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=519 + endOffset: 651 +- name: 'Decision Mindset: Saying Yes and Learning on the Job' + startOffset: 651 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=651 + endOffset: 772 +- name: '30-60-90 Plan: Onboarding, Listening, and Learning' + startOffset: 772 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=772 + endOffset: 916 +- name: 'Project Onboarding: Rapid Learning and Trusting Reports' + startOffset: 916 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=916 + endOffset: 1046 +- name: Embracing Wrong Assumptions to Trigger Discussion + startOffset: 1046 + url: 
https://www.youtube.com/watch?v=qOLR84-KHoY&t=1046 + endOffset: 1197 +- name: Experience with Search and Recommender Systems + startOffset: 1197 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1197 + endOffset: 1279 +- name: 'Advertising Domain: Real-Time Bidding and Campaign Optimization' + startOffset: 1279 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1279 + endOffset: 1389 +- name: 'Transferable DS Practices: Problem Framing & Feature Engineering' + startOffset: 1389 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1389 + endOffset: 1576 +- name: 'Diagnosing Model Issues: Overfitting, Data, and Features' + startOffset: 1576 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1576 + endOffset: 1769 +- name: 'Product-First Mindset: Prioritizing User Impact' + startOffset: 1769 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1769 + endOffset: 1806 +- name: 'From Model to Product: Experiments, Deployment, Rules of ML' + startOffset: 1806 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1806 + endOffset: 2016 +- name: 'Start Simple and Fail Fast: Iterative Testing in Production' + startOffset: 2016 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2016 + endOffset: 2172 +- name: CRISP-DM Process and Deployment Realities + startOffset: 2172 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2172 + endOffset: 2210 +- name: 'Prioritization: Where Modeling Time Delivers Impact' + startOffset: 2210 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2210 + endOffset: 2425 +- name: 'Feedback Timing: When to Give Performance Feedback' + startOffset: 2425 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2425 + endOffset: 2657 +- name: 'Feedback Technique: Ask Permission, Care, and Offer Options' + startOffset: 2657 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2657 + endOffset: 2893 +- name: One-on-Ones and Creating a Safe Growth Environment + startOffset: 2893 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2893 + endOffset: 
3039 +- name: 'Handling Departures: Supporting Team Members Who Leave' + startOffset: 3039 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3039 + endOffset: 3157 +- name: 'Technical Work as a Manager: Code Reviews and Prototypes' + startOffset: 3157 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3157 + endOffset: 3298 +- name: Delegation and Architectural Involvement via Senior Engineers + startOffset: 3298 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3298 + endOffset: 3348 +- name: 'Hiring and Remediation: Interviews, Probation, Development Plans' + startOffset: 3348 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3348 + endOffset: 3697 +- name: 'Staying Current: MLOps, NLP, and Engineering Best Practices' + startOffset: 3697 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3697 + endOffset: 3947 +- name: Key Takeaways and Follow-Up Opportunities + startOffset: 3947 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3947 + endOffset: 3991 +- name: 'Contact Info: LinkedIn and Email for Mariano Semelman' + startOffset: 3991 + url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3991 + endOffset: 3919 + transcript: - header: Podcast Introduction - header: 'Guest Intro: Mariano Semelman, Head of Data Science at OLX' @@ -189,7 +325,7 @@ transcript: sec: 758 time: '12:38' who: Alexey -- header: '30‑60‑90 Plan: Onboarding, Listening, and Learning' +- header: '30-60-90 Plan: Onboarding, Listening, and Learning' - line: I think I got to know about it a few months in advance. Until it's announced and doesn’t actually happen, you don't know if it will happen. As soon as I knew that they wanted to put me in the position, I had one month or so to prepare myself. @@ -320,7 +456,7 @@ transcript: sec: 1274 time: '21:14' who: Alexey -- header: 'Advertising Domain: Real‑Time Bidding and Campaign Optimization' +- header: 'Advertising Domain: Real-Time Bidding and Campaign Optimization' - line: Kind of. 
It actually confused me a lot because I didn't know this thing beforehand, at least not in detail. That was my first “Aha!” moment, because I assumed it was the team that we were using to do publicity for OLX – to send traffic to OLX. @@ -424,7 +560,7 @@ transcript: sec: 1576 time: '26:16' who: Mariano -- header: 'Product‑First Mindset: Prioritizing User Impact' +- header: 'Product-First Mindset: Prioritizing User Impact' - line: I think one thing you didn't mention is that you also help a lot – from what I see – is connecting product people (from product management) with data scientists and helping them by becoming a translator between them. That's something that @@ -524,7 +660,7 @@ transcript: sec: 2016 time: '33:36' who: Mariano -- header: CRISP‑DM Process and Deployment Realities +- header: CRISP-DM Process and Deployment Realities - line: I like CRISP-DM, but there is no book. It's just an article in Wikipedia. I think there is a book, actually. But anyway, I like this process. It's a very old process – it’s like 20 years old or something like that. Surprisingly, it's @@ -690,7 +826,7 @@ transcript: sec: 2890 time: '48:10' who: Alexey -- header: One‑on‑Ones and Creating a Safe Growth Environment +- header: One-on-Ones and Creating a Safe Growth Environment - line: Yes, one-on-ones. For me, as I mentioned, I believe they should happen at least once a week. Of course, with every single person, maybe 15 minutes to catch up once a week, that's fine. You may feel compelled to skip it if there is no @@ -951,7 +1087,7 @@ transcript: sec: 3944 time: '1:05:44' who: Mariano -- header: Key Takeaways and Follow‑Up Opportunities +- header: Key Takeaways and Follow-Up Opportunities - line: No, but like we actually diverged and I think it turned out to be better than what I had in mind. So yeah, thanks a lot for joining us today. Thanks a lot for sharing your story with us, for sharing your experience. 
Also, this 30-60-90 thing, @@ -981,145 +1117,4 @@ transcript: sec: 4005 time: '1:06:45' who: Mariano -description: Discover data science leadership, recommender systems & MLOps tactics—hire, - mentor and deploy models faster with practical frameworks and tips. -intro: How do you lead a data science team that prioritizes product impact while building - recommender systems, real‑time bidding (RTB) solutions, and maintainable MLOps? - In this episode, Mariano Semelman, Head of Data Science at OLX Group with over 13 - years of experience, walks through practical leadership decisions that bridge models - and products.

Mariano describes his shift from software development to - data science leadership, daily responsibilities (meetings, mentoring, planning), - and how he structures teams of data scientists and ML engineers. Key topics include - product‑first ML, search and recommender systems, advertising and RTB campaign optimization, - CRISP‑DM in production, diagnosing overfitting and feature issues, and pragmatic - deployment patterns like start simple, fail fast, and iterative experiments. He - also shares onboarding tactics (30‑60‑90 plans), feedback techniques ("ask permission, - care, offer options"), one‑on‑ones, handling departures, code reviews as a manager, - delegation through senior engineers, and hiring/remediation practices.

- Listen to learn concrete approaches for prioritizing modeling time, running experiments - in production, improving MLOps and NLP practices, and mentoring engineers to deliver - measurable product outcomes. -dateadded: '2022-01-09' -duration: PT01H05M19S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=0 - endOffset: 86 -- name: 'Guest Intro: Mariano Semelman, Head of Data Science at OLX' - startOffset: 86 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=86 - endOffset: 140 -- name: Passion for Product Applications in Data Science - startOffset: 140 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=140 - endOffset: 179 -- name: 'Career Journey: Software Dev to Data Science Leadership' - startOffset: 179 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=179 - endOffset: 345 -- name: 'Daily Responsibilities: Meetings, Mentoring & Planning' - startOffset: 345 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=345 - endOffset: 474 -- name: 'Team Composition: Data Scientists and ML Engineers' - startOffset: 474 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=474 - endOffset: 519 -- name: 'Transition to Management: First Team of Five' - startOffset: 519 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=519 - endOffset: 651 -- name: 'Decision Mindset: Saying Yes and Learning on the Job' - startOffset: 651 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=651 - endOffset: 772 -- name: '30‑60‑90 Plan: Onboarding, Listening, and Learning' - startOffset: 772 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=772 - endOffset: 916 -- name: 'Project Onboarding: Rapid Learning and Trusting Reports' - startOffset: 916 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=916 - endOffset: 1046 -- name: Embracing Wrong Assumptions to Trigger Discussion - startOffset: 1046 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1046 - endOffset: 1197 -- name: Experience with Search and 
Recommender Systems - startOffset: 1197 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1197 - endOffset: 1279 -- name: 'Advertising Domain: Real‑Time Bidding and Campaign Optimization' - startOffset: 1279 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1279 - endOffset: 1389 -- name: 'Transferable DS Practices: Problem Framing & Feature Engineering' - startOffset: 1389 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1389 - endOffset: 1576 -- name: 'Diagnosing Model Issues: Overfitting, Data, and Features' - startOffset: 1576 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1576 - endOffset: 1769 -- name: 'Product‑First Mindset: Prioritizing User Impact' - startOffset: 1769 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1769 - endOffset: 1806 -- name: 'From Model to Product: Experiments, Deployment, Rules of ML' - startOffset: 1806 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=1806 - endOffset: 2016 -- name: 'Start Simple and Fail Fast: Iterative Testing in Production' - startOffset: 2016 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2016 - endOffset: 2172 -- name: CRISP‑DM Process and Deployment Realities - startOffset: 2172 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2172 - endOffset: 2210 -- name: 'Prioritization: Where Modeling Time Delivers Impact' - startOffset: 2210 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2210 - endOffset: 2425 -- name: 'Feedback Timing: When to Give Performance Feedback' - startOffset: 2425 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2425 - endOffset: 2657 -- name: 'Feedback Technique: Ask Permission, Care, and Offer Options' - startOffset: 2657 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2657 - endOffset: 2893 -- name: One‑on‑Ones and Creating a Safe Growth Environment - startOffset: 2893 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=2893 - endOffset: 3039 -- name: 'Handling Departures: Supporting Team Members Who Leave' - startOffset: 3039 - url: 
https://www.youtube.com/watch?v=qOLR84-KHoY&t=3039 - endOffset: 3157 -- name: 'Technical Work as a Manager: Code Reviews and Prototypes' - startOffset: 3157 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3157 - endOffset: 3298 -- name: Delegation and Architectural Involvement via Senior Engineers - startOffset: 3298 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3298 - endOffset: 3348 -- name: 'Hiring and Remediation: Interviews, Probation, Development Plans' - startOffset: 3348 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3348 - endOffset: 3697 -- name: 'Staying Current: MLOps, NLP, and Engineering Best Practices' - startOffset: 3697 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3697 - endOffset: 3947 -- name: Key Takeaways and Follow‑Up Opportunities - startOffset: 3947 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3947 - endOffset: 3991 -- name: 'Contact Info: LinkedIn and Email for Mariano Semelman' - startOffset: 3991 - url: https://www.youtube.com/watch?v=qOLR84-KHoY&t=3991 - endOffset: 3919 --- diff --git a/_podcast/s13e06-secret-sauce-of-data-science-management.md b/_podcast/data-science-management-and-agile-machine-learning.md similarity index 96% rename from _podcast/s13e06-secret-sauce-of-data-science-management.md rename to _podcast/data-science-management-and-agile-machine-learning.md index ad568681..8d9960e6 100644 --- a/_podcast/s13e06-secret-sauce-of-data-science-management.md +++ b/_podcast/data-science-management-and-agile-machine-learning.md @@ -1,20 +1,112 @@ --- +title: "Master Data Science Management: Agile ML, Debrief Culture, Metrics & Scale to Production" +short: "The Secret Sauce of Data Science Management" +season: 13 episode: 6 guests: - shirmeirlador +image: images/podcast/data-science-management-and-agile-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/The-Secret-Sauce-of-Data-Science-Management---Shir-Meir-Lador-e21cu92 youtube: gcxP0qRO-MY -image: 
images/podcast/s13e06-secret-sauce-of-data-science-management.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/The-Secret-Sauce-of-Data-Science-Management---Shir-Meir-Lador-e21cu92 apple: https://podcasts.apple.com/us/podcast/the-secret-sauce-of-data-science-management-shir-meir-lador/id1541710331?i=1000606790142 spotify: https://open.spotify.com/episode/4kzcUCVPVN1Opq7XI1Dibd?si=f7GlEOs-TFiC9dxTJlXVyw youtube: https://www.youtube.com/watch?v=gcxP0qRO-MY -season: 13 -short: The Secret Sauce of Data Science Management -title: 'Master Data Science Management: Agile ML, Debrief Culture, Metrics & Scale - to Production' + +description: "Master data science management: learn Agile ML, debrief culture, metrics and POC-to-production strategies to scale teams, boost impact and ship reliable models." +intro: "How do you run data science teams so experiments become reliable, measurable products? In this episode, Shir Meir Lador, a data science group manager at Intuit who builds machine and deep learning models for document intelligence in TurboTax and QuickBooks, walks through practical approaches to data science management and agile ML.

We explore the origins of debrief culture from military pilot training and how pre/post debriefs drive continuous improvement; concrete practices for agile ML including two-week sprints, exploration sprints, design stories and grooming; and how to scope work, handle AI project uncertainty, and use rapid experimentation to mitigate data risks. Shir also digs into metrics for production ML—business impact, A/B testing, customer-focused KPIs—and people metrics like pulse surveys, manager score and skip-level feedback. You’ll hear about leadership pillars (vision, driving results, culture), team development, goal alignment, cross-functional product partnerships, and tactics for fostering innovation (hackathons, paper clubs).

Listen for actionable guidance on measuring success, scaling ML to production, and building the managerial skills to lead high-performance data science teams. This episode is for managers and technical leads focused on production ML, machine learning operations, and team-driven impact" +topics: +- management +- machine learning +dateadded: 2023-04-01 + +duration: PT00H56M57S + +quotableClips: +- name: 'Episode Introduction: The Secret Sauce of Data Science Management' + startOffset: 100 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=100 + endOffset: 160 +- name: 'Career Background: Electrical Engineering to Document Intelligence at Intuit' + startOffset: 160 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=160 + endOffset: 271 +- name: 'Military Leadership Lessons: Pilot Training & Debrief Culture Origins' + startOffset: 271 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=271 + endOffset: 324 +- name: 'Debriefing Practice: Pre/post Focus Areas for Continuous Improvement' + startOffset: 324 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=324 + endOffset: 558 +- name: 'Group Manager Role: Strategy, Mentoring, Standards and Roadmaps' + startOffset: 558 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=558 + endOffset: 713 +- name: 'Measuring Success: Business Impact and Team Engagement Metrics' + startOffset: 713 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=713 + endOffset: 776 +- name: 'People Metrics: Pulse Surveys, Manager Score and Skip-level Feedback' + startOffset: 776 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=776 + endOffset: 979 +- name: 'Leadership Pillars: Vision, Driving Results, Building High-performance Culture' + startOffset: 979 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=979 + endOffset: 1043 +- name: 'Managing Leadership Relationships: Communicating Vision and Securing Resources' + startOffset: 1043 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1043 + endOffset: 1464 +- name: 'Team Development: 
Goal-setting, One-on-ones, Feedback and Recognition' + startOffset: 1464 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1464 + endOffset: 1585 +- name: 'Goal Alignment: Cascading Roadmap Goals to Individual Development' + startOffset: 1585 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1585 + endOffset: 1920 +- name: 'Fostering Innovation: Hackathons, Paper Clubs and Learning Forums' + startOffset: 1920 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1920 + endOffset: 2071 +- name: 'Cross-Functional Integration: Product Partnerships and Expectation Management' + startOffset: 2071 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2071 + endOffset: 2466 +- name: 'AI Project Uncertainty: Data Risks, Unknowns and Rapid Experimentation' + startOffset: 2466 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2466 + endOffset: 2658 +- name: 'Agile for ML: Two-week Sprints, Exploration Tasks and Grooming Practices' + startOffset: 2658 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2658 + endOffset: 2736 +- name: 'Scoping ML Work: Exploration Sprints, Design Stories and Iterative Milestones' + startOffset: 2736 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2736 + endOffset: 2994 +- name: 'Core Manager Skills: Communication, Strategic Clarity and Growth Mindset' + startOffset: 2994 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2994 + endOffset: 3299 +- name: 'POC to Production: Customer-focused Metrics, A/B Testing and Incremental + Rollout' + startOffset: 3299 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3299 + endOffset: 3498 +- name: 'Resources & Further Reading: Shir’s Talks and Blog Posts' + startOffset: 3498 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3498 + endOffset: 3517 +- name: Episode Wrap-up and Closing Remarks + startOffset: 3517 + url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3517 + endOffset: 3417 + transcript: - header: 'Episode Introduction: The Secret Sauce of Data Science Management' - line: 
This week, we'll talk about the secret sauce of data science management. We @@ -1096,110 +1188,6 @@ transcript: sec: 3517 time: '58:37' who: Alexey -description: 'Master data science management: learn Agile ML, debrief culture, metrics - and POC-to-production strategies to scale teams, boost impact and ship reliable - models.' -intro: How do you run data science teams so experiments become reliable, measurable - products? In this episode, Shir Meir Lador, a data science group manager at Intuit - who builds machine and deep learning models for document intelligence in TurboTax - and QuickBooks, walks through practical approaches to data science management and - agile ML.

We explore the origins of debrief culture from military pilot - training and how pre/post debriefs drive continuous improvement; concrete practices - for agile ML including two-week sprints, exploration sprints, design stories and - grooming; and how to scope work, handle AI project uncertainty, and use rapid experimentation - to mitigate data risks. Shir also digs into metrics for production ML—business impact, - A/B testing, customer-focused KPIs—and people metrics like pulse surveys, manager - score and skip-level feedback. You’ll hear about leadership pillars (vision, driving - results, culture), team development, goal alignment, cross-functional product partnerships, - and tactics for fostering innovation (hackathons, paper clubs).

Listen - for actionable guidance on measuring success, scaling ML to production, and building - the managerial skills to lead high-performance data science teams. This episode - is for managers and technical leads focused on production ML, machine learning operations, - and team-driven impact. -dateadded: '2023-04-01' -duration: PT00H56M57S -quotableClips: -- name: 'Episode Introduction: The Secret Sauce of Data Science Management' - startOffset: 100 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=100 - endOffset: 160 -- name: 'Career Background: Electrical Engineering to Document Intelligence at Intuit' - startOffset: 160 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=160 - endOffset: 271 -- name: 'Military Leadership Lessons: Pilot Training & Debrief Culture Origins' - startOffset: 271 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=271 - endOffset: 324 -- name: 'Debriefing Practice: Pre/post Focus Areas for Continuous Improvement' - startOffset: 324 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=324 - endOffset: 558 -- name: 'Group Manager Role: Strategy, Mentoring, Standards and Roadmaps' - startOffset: 558 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=558 - endOffset: 713 -- name: 'Measuring Success: Business Impact and Team Engagement Metrics' - startOffset: 713 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=713 - endOffset: 776 -- name: 'People Metrics: Pulse Surveys, Manager Score and Skip-level Feedback' - startOffset: 776 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=776 - endOffset: 979 -- name: 'Leadership Pillars: Vision, Driving Results, Building High-performance Culture' - startOffset: 979 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=979 - endOffset: 1043 -- name: 'Managing Leadership Relationships: Communicating Vision and Securing Resources' - startOffset: 1043 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1043 - endOffset: 1464 -- name: 'Team Development: Goal-setting, One-on-ones, Feedback 
and Recognition' - startOffset: 1464 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1464 - endOffset: 1585 -- name: 'Goal Alignment: Cascading Roadmap Goals to Individual Development' - startOffset: 1585 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1585 - endOffset: 1920 -- name: 'Fostering Innovation: Hackathons, Paper Clubs and Learning Forums' - startOffset: 1920 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=1920 - endOffset: 2071 -- name: 'Cross-Functional Integration: Product Partnerships and Expectation Management' - startOffset: 2071 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2071 - endOffset: 2466 -- name: 'AI Project Uncertainty: Data Risks, Unknowns and Rapid Experimentation' - startOffset: 2466 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2466 - endOffset: 2658 -- name: 'Agile for ML: Two-week Sprints, Exploration Tasks and Grooming Practices' - startOffset: 2658 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2658 - endOffset: 2736 -- name: 'Scoping ML Work: Exploration Sprints, Design Stories and Iterative Milestones' - startOffset: 2736 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2736 - endOffset: 2994 -- name: 'Core Manager Skills: Communication, Strategic Clarity and Growth Mindset' - startOffset: 2994 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=2994 - endOffset: 3299 -- name: 'POC to Production: Customer-focused Metrics, A/B Testing and Incremental - Rollout' - startOffset: 3299 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3299 - endOffset: 3498 -- name: 'Resources & Further Reading: Shir’s Talks and Blog Posts' - startOffset: 3498 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3498 - endOffset: 3517 -- name: Episode Wrap-up and Closing Remarks - startOffset: 3517 - url: https://www.youtube.com/watch?v=gcxP0qRO-MY&t=3517 - endOffset: 3417 --- Links: diff --git a/_podcast/s06e03-manager-vs-expert.md b/_podcast/data-science-manager-vs-expert-hiring-guide.md similarity index 
96% rename from _podcast/s06e03-manager-vs-expert.md rename to _podcast/data-science-manager-vs-expert-hiring-guide.md index 1f5dd102..2a4e565d 100644 --- a/_podcast/s06e03-manager-vs-expert.md +++ b/_podcast/data-science-manager-vs-expert-hiring-guide.md @@ -1,12 +1,11 @@ --- -title: 'Data Science Manager vs Expert: Hiring Strategy, Skills, Team Building & When - to Use ML' -short: Data Science Manager vs Data Science Expert -guests: -- barbarasobkowiak -image: images/podcast/s06e03-manager-vs-expert.jpg +title: "Data Science Manager vs Expert: Hiring Strategy, Skills, Team Building & When to Use ML" +short: "Data Science Manager vs Data Science Expert" season: 6 episode: 3 +guests: +- barbarasobkowiak +image: images/podcast/data-science-manager-vs-expert-hiring-guide.jpg ids: youtube: hFmIgaN-F8Y anchor: Data-Science-Manager-vs-Data-Science-Expert---Barbara-Sobkowiak-e1ah3od @@ -15,6 +14,132 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Science-Manager-vs-Data-Science-Expert---Barbara-Sobkowiak-e1ah3od spotify: https://open.spotify.com/episode/5Ug8YA3hKY9Kr5hVFDqZ77 apple: https://podcasts.apple.com/us/podcast/data-science-manager-vs-data-science-expert-barbara/id1541710331?i=1000542496818 + +description: "Learn hiring strategies for Data Science Manager vs Data Science Expert—when to hire experts, build teams, assess ML needs, and boost business impact" +intro: "When should you hire a data science manager versus a deep technical expert, and how do you decide whether machine learning is actually the right solution? In this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, and manager by passion — walks through her career from GIS → SQL → BI to leading teams, and tackles hiring strategy, role design, and practical ML use cases like mental health monitoring and demand forecasting.

We cover common pitfalls (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager skill balance between technical literacy and soft skills, and what “hands-on” really means for managers: high-level understanding, code review, and time allocation. Learn when to hire a data science expert for complex models or domain knowledge, and when a manager-plus-generalist approach or a startup “unicorn” makes sense. Barbara also discusses team building (learning plans, pairing), project prioritization, model monitoring, feasibility checks (data quality and baselines), and measuring impact with KPIs and client discovery.

Listen to gain practical hiring criteria, role profiles, and decision frameworks for when to use machine learning and how to build teams that deliver." +topics: +- data science +- machine learning +- leadership +- team building +dateadded: 2021-11-21 + +duration: PT00H59M33S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=0 + endOffset: 89 +- name: 'Episode Topic: Data Science Manager vs Data Science Expert' + startOffset: 89 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=89 + endOffset: 120 +- name: 'Career Journey: GIS → SQL → BI → Data Science Manager' + startOffset: 120 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=120 + endOffset: 266 +- name: 'ML Use Cases: Mental Health Monitoring & Demand Forecasting' + startOffset: 266 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=266 + endOffset: 298 +- name: 'Misleading Job Ads: Manager vs Expert Confusion on LinkedIn' + startOffset: 298 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=298 + endOffset: 448 +- name: 'Root Causes: HR/IT Job Descriptions Missing Managerial Needs' + startOffset: 448 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=448 + endOffset: 502 +- name: 'Manager Skill Balance: Technical Knowledge vs Soft Skills' + startOffset: 502 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=502 + endOffset: 722 +- name: 'Technical Expectation: High-Level Understanding vs Deep Expertise' + startOffset: 722 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=722 + endOffset: 809 +- name: 'Manager Responsibilities: Strategy, Team Development, Stakeholder Communication' + startOffset: 809 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=809 + endOffset: 949 +- name: 'Hands-On Reality: Coding, Model Review, and Time Allocation' + startOffset: 949 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=949 + endOffset: 1054 +- name: 'Manager Experience: Hands-On ML Helpful but Not Mandatory' + startOffset: 1054 + 
url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1054 + endOffset: 1180 +- name: 'Business Development: Manager Role in Sales and Client Strategy' + startOffset: 1180 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1180 + endOffset: 1251 +- name: 'Team Development: Learning Plans, Courses, and Pairing' + startOffset: 1251 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1251 + endOffset: 1434 +- name: 'Quality Oversight: Code Reviews vs Managerial Guidance' + startOffset: 1434 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1434 + endOffset: 1502 +- name: 'Data Science Expert: Deep Technical and Domain Expertise' + startOffset: 1502 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1502 + endOffset: 1728 +- name: 'Hiring an Expert: When Complex Models and Domain Knowledge Are Needed' + startOffset: 1728 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1728 + endOffset: 1837 +- name: 'Hiring Strategy: Manager + Expert vs Generalist for Startups' + startOffset: 1837 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1837 + endOffset: 1916 +- name: 'Manager Job Profile: Team Building, Communication, and AI Literacy' + startOffset: 1916 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1916 + endOffset: 2044 +- name: 'Risks of Hiring Experts as Managers: Team and Business Translation Gaps' + startOffset: 2044 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2044 + endOffset: 2317 +- name: 'Startup Hiring: Unicorns Who Wear Many Hats' + startOffset: 2317 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2317 + endOffset: 2447 +- name: 'Project Prioritization: Estimation, Resource Allocation, and Buffers' + startOffset: 2447 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2447 + endOffset: 2774 +- name: 'Measuring Impact: Client Feedback, KPIs, and Model Monitoring' + startOffset: 2774 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2774 + endOffset: 3012 +- name: 'Client Discovery: Baselines, Data Availability, and Success 
Metrics' + startOffset: 3012 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3012 + endOffset: 3237 +- name: 'Feasibility Check: Data Quality and Necessity of Machine Learning' + startOffset: 3237 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3237 + endOffset: 3271 +- name: 'Diversity Spotlight: Women in Data Science and Interview Confidence' + startOffset: 3271 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3271 + endOffset: 3543 +- name: 'Connect with Guest: Barbara Sobkowiak on LinkedIn' + startOffset: 3543 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3543 + endOffset: 3560 +- name: 'Career Advice: Find Satisfaction, Mentors, and Networking' + startOffset: 3560 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3560 + endOffset: 3624 +- name: Episode Wrap-up and Unanswered Questions + startOffset: 3624 + url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3624 + endOffset: 3573 + transcript: - header: Podcast Introduction - header: 'Episode Topic: Data Science Manager vs Data Science Expert' @@ -959,139 +1084,6 @@ transcript: sec: 3662 time: '1:01:02' who: Barbara -description: Learn hiring strategies for Data Science Manager vs Data Science Expert—when - to hire experts, build teams, assess ML needs, and boost business impact. -intro: 'When should you hire a data science manager versus a deep technical expert, - and how do you decide whether machine learning is actually the right solution? In - this episode Barbara Sobkowiak — data scientist by training, GIS specialist by education, - and manager by passion — walks through her career from GIS → SQL → BI to leading - teams, and tackles hiring strategy, role design, and practical ML use cases like - mental health monitoring and demand forecasting.

We cover common pitfalls - (misleading job ads, HR/IT job descriptions that miss managerial needs), the manager - skill balance between technical literacy and soft skills, and what “hands-on” really - means for managers: high‑level understanding, code review, and time allocation. - Learn when to hire a data science expert for complex models or domain knowledge, - and when a manager-plus-generalist approach or a startup “unicorn” makes sense. - Barbara also discusses team building (learning plans, pairing), project prioritization, - model monitoring, feasibility checks (data quality and baselines), and measuring - impact with KPIs and client discovery.

Listen to gain practical hiring - criteria, role profiles, and decision frameworks for when to use machine learning - and how to build teams that deliver.' -dateadded: '2021-11-21' -duration: PT00H59M33S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=0 - endOffset: 89 -- name: 'Episode Topic: Data Science Manager vs Data Science Expert' - startOffset: 89 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=89 - endOffset: 120 -- name: 'Career Journey: GIS → SQL → BI → Data Science Manager' - startOffset: 120 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=120 - endOffset: 266 -- name: 'ML Use Cases: Mental Health Monitoring & Demand Forecasting' - startOffset: 266 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=266 - endOffset: 298 -- name: 'Misleading Job Ads: Manager vs Expert Confusion on LinkedIn' - startOffset: 298 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=298 - endOffset: 448 -- name: 'Root Causes: HR/IT Job Descriptions Missing Managerial Needs' - startOffset: 448 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=448 - endOffset: 502 -- name: 'Manager Skill Balance: Technical Knowledge vs Soft Skills' - startOffset: 502 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=502 - endOffset: 722 -- name: 'Technical Expectation: High-Level Understanding vs Deep Expertise' - startOffset: 722 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=722 - endOffset: 809 -- name: 'Manager Responsibilities: Strategy, Team Development, Stakeholder Communication' - startOffset: 809 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=809 - endOffset: 949 -- name: 'Hands-On Reality: Coding, Model Review, and Time Allocation' - startOffset: 949 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=949 - endOffset: 1054 -- name: 'Manager Experience: Hands-On ML Helpful but Not Mandatory' - startOffset: 1054 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1054 - endOffset: 1180 -- 
name: 'Business Development: Manager Role in Sales and Client Strategy' - startOffset: 1180 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1180 - endOffset: 1251 -- name: 'Team Development: Learning Plans, Courses, and Pairing' - startOffset: 1251 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1251 - endOffset: 1434 -- name: 'Quality Oversight: Code Reviews vs Managerial Guidance' - startOffset: 1434 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1434 - endOffset: 1502 -- name: 'Data Science Expert: Deep Technical and Domain Expertise' - startOffset: 1502 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1502 - endOffset: 1728 -- name: 'Hiring an Expert: When Complex Models and Domain Knowledge Are Needed' - startOffset: 1728 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1728 - endOffset: 1837 -- name: 'Hiring Strategy: Manager + Expert vs Generalist for Startups' - startOffset: 1837 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1837 - endOffset: 1916 -- name: 'Manager Job Profile: Team Building, Communication, and AI Literacy' - startOffset: 1916 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=1916 - endOffset: 2044 -- name: 'Risks of Hiring Experts as Managers: Team and Business Translation Gaps' - startOffset: 2044 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2044 - endOffset: 2317 -- name: 'Startup Hiring: Unicorns Who Wear Many Hats' - startOffset: 2317 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2317 - endOffset: 2447 -- name: 'Project Prioritization: Estimation, Resource Allocation, and Buffers' - startOffset: 2447 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2447 - endOffset: 2774 -- name: 'Measuring Impact: Client Feedback, KPIs, and Model Monitoring' - startOffset: 2774 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=2774 - endOffset: 3012 -- name: 'Client Discovery: Baselines, Data Availability, and Success Metrics' - startOffset: 3012 - url: 
https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3012 - endOffset: 3237 -- name: 'Feasibility Check: Data Quality and Necessity of Machine Learning' - startOffset: 3237 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3237 - endOffset: 3271 -- name: 'Diversity Spotlight: Women in Data Science and Interview Confidence' - startOffset: 3271 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3271 - endOffset: 3543 -- name: 'Connect with Guest: Barbara Sobkowiak on LinkedIn' - startOffset: 3543 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3543 - endOffset: 3560 -- name: 'Career Advice: Find Satisfaction, Mentors, and Networking' - startOffset: 3560 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3560 - endOffset: 3624 -- name: Episode Wrap-up and Unanswered Questions - startOffset: 3624 - url: https://www.youtube.com/watch?v=hFmIgaN-F8Y&t=3624 - endOffset: 3573 --- Links: diff --git a/_podcast/s09e07-designing-data-science-organization.md b/_podcast/data-science-team-structure-and-org-design.md similarity index 95% rename from _podcast/s09e07-designing-data-science-organization.md rename to _podcast/data-science-team-structure-and-org-design.md index a2cbc11e..b9c32fbd 100644 --- a/_podcast/s09e07-designing-data-science-organization.md +++ b/_podcast/data-science-team-structure-and-org-design.md @@ -1,20 +1,137 @@ --- +title: "Designing High-Impact Data Science Teams: Centralized vs Embedded Models, Experimentation & Staffing" +short: "Designing a Data Science Team" +season: 9 episode: 7 guests: - lisacohen +image: images/podcast/data-science-team-structure-and-org-design.jpg ids: anchor: Designing-a-Data-Science-Organization---Lisa-Cohen-e1kcm5e youtube: F_rJ4fg5ZEA -image: images/podcast/s09e07-designing-data-science-organization.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Designing-a-Data-Science-Organization---Lisa-Cohen-e1kcm5e apple: 
https://podcasts.apple.com/us/podcast/designing-a-data-science-organization-lisa-cohen/id1541710331?i=1000569172916 spotify: https://open.spotify.com/episode/62ZzHBEuOLbm6ft0u9dlh7?si=182bea5ac49243af youtube: https://www.youtube.com/watch?v=F_rJ4fg5ZEA -season: 9 -short: Designing a Data Science Organization -title: 'Designing High-Impact Data Science Orgs: Centralized vs Embedded Models, Experimentation - & Staffing' + +description: "Discover how to design high-impact data science orgs: centralized vs embedded models, staffing ratios and experimentation to speed decisions and scale impact." +intro: "How should you structure a data science organization to maximize product impact: centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director of Data Science at Twitter who leads 70 data scientists and previously led Azure Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation patterns for designing high-impact data science orgs.

We cover centralized vs embedded models and what “embedding” really means for reporting lines and day-to-day integration with feature teams; Twitter’s hybrid per-division approach for product and ads; staffing guidance (including an engineers-to-data-scientist ratio reference); and rhythms for cross-functional planning, OKRs, and dependency management. Lisa also discusses experimentation and experiment review, defining success metrics and ship criteria, knowledge sharing practices, differences between analytics and ML-heavy data science, and how to partner with product, engineering, design, and research.

Listen to gain actionable guidance on choosing an org model, setting staffing expectations, establishing experiment and metrics practices, and aligning data pipelines, data quality, and OKRs to drive data-driven product decisions." +topics: +- data science +- data teams +- leadership +- machine learning +dateadded: 2022-07-08 + +duration: PT00H58M55S + +quotableClips: +- name: 'Guest Introduction: Lisa Cohen, Director of Data Science at Twitter' + startOffset: 77 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=77 + endOffset: 102 +- name: 'Career Background: Applied Math, Microsoft telemetry, Azure to Twitter' + startOffset: 102 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=102 + endOffset: 387 +- name: 'Org Models Overview: Centralized vs decentralized data science organization' + startOffset: 387 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=387 + endOffset: 514 +- name: 'Embedding Explained: Reporting lines vs day-to-day integration with feature + teams' + startOffset: 514 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=514 + endOffset: 641 +- name: 'Hybrid Structure: Centralization per division and multiple DS orgs' + startOffset: 641 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=641 + endOffset: 926 +- name: 'Reporting Structure: Embedded teams vs centralized data science reporting' + startOffset: 926 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=926 + endOffset: 1123 +- name: 'Team Rhythms & Planning: Cross-functional ceremonies and dependency management' + startOffset: 1123 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1123 + endOffset: 1318 +- name: 'Cross-Functional Alignment: OKRs and aligning goals across levels' + startOffset: 1318 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1318 + endOffset: 1493 +- name: 'Twitter’s Approach: Hybrid per-division model for product and ads' + startOffset: 1493 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1493 + endOffset: 1548 +- name: 'Decentralized Model: 
Immersive domain context, faster decisions, career tradeoffs' + startOffset: 1548 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1548 + endOffset: 1765 +- name: 'Centralized Model: Knowledge sharing, consistency, and context-building challenges' + startOffset: 1765 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1765 + endOffset: 1852 +- name: 'Communicating Insights: Translating metrics for product, engineering, and + design' + startOffset: 1852 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1852 + endOffset: 1988 +- name: 'Starting Data Science: Foundations—data pipelines, data quality, and analytics' + startOffset: 1988 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1988 + endOffset: 2209 +- name: 'Staffing Guidance: Engineers-to-data-scientist ratios and ML partnerships + (8:1 reference)' + startOffset: 2209 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2209 + endOffset: 2539 +- name: 'Knowledge Sharing & Publication: Research archives, Slack channels, and push + mechanisms' + startOffset: 2539 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2539 + endOffset: 2769 +- name: 'Product Partnership: Co-ownership with product, engineering, design, and + research' + startOffset: 2769 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2769 + endOffset: 2840 +- name: 'Metrics & Experimentation: Defining success metrics, ship criteria, and experiment + review' + startOffset: 2840 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2840 + endOffset: 3044 +- name: 'Analytics vs Data Science: Analysts driving dashboards vs ML-heavy DS work' + startOffset: 3044 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3044 + endOffset: 3150 +- name: 'OKRs & Exploration Time: Using objectives to prioritize and allocate research + time' + startOffset: 3150 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3150 + endOffset: 3256 +- name: 'Resolving Conflicts: Data-driven opportunity sizing for prioritization decisions' + startOffset: 3256 + 
url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3256 + endOffset: 3348 +- name: 'Data-Driven Product Innovation: Guiding roadmap decisions with trusted data' + startOffset: 3348 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3348 + endOffset: 3451 +- name: 'Qualitative Research Collaboration: Bridging user studies with quantitative + analysis' + startOffset: 3451 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3451 + endOffset: 3578 +- name: 'Contact & Resources: Lisa on Twitter, LinkedIn, and Medium' + startOffset: 3578 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3578 + endOffset: 3603 +- name: Episode Wrap-Up and Closing Remarks + startOffset: 3603 + url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3603 + endOffset: 3535 + transcript: - header: 'Guest Introduction: Lisa Cohen, Director of Data Science at Twitter' - line: This week, we'll talk about designing a data science organization. We have @@ -149,7 +266,7 @@ transcript: sec: 498 time: '8:18' who: Alexey -- header: 'Embedding Explained: Reporting lines vs day‑to‑day integration with feature +- header: 'Embedding Explained: Reporting lines vs day-to-day integration with feature teams' - line: Yeah, we can go back maybe to the Azure example. So we have a whole organization, with the VP managing the managers across the various areas – all data science @@ -341,7 +458,7 @@ transcript: sec: 1101 time: '18:21' who: Lisa -- header: 'Team Rhythms & Planning: Cross‑functional ceremonies and dependency management' +- header: 'Team Rhythms & Planning: Cross-functional ceremonies and dependency management' - line: But I guess a team has to have some sort of rhythm – some sort of ceremonies and things like this. For example, you start with planning, then you work for two weeks, then you finish with a retrospective – some sort of process, right? 
@@ -400,7 +517,7 @@ transcript: sec: 1239 time: '20:39' who: Lisa -- header: 'Cross‑Functional Alignment: OKRs and aligning goals across levels' +- header: 'Cross-Functional Alignment: OKRs and aligning goals across levels' - line: But on the surface, it looks a bit complicated, right? Because you have a lot of functions that are not really connected, if you think about the hierarchical structures, but they still somehow work together and move towards the same direction @@ -450,7 +567,7 @@ transcript: sec: 1443 time: '24:03' who: Lisa -- header: 'Twitter’s Approach: Hybrid per‑division model for product and ads' +- header: 'Twitter’s Approach: Hybrid per-division model for product and ads' - line: I'm taking a lot of notes because I want to come back to this and talk about that. But I also wanted to take a step back and, again, come back to this “centralized vs decentralized”. I think we've talked about what we can call “centralized,” @@ -533,7 +650,7 @@ transcript: sec: 1729 time: '28:49' who: Lisa -- header: 'Centralized Model: Knowledge sharing, consistency, and context‑building +- header: 'Centralized Model: Knowledge sharing, consistency, and context-building challenges' - line: Okay. And what are the cons of the centralized approach? What are the disadvantages? sec: 1765 @@ -676,7 +793,7 @@ transcript: sec: 2186 time: '36:26' who: Lisa -- header: 'Staffing Guidance: Engineers‑to‑data‑scientist ratios and ML partnerships +- header: 'Staffing Guidance: Engineers-to-data-scientist ratios and ML partnerships (8:1 reference)' - line: We have a question, “How many data scientists will I need? How do I estimate this before starting a project?” @@ -864,7 +981,7 @@ transcript: sec: 2755 time: '45:55' who: Lisa -- header: 'Product Partnership: Co‑ownership with product, engineering, design, and +- header: 'Product Partnership: Co-ownership with product, engineering, design, and research' - line: But you still can work from home if you want to, right? Okay. 
There was something else that I wanted to talk about, which is – I took a look at your LinkedIn and @@ -937,7 +1054,7 @@ transcript: sec: 2890 time: '48:10' who: Lisa -- header: 'Analytics vs Data Science: Analysts driving dashboards vs ML‑heavy DS work' +- header: 'Analytics vs Data Science: Analysts driving dashboards vs ML-heavy DS work' - line: Do you have product analysts? Or is it mostly data scientists who do analytics? sec: 3044 time: '50:44' @@ -1004,7 +1121,7 @@ transcript: sec: 3178 time: '52:58' who: Lisa -- header: 'Resolving Conflicts: Data‑driven opportunity sizing for prioritization +- header: 'Resolving Conflicts: Data-driven opportunity sizing for prioritization decisions' - line: How often does it happen – maybe not specifically at Twitter, but just in your experience – that in this kind of setup, different functions have conflicting @@ -1041,7 +1158,7 @@ transcript: sec: 3347 time: '55:47' who: Lisa -- header: 'Data‑Driven Product Innovation: Guiding roadmap decisions with trusted +- header: 'Data-Driven Product Innovation: Guiding roadmap decisions with trusted data' - line: Okay. [chuckles] Coming back to this sentence, “partnering closely with product management, engineering, design and research,” we covered that – “to pursue data-driven @@ -1138,7 +1255,7 @@ transcript: sec: 3595 time: '59:55' who: Alexey -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: This is great, yeah. I love the conversation. Thank you for driving through all the different topics I’m exploring here. Great to chat with you, as always. sec: 3603 @@ -1150,131 +1267,6 @@ transcript: sec: 3612 time: '1:00:12' who: Alexey -description: 'Discover how to design high-impact data science orgs: centralized vs - embedded models, staffing ratios and experimentation to speed decisions and scale - impact.' 
-intro: 'How should you structure a data science organization to maximize product impact: - centralized, embedded, or a hybrid of both? In this episode, Lisa Cohen, Director - of Data Science at Twitter who leads 70 data scientists and previously led Azure - Customer Growth Analytics at Microsoft, walks through practical tradeoffs and implementation - patterns for designing high‑impact data science orgs.

We cover centralized - vs embedded models and what “embedding” really means for reporting lines and day‑to‑day - integration with feature teams; Twitter’s hybrid per‑division approach for product - and ads; staffing guidance (including an engineers‑to‑data‑scientist ratio reference); - and rhythms for cross‑functional planning, OKRs, and dependency management. Lisa - also discusses experimentation and experiment review, defining success metrics and - ship criteria, knowledge sharing practices, differences between analytics and ML‑heavy - data science, and how to partner with product, engineering, design, and research. -

Listen to gain actionable guidance on choosing an org model, setting staffing - expectations, establishing experiment and metrics practices, and aligning data pipelines, - data quality, and OKRs to drive data‑driven product decisions.' -dateadded: '2022-07-08' -duration: PT00H58M55S -quotableClips: -- name: 'Guest Introduction: Lisa Cohen, Director of Data Science at Twitter' - startOffset: 77 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=77 - endOffset: 102 -- name: 'Career Background: Applied Math, Microsoft telemetry, Azure to Twitter' - startOffset: 102 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=102 - endOffset: 387 -- name: 'Org Models Overview: Centralized vs decentralized data science organization' - startOffset: 387 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=387 - endOffset: 514 -- name: 'Embedding Explained: Reporting lines vs day‑to‑day integration with feature - teams' - startOffset: 514 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=514 - endOffset: 641 -- name: 'Hybrid Structure: Centralization per division and multiple DS orgs' - startOffset: 641 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=641 - endOffset: 926 -- name: 'Reporting Structure: Embedded teams vs centralized data science reporting' - startOffset: 926 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=926 - endOffset: 1123 -- name: 'Team Rhythms & Planning: Cross‑functional ceremonies and dependency management' - startOffset: 1123 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1123 - endOffset: 1318 -- name: 'Cross‑Functional Alignment: OKRs and aligning goals across levels' - startOffset: 1318 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1318 - endOffset: 1493 -- name: 'Twitter’s Approach: Hybrid per‑division model for product and ads' - startOffset: 1493 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1493 - endOffset: 1548 -- name: 'Decentralized Model: Immersive domain context, faster decisions, career tradeoffs' - 
startOffset: 1548 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1548 - endOffset: 1765 -- name: 'Centralized Model: Knowledge sharing, consistency, and context‑building challenges' - startOffset: 1765 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1765 - endOffset: 1852 -- name: 'Communicating Insights: Translating metrics for product, engineering, and - design' - startOffset: 1852 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1852 - endOffset: 1988 -- name: 'Starting Data Science: Foundations—data pipelines, data quality, and analytics' - startOffset: 1988 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=1988 - endOffset: 2209 -- name: 'Staffing Guidance: Engineers‑to‑data‑scientist ratios and ML partnerships - (8:1 reference)' - startOffset: 2209 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2209 - endOffset: 2539 -- name: 'Knowledge Sharing & Publication: Research archives, Slack channels, and push - mechanisms' - startOffset: 2539 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2539 - endOffset: 2769 -- name: 'Product Partnership: Co‑ownership with product, engineering, design, and - research' - startOffset: 2769 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2769 - endOffset: 2840 -- name: 'Metrics & Experimentation: Defining success metrics, ship criteria, and experiment - review' - startOffset: 2840 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=2840 - endOffset: 3044 -- name: 'Analytics vs Data Science: Analysts driving dashboards vs ML‑heavy DS work' - startOffset: 3044 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3044 - endOffset: 3150 -- name: 'OKRs & Exploration Time: Using objectives to prioritize and allocate research - time' - startOffset: 3150 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3150 - endOffset: 3256 -- name: 'Resolving Conflicts: Data‑driven opportunity sizing for prioritization decisions' - startOffset: 3256 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3256 - 
endOffset: 3348 -- name: 'Data‑Driven Product Innovation: Guiding roadmap decisions with trusted data' - startOffset: 3348 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3348 - endOffset: 3451 -- name: 'Qualitative Research Collaboration: Bridging user studies with quantitative - analysis' - startOffset: 3451 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3451 - endOffset: 3578 -- name: 'Contact & Resources: Lisa on Twitter, LinkedIn, and Medium' - startOffset: 3578 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3578 - endOffset: 3603 -- name: Episode Wrap‑Up and Closing Remarks - startOffset: 3603 - url: https://www.youtube.com/watch?v=F_rJ4fg5ZEA&t=3603 - endOffset: 3535 --- Links: diff --git a/_podcast/s12e05-indie-hacking.md b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md similarity index 96% rename from _podcast/s12e05-indie-hacking.md rename to _podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md index 3f26ea7e..d981ffc5 100644 --- a/_podcast/s12e05-indie-hacking.md +++ b/_podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.md @@ -1,29 +1,150 @@ --- +title: "Indie Hacking and Bootstrapping Side Projects for Data Scientists: Build, Launch & Monetize Indie Hacker Products" +short: "Indie Hacking and Bootstrapping Side Projects for Data Scientists" +season: 12 episode: 5 guests: - paulineclavelloux -date: 2025-11-07 -topics: -- Entrepreneurship -- Indie Hacking -- Freelance -- Product Development -- Startups -- Bootstrapping -- Data Tools +image: images/podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.jpg ids: anchor: Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb youtube: KsV_SVXlTo8 -image: images/podcast/s12e05-indie-hacking.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb apple: 
https://podcasts.apple.com/us/podcast/indie-hacking-pauline-clavelloux/id1541710331?i=1000595787491 spotify: https://open.spotify.com/episode/2DlD756csrDFAxfuTjSKwY?si=_H2G3bJtQIuJMAe8daEIYg youtube: https://www.youtube.com/watch?v=KsV_SVXlTo8 -season: 12 -short: Indie Hacking -title: 'Build, Launch & Monetize Indie Hacker Products: Crypto Alerts, Generative - AI & Growth' + +description: "Build indie-hacking products: launch crypto alerts & generative AI apps, validate ideas, choose tech, price effectively and monetize for sustainable growth." +intro: "How do you build, launch, and actually monetize indie-hacker products in crypto alerts and generative AI while keeping a day job? In this episode, Pauline Clavelloux — an IBM data science manager and consultant with eight years’ experience who also ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired selfie-to-art tool) — walks through the practical steps.

We cover Pauline’s career path and an ML production case study (money-laundering detection), then move into indie-hacking essentials: bootstrapping, splitting time between a full-time role and side projects, and validating ideas. You’ll hear how she productized projects (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API fine-tuning vs self-hosted GPUs), managed operating costs, and launched via Twitter and niche listings. The conversation also tackles customer acquisition, pricing constraints, marketing and content strategy, and skills gained across GCP, data engineering, web dev, and growth.

Listen for actionable guidance on product launch, monetization, and time management for indie hackers working on crypto alerts and generative AI—concrete steps to validate, build, and grow side products without external funding." +topics: +- indie hacking +- bootstrapping +- side projects +- data science +- machine learning +- generative AI +- entrepreneurship +- freelance +dateadded: 2023-01-21 +date: 2025-11-07 + +duration: PT00H59M27S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=0 + endOffset: 72 +- name: 'Career Journey: Engineering Student to IBM Data Scientist' + startOffset: 72 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=72 + endOffset: 178 +- name: 'Consulting Work: Project Types and Client Engagement' + startOffset: 178 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=178 + endOffset: 290 +- name: 'Manager Role: Deliverables, Roadmaps, and Client Communication' + startOffset: 290 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=290 + endOffset: 331 +- name: 'Case Study: Money-Laundering Detection Project and Deployment' + startOffset: 331 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=331 + endOffset: 443 +- name: 'Indie Hacking Explained: Bootstrapping Without External Funding' + startOffset: 443 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=443 + endOffset: 538 +- name: 'Day Job + Side Projects: Time Allocation and Routine' + startOffset: 538 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=538 + endOffset: 674 +- name: 'Cryptopy Origin: Building Crypto Alerts for Personal Trading' + startOffset: 674 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=674 + endOffset: 909 +- name: 'Productization: Company Setup, Landing Pages, Legal and Payments' + startOffset: 909 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=909 + endOffset: 1125 +- name: 'Technology Choices: Python/Flask, Team Contributions, and Architecture' + startOffset: 1125 + url: 
https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1125 + endOffset: 1173 +- name: 'Marketing Efforts: Audience Reach, Social Channels, and Challenges' + startOffset: 1173 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1173 + endOffset: 1306 +- name: Operating Costs and Niche Product Strategy + startOffset: 1306 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1306 + endOffset: 1413 +- name: 'UnrealMe Origin: DreamBooth Inspiration and Rapid Prototyping' + startOffset: 1413 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1413 + endOffset: 1548 +- name: 'Implementation Decisions: API Fine-Tuning vs Self-Hosted GPUs' + startOffset: 1548 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1548 + endOffset: 1721 +- name: 'Launch Channels: Twitter, Black Friday Listings, and Early Sales' + startOffset: 1721 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1721 + endOffset: 1891 +- name: Customer Acquisition and Pricing Constraints + startOffset: 1891 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1891 + endOffset: 2095 +- name: 'Motivation: Creative Drive and Why Indie Hacking Matters' + startOffset: 2095 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2095 + endOffset: 2147 +- name: 'Skills Gained: GCP, Data Engineering, Web Dev, and Marketing' + startOffset: 2147 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2147 + endOffset: 2367 +- name: 'Work–Life Balance: Passion, Energy, and Time Management' + startOffset: 2367 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2367 + endOffset: 2547 +- name: 'AboutStartup.io: Blog Concept, Interviews, and Monetization Paths' + startOffset: 2547 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2547 + endOffset: 2739 +- name: 'Idea Generation: Frustration-Led Problems and Opportunity Sourcing' + startOffset: 2739 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2739 + endOffset: 2934 +- name: 'Idea Validation: Competitor Scan, Skills Check, and Build Criteria' + startOffset: 2934 + url: 
https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2934 + endOffset: 3035 +- name: 'Twitter Growth: Personal Branding and Audience Building' + startOffset: 3035 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3035 + endOffset: 3158 +- name: 'Content Strategy: Balancing Data Science and Indie-Hacking Posts' + startOffset: 3158 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3158 + endOffset: 3204 +- name: 'Community Access: Contact Options via Twitter and Slack' + startOffset: 3204 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3204 + endOffset: 3275 +- name: 'Indie Inspiration: Pieter Levels and the “Many Projects” Approach' + startOffset: 3275 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3275 + endOffset: 3430 +- name: 'Recommended Resources: Data Sense and AboutStartup.io' + startOffset: 3430 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3430 + endOffset: 3499 +- name: Episode Wrap-Up and Final Thoughts + startOffset: 3499 + url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3499 + endOffset: 3567 + transcript: - header: Episode Introduction - header: Episode Introduction @@ -1217,139 +1338,6 @@ transcript: sec: 3567 time: '59:27' who: Pauline -intro: 'How do you build, launch, and actually monetize indie-hacker products in crypto - alerts and generative AI while keeping a day job? In this episode, Pauline Clavelloux - — an IBM data science manager and consultant with eight years’ experience who also - ships side projects like Cryptopy (crypto alerts) and UnrealMe (a DreamBooth-inspired - selfie-to-art tool) — walks through the practical steps.

We cover Pauline’s - career path and an ML production case study (money‑laundering detection), then move - into indie-hacking essentials: bootstrapping, splitting time between a full‑time - role and side projects, and validating ideas. You’ll hear how she productized projects - (company setup, landing pages, legal, payments), chose a stack (Python/Flask, API - fine‑tuning vs self‑hosted GPUs), managed operating costs, and launched via Twitter - and niche listings. The conversation also tackles customer acquisition, pricing - constraints, marketing and content strategy, and skills gained across GCP, data - engineering, web dev, and growth.

Listen for actionable guidance on product - launch, monetization, and time management for indie hackers working on crypto alerts - and generative AI—concrete steps to validate, build, and grow side products without - external funding.' -description: 'Build indie-hacking products: launch crypto alerts & generative AI apps, - validate ideas, choose tech, price effectively and monetize for sustainable growth.' -dateadded: '2023-01-21' -duration: PT00H59M27S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=0 - endOffset: 72 -- name: 'Career Journey: Engineering Student to IBM Data Scientist' - startOffset: 72 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=72 - endOffset: 178 -- name: 'Consulting Work: Project Types and Client Engagement' - startOffset: 178 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=178 - endOffset: 290 -- name: 'Manager Role: Deliverables, Roadmaps, and Client Communication' - startOffset: 290 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=290 - endOffset: 331 -- name: 'Case Study: Money-Laundering Detection Project and Deployment' - startOffset: 331 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=331 - endOffset: 443 -- name: 'Indie Hacking Explained: Bootstrapping Without External Funding' - startOffset: 443 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=443 - endOffset: 538 -- name: 'Day Job + Side Projects: Time Allocation and Routine' - startOffset: 538 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=538 - endOffset: 674 -- name: 'Cryptopy Origin: Building Crypto Alerts for Personal Trading' - startOffset: 674 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=674 - endOffset: 909 -- name: 'Productization: Company Setup, Landing Pages, Legal and Payments' - startOffset: 909 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=909 - endOffset: 1125 -- name: 'Technology Choices: Python/Flask, Team Contributions, and Architecture' - startOffset: 
1125 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1125 - endOffset: 1173 -- name: 'Marketing Efforts: Audience Reach, Social Channels, and Challenges' - startOffset: 1173 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1173 - endOffset: 1306 -- name: Operating Costs and Niche Product Strategy - startOffset: 1306 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1306 - endOffset: 1413 -- name: 'UnrealMe Origin: DreamBooth Inspiration and Rapid Prototyping' - startOffset: 1413 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1413 - endOffset: 1548 -- name: 'Implementation Decisions: API Fine-Tuning vs Self-Hosted GPUs' - startOffset: 1548 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1548 - endOffset: 1721 -- name: 'Launch Channels: Twitter, Black Friday Listings, and Early Sales' - startOffset: 1721 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1721 - endOffset: 1891 -- name: Customer Acquisition and Pricing Constraints - startOffset: 1891 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=1891 - endOffset: 2095 -- name: 'Motivation: Creative Drive and Why Indie Hacking Matters' - startOffset: 2095 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2095 - endOffset: 2147 -- name: 'Skills Gained: GCP, Data Engineering, Web Dev, and Marketing' - startOffset: 2147 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2147 - endOffset: 2367 -- name: 'Work–Life Balance: Passion, Energy, and Time Management' - startOffset: 2367 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2367 - endOffset: 2547 -- name: 'AboutStartup.io: Blog Concept, Interviews, and Monetization Paths' - startOffset: 2547 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2547 - endOffset: 2739 -- name: 'Idea Generation: Frustration-Led Problems and Opportunity Sourcing' - startOffset: 2739 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2739 - endOffset: 2934 -- name: 'Idea Validation: Competitor Scan, Skills Check, and Build Criteria' - startOffset: 
2934 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=2934 - endOffset: 3035 -- name: 'Twitter Growth: Personal Branding and Audience Building' - startOffset: 3035 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3035 - endOffset: 3158 -- name: 'Content Strategy: Balancing Data Science and Indie-Hacking Posts' - startOffset: 3158 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3158 - endOffset: 3204 -- name: 'Community Access: Contact Options via Twitter and Slack' - startOffset: 3204 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3204 - endOffset: 3275 -- name: 'Indie Inspiration: Pieter Levels and the “Many Projects” Approach' - startOffset: 3275 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3275 - endOffset: 3430 -- name: 'Recommended Resources: Data Sense and AboutStartup.io' - startOffset: 3430 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3430 - endOffset: 3499 -- name: Episode Wrap-Up and Final Thoughts - startOffset: 3499 - url: https://www.youtube.com/watch?v=KsV_SVXlTo8&t=3499 - endOffset: 3567 --- Links: diff --git a/_podcast/s14e03-data-strategy-key-principles-and-best-practices.md b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md similarity index 97% rename from _podcast/s14e03-data-strategy-key-principles-and-best-practices.md rename to _podcast/data-strategy-and-dataops-for-ai-powered-products.md index cc6828ab..84b8f2e7 100644 --- a/_podcast/s14e03-data-strategy-key-principles-and-best-practices.md +++ b/_podcast/data-strategy-and-dataops-for-ai-powered-products.md @@ -1,20 +1,125 @@ --- +title: "Actionable Data Strategy & DataOps for AI-Powered Products: Pitch, Measure, Use GPT" +short: "Data Strategy: Key Principles and Best Practices" +season: 14 episode: 3 guests: - boyanangelov +image: images/podcast/data-strategy-and-dataops-for-ai-powered-products.jpg ids: - anchor: atatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete + anchor: 
datatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete youtube: jGbfeYdlCiQ -image: images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Data-Strategy-Key-Principles-and-Best-Practices---Boyan-Angelov-e24mete apple: https://podcasts.apple.com/us/podcast/data-strategy-key-principles-and-best-practices-boyan/id1541710331?i=1000614629229 spotify: https://open.spotify.com/episode/7tITQ4nLypogRLUjjK75mx?si=722BlhoLSGuxZlE9ia7VhA youtube: https://www.youtube.com/watch?v=jGbfeYdlCiQ -season: 14 -short: 'Data Strategy: Key Principles and Best Practices' -title: 'Actionable Data Strategy & DataOps for AI-Powered Products: Pitch, Measure, - Use GPT' + +description: "Master actionable data strategy, DataOps & GPT: learn to pitch small AI use cases, set baselines, apply CI/CD and deliver measurable AI-powered products." +intro: "How do you turn AI ambitions into measurable, deliverable data products? In this episode Boyan Angelov — author of Elements of Data Strategy and leader of data strategy at Exxeta AG — walks through practical steps to make data strategy actionable for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused plan and explains the due diligence needed to align business goals with feasible use cases.

Topics covered include use case ideation, feasibility and prioritization, managing influence cascades and scope creep, impact assessment and portfolio management, and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for data — and clarify platform, AI and BI roles and the core skills required for strategists. Boyan also shows how GPT and ChatGPT can be used as a writing co-pilot for outlines, pitches and technical guidance, and recommends starting small with budgeted use cases plus baseline and post-implementation metrics to measure success.

Listen to get concrete guidance on pitching, measuring and operationalizing a data strategy for AI-powered products — including practical DataOps and GPT workflows you can apply right away" +topics: +- data strategy +- dataops +- AI +- data strategy +dateadded: 2023-05-27 + +duration: PT00H56M39S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=0 + endOffset: 113 +- name: Guest Introduction & Current Role + startOffset: 113 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=113 + endOffset: 170 +- name: Guest Background & Career Path + startOffset: 170 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=170 + endOffset: 347 +- name: 'Becoming a Data Strategist: Accidental Transition' + startOffset: 347 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=347 + endOffset: 493 +- name: 'Defining Data Strategy: Actionable, Flexible Plans' + startOffset: 493 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=493 + endOffset: 613 +- name: Due Diligence & Aligning Business Goals + startOffset: 613 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=613 + endOffset: 808 +- name: 'Designing Strategy: Use Case Ideation, Feasibility & Prioritization' + startOffset: 808 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=808 + endOffset: 981 +- name: Influence Cascade & Scope Creep in Data Products + startOffset: 981 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=981 + endOffset: 1102 +- name: Impact Assessment, Portfolio Management & Delivery + startOffset: 1102 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1102 + endOffset: 1410 +- name: Data- and AI-Powered Products Defined + startOffset: 1410 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1410 + endOffset: 1497 +- name: 'DataOps Principles: Lean, Agile & CI/CD Practices' + startOffset: 1497 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1497 + endOffset: 1638 +- name: 'Strategy Roles: Platform, AI and BI Distinctions' + 
startOffset: 1638 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1638 + endOffset: 1802 +- name: 'Core Skills: Data Knowledge, Communication & Systems Thinking' + startOffset: 1802 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1802 + endOffset: 2195 +- name: 'Path to Data Strategist: Business Fluency & Deliberate Practice' + startOffset: 2195 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2195 + endOffset: 2349 +- name: 'Translation Skill: Explaining Use Cases to Stakeholders' + startOffset: 2349 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2349 + endOffset: 2491 +- name: 'From Strategist to CTO: Ownership, Budgeting & Management' + startOffset: 2491 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2491 + endOffset: 2626 +- name: 'GPT as Writing Co-Pilot: Sidebars, Editing & Ethical Considerations' + startOffset: 2626 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2626 + endOffset: 2840 +- name: 'GPT for Drafting: Outlines, PowerPoint & Chapter Structure' + startOffset: 2840 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2840 + endOffset: 3062 +- name: 'ChatGPT for Data Strategy: Ideation, Prompting & Tech Guidance' + startOffset: 3062 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3062 + endOffset: 3164 +- name: 'Pitching Strategy: Start Small with a Budgeted Use Case' + startOffset: 3164 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3164 + endOffset: 3332 +- name: 'Baselines & Measurement: Pre- and Post-Implementation Metrics' + startOffset: 3332 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3332 + endOffset: 3416 +- name: 'Recommended Reading: Data Strategy, DataOps & Infonomics' + startOffset: 3416 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3416 + endOffset: 3512 +- name: Episode Wrap-Up & Resources + startOffset: 3512 + url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3512 + endOffset: 3399 + transcript: - header: Podcast Introduction - header: Guest Introduction & Current Role @@ 
-831,7 +936,7 @@ transcript: sec: 2548 time: '42:28' who: Boyan -- header: 'GPT as Writing Co‑Pilot: Sidebars, Editing & Ethical Considerations' +- header: 'GPT as Writing Co-Pilot: Sidebars, Editing & Ethical Considerations' - line: Interesting. Then there's something I really wanted to talk to you about. For your book, you used GPT, and that's really nice that you explicitly acknowledge that. @@ -1139,119 +1244,6 @@ transcript: sec: 3512 time: '58:32' who: Alexey -description: 'Master actionable data strategy, DataOps & GPT: learn to pitch small - AI use cases, set baselines, apply CI/CD and deliver measurable AI-powered products.' -intro: How do you turn AI ambitions into measurable, deliverable data products? In - this episode Boyan Angelov — author of Elements of Data Strategy and leader of data - strategy at Exxeta AG — walks through practical steps to make data strategy actionable - for AI-powered products. Drawing on a decade across bioinformatics, clinical trials, - HRTech, LegalTech and consulting, Boyan reframes data strategy as a flexible, outcome-focused - plan and explains the due diligence needed to align business goals with feasible - use cases.

Topics covered include use case ideation, feasibility and prioritization, - managing influence cascades and scope creep, impact assessment and portfolio management, - and delivery practices. We dig into DataOps principles — lean, agile and CI/CD for - data — and clarify platform, AI and BI roles and the core skills required for strategists. - Boyan also shows how GPT and ChatGPT can be used as a writing co‑pilot for outlines, - pitches and technical guidance, and recommends starting small with budgeted use - cases plus baseline and post-implementation metrics to measure success.

- Listen to get concrete guidance on pitching, measuring and operationalizing a data - strategy for AI-powered products — including practical DataOps and GPT workflows - you can apply right away. -dateadded: '2023-05-27' -duration: PT00H56M39S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=0 - endOffset: 113 -- name: Guest Introduction & Current Role - startOffset: 113 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=113 - endOffset: 170 -- name: Guest Background & Career Path - startOffset: 170 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=170 - endOffset: 347 -- name: 'Becoming a Data Strategist: Accidental Transition' - startOffset: 347 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=347 - endOffset: 493 -- name: 'Defining Data Strategy: Actionable, Flexible Plans' - startOffset: 493 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=493 - endOffset: 613 -- name: Due Diligence & Aligning Business Goals - startOffset: 613 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=613 - endOffset: 808 -- name: 'Designing Strategy: Use Case Ideation, Feasibility & Prioritization' - startOffset: 808 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=808 - endOffset: 981 -- name: Influence Cascade & Scope Creep in Data Products - startOffset: 981 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=981 - endOffset: 1102 -- name: Impact Assessment, Portfolio Management & Delivery - startOffset: 1102 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1102 - endOffset: 1410 -- name: Data- and AI-Powered Products Defined - startOffset: 1410 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1410 - endOffset: 1497 -- name: 'DataOps Principles: Lean, Agile & CI/CD Practices' - startOffset: 1497 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1497 - endOffset: 1638 -- name: 'Strategy Roles: Platform, AI and BI Distinctions' - startOffset: 1638 - url: 
https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1638 - endOffset: 1802 -- name: 'Core Skills: Data Knowledge, Communication & Systems Thinking' - startOffset: 1802 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=1802 - endOffset: 2195 -- name: 'Path to Data Strategist: Business Fluency & Deliberate Practice' - startOffset: 2195 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2195 - endOffset: 2349 -- name: 'Translation Skill: Explaining Use Cases to Stakeholders' - startOffset: 2349 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2349 - endOffset: 2491 -- name: 'From Strategist to CTO: Ownership, Budgeting & Management' - startOffset: 2491 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2491 - endOffset: 2626 -- name: 'GPT as Writing Co‑Pilot: Sidebars, Editing & Ethical Considerations' - startOffset: 2626 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2626 - endOffset: 2840 -- name: 'GPT for Drafting: Outlines, PowerPoint & Chapter Structure' - startOffset: 2840 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=2840 - endOffset: 3062 -- name: 'ChatGPT for Data Strategy: Ideation, Prompting & Tech Guidance' - startOffset: 3062 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3062 - endOffset: 3164 -- name: 'Pitching Strategy: Start Small with a Budgeted Use Case' - startOffset: 3164 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3164 - endOffset: 3332 -- name: 'Baselines & Measurement: Pre- and Post-Implementation Metrics' - startOffset: 3332 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3332 - endOffset: 3416 -- name: 'Recommended Reading: Data Strategy, DataOps & Infonomics' - startOffset: 3416 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3416 - endOffset: 3512 -- name: Episode Wrap-Up & Resources - startOffset: 3512 - url: https://www.youtube.com/watch?v=jGbfeYdlCiQ&t=3512 - endOffset: 3399 --- Links: diff --git a/_podcast/s01e01-roles.md b/_podcast/data-team-roles.md similarity index 98% rename from 
_podcast/s01e01-roles.md rename to _podcast/data-team-roles.md index 60d51e34..c131ace3 100644 --- a/_podcast/s01e01-roles.md +++ b/_podcast/data-team-roles.md @@ -1,15 +1,11 @@ --- -title: 'Data Team Roles Explained: Skills, Responsibilities, and How Teams Ship ML - Products' -short: Roles in a Data Team -guests: -- alexeygrigorev -image: images/podcast/s01e01-roles.jpg -keywords: data team roles, data scientist, data engineer, machine learning engineer, - data analyst, MLOps engineer, product manager, data team structure, data science - roles, ML engineer vs data engineer, data team responsibilities, data science career +title: "Data Team Roles Explained: Skills, Responsibilities, and How Teams Ship ML Products" +short: "Roles in a Data Team" season: 1 episode: 1 +guests: +- alexeygrigorev +image: images/podcast/data-team-roles.jpg ids: youtube: UukjwSIAnpw anchor: Roles-in-a-data-team---Alexey-Grigorev-emqcft @@ -18,7 +14,22 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Roles-in-a-data-team---Alexey-Grigorev-emqcft spotify: TODO apple: TODO -dateadded: '2021-02-23' + +topics: +- team building +- data teams +- data science +- machine learning +- data analysis +- data engineering +- MLOps +- product management +- leadership +dateadded: 2021-02-23 + + + +keywords: data team roles, data scientist, data engineer, machine learning engineer, data analyst, MLOps engineer, product manager, data team structure, data science roles, ML engineer vs data engineer, data team responsibilities, data science career --- The topic today is the roles in data teams. We want to understand what kind of people work in the data team, what responsibilities they have, what they do, and what they need to know. 
diff --git a/_podcast/s03e04-effective-communication-with-business.md b/_podcast/data-translator-role-and-data-strategy.md similarity index 97% rename from _podcast/s03e04-effective-communication-with-business.md rename to _podcast/data-translator-role-and-data-strategy.md index 647a4324..8e4e4deb 100644 --- a/_podcast/s03e04-effective-communication-with-business.md +++ b/_podcast/data-translator-role-and-data-strategy.md @@ -1,13 +1,11 @@ --- -title: 'Data Strategist Guide: Effective Communication to Bridge Data Teams & Management - for Data-Driven Growth' -short: 'Data Strategist Guide to Driving Growth: Prototypes, MVPs & Building Data - Trust' -guests: -- liorbarak -image: images/podcast/s03e04-effective-communication-with-business.jpg +title: "Data Strategist Guide: Effective Communication to Bridge Data Teams & Management for Data-Driven Growth" +short: "Data Strategist Guide to Driving Growth: Prototypes, MVPs & Building Data Trust" season: 3 episode: 4 +guests: +- liorbarak +image: images/podcast/data-translator-role-and-data-strategy.jpg ids: youtube: gqroEsTyLD0 anchor: Effective-Communication-with-Business-for-Data-Professionals---Lior-Barak-e1002rm @@ -16,6 +14,121 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Effective-Communication-with-Business-for-Data-Professionals---Lior-Barak-e1002rm spotify: https://open.spotify.com/episode/4RF592cRWxHgcXbx6pV0Ja apple: https://podcasts.apple.com/us/podcast/effective-communication-business-for-data-professionals/id1541710331?i=1000519463715 + +description: "Discover how a data translator bridges management and tech to drive data-driven growth—practical data strategy, forecasts, prototypes, and team alignment" +intro: "How do you bridge the gap between data teams and management so analytics actually drives growth? In this episode, Lior Barak — author of \"Data is Like a Plate of Hummus,\" co-host of WHAT the Data?! 
and founder of Tale About Data with 12+ years building data teams — lays out the role of a data translator: a product-minded strategist who converts technical outputs into business-aligned action.

We explore practical tactics for building data trust (proactive alerts, QA dashboards, and confidence intervals for forecasts), embedding with business teams to learn workflows, and using data-led growth to improve recruitment, marketing, and operations. Lior walks through ways to overcome resistance — hackathons and side projects — and advocates lean delivery: MVPs, prototype-first development, clear handover strategies, and scaling with OKRs. He also covers how to explain effort to non-technical stakeholders, break silos through co-working, and use chat-driven remote collaboration effectively.

Listen to learn concrete approaches for data strategy, data communication, and production-ready delivery that help your organization move from data chaos to measurable, data-driven growth." +topics: +- data strategy +- communication +- project management +- leadership +- data teams +dateadded: 2021-05-01 + +duration: PT00H57M23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=0 + endOffset: 91 +- name: 'Episode Theme: Bridging Data & Management' + startOffset: 91 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=91 + endOffset: 150 +- name: 'Guest Background: Lior''s data and product journey' + startOffset: 150 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=150 + endOffset: 248 +- name: 'Role Defined: Data strategist as translator between business and tech' + startOffset: 248 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=248 + endOffset: 466 +- name: 'Communication Tactics: Proactive alerts to maintain data trust' + startOffset: 466 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=466 + endOffset: 648 +- name: 'Forecast Transparency: Confidence intervals and QA dashboards' + startOffset: 648 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=648 + endOffset: 795 +- name: 'Translator Profiles: Product-minded data advocates for alignment' + startOffset: 795 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=795 + endOffset: 860 +- name: 'Embedment: Sitting with business to learn workflows and needs' + startOffset: 860 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=860 + endOffset: 1053 +- name: 'Data-led Growth: Using data to improve recruitment, marketing, and ops' + startOffset: 1053 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1053 + endOffset: 1225 +- name: 'Overcoming Resistance: Hackathons and side projects to prove value' + startOffset: 1225 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1225 + endOffset: 1434 +- name: 'Lean Delivery: MVPs, 
iterative development, and scaling with OKRs' + startOffset: 1434 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1434 + endOffset: 1577 +- name: 'Prototype-first: Embrace imperfect code to validate solutions' + startOffset: 1577 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1577 + endOffset: 1759 +- name: 'Handover Strategy: Creating ownership for productionization' + startOffset: 1759 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1759 + endOffset: 1962 +- name: 'Value over Aesthetics: Message clarity beats polish in early stages' + startOffset: 1962 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1962 + endOffset: 2092 +- name: 'Hummus Metaphor: Quick prototype versus crafted product' + startOffset: 2092 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2092 + endOffset: 2193 +- name: 'Non-technical Stakeholders: Explain effort in plain language' + startOffset: 2193 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2193 + endOffset: 2384 +- name: 'Cross-team Empathy: Breaking silos through co-working and lunches' + startOffset: 2384 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2384 + endOffset: 2575 +- name: 'Remote Collaboration: Chat-driven triggers and selective meeting use' + startOffset: 2575 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2575 + endOffset: 2729 +- name: 'Product Perspectives: Consumers vs engineers — the hummus analogy' + startOffset: 2729 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2729 + endOffset: 3050 +- name: 'Local Anecdote: Recommended hummus spots in Berlin' + startOffset: 3050 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3050 + endOffset: 3096 +- name: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' + startOffset: 3096 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3096 + endOffset: 3200 +- name: 'Strategy Foundations: Build a stable data ground before models' + startOffset: 3200 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3200 + endOffset: 3349 
+- name: 'Data Chaos: Clearing requests, educating users, and leading growth' + startOffset: 3349 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3349 + endOffset: 3460 +- name: 'Resources & Contact: Lior''s LinkedIn, Twitter, and podcast' + startOffset: 3460 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3460 + endOffset: 3481 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3481 + url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3481 + endOffset: 3443 + transcript: - header: Podcast Introduction - header: 'Episode Theme: Bridging Data & Management' @@ -794,7 +907,7 @@ transcript: sec: 3078 time: '51:18' who: Alexey -- header: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' +- header: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' - line: Why did you call your book ”Data is Like a Plate of Hummus”? I think I am getting some ideas from our conversation. But maybe you have a short answer to that question? @@ -929,126 +1042,6 @@ transcript: sec: 3534 time: '58:54' who: Lior -description: Discover how a data translator bridges management and tech to drive data-driven - growth—practical data strategy, forecasts, prototypes, and team alignment. -intro: 'How do you bridge the gap between data teams and management so analytics actually - drives growth? In this episode, Lior Barak — author of "Data is Like a Plate of Hummus," - co-host of WHAT the Data?! and founder of Tale About Data with 12+ years building - data teams — lays out the role of a data translator: a product-minded strategist - who converts technical outputs into business-aligned action.

We explore - practical tactics for building data trust (proactive alerts, QA dashboards, and - confidence intervals for forecasts), embedding with business teams to learn workflows, - and using data-led growth to improve recruitment, marketing, and operations. Lior - walks through ways to overcome resistance — hackathons and side projects — and advocates - lean delivery: MVPs, prototype-first development, clear handover strategies, and - scaling with OKRs. He also covers how to explain effort to non-technical stakeholders, - break silos through co-working, and use chat-driven remote collaboration effectively. -

Listen to learn concrete approaches for data strategy, data communication, - and production-ready delivery that help your organization move from data chaos to - measurable, data-driven growth.' -dateadded: '2021-05-01' -duration: PT00H57M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=0 - endOffset: 91 -- name: 'Episode Theme: Bridging Data & Management' - startOffset: 91 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=91 - endOffset: 150 -- name: 'Guest Background: Lior''s data and product journey' - startOffset: 150 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=150 - endOffset: 248 -- name: 'Role Defined: Data strategist as translator between business and tech' - startOffset: 248 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=248 - endOffset: 466 -- name: 'Communication Tactics: Proactive alerts to maintain data trust' - startOffset: 466 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=466 - endOffset: 648 -- name: 'Forecast Transparency: Confidence intervals and QA dashboards' - startOffset: 648 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=648 - endOffset: 795 -- name: 'Translator Profiles: Product-minded data advocates for alignment' - startOffset: 795 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=795 - endOffset: 860 -- name: 'Embedment: Sitting with business to learn workflows and needs' - startOffset: 860 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=860 - endOffset: 1053 -- name: 'Data-led Growth: Using data to improve recruitment, marketing, and ops' - startOffset: 1053 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1053 - endOffset: 1225 -- name: 'Overcoming Resistance: Hackathons and side projects to prove value' - startOffset: 1225 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1225 - endOffset: 1434 -- name: 'Lean Delivery: MVPs, iterative development, and scaling with OKRs' - startOffset: 1434 - url: 
https://www.youtube.com/watch?v=gqroEsTyLD0&t=1434 - endOffset: 1577 -- name: 'Prototype-first: Embrace imperfect code to validate solutions' - startOffset: 1577 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1577 - endOffset: 1759 -- name: 'Handover Strategy: Creating ownership for productionization' - startOffset: 1759 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1759 - endOffset: 1962 -- name: 'Value over Aesthetics: Message clarity beats polish in early stages' - startOffset: 1962 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=1962 - endOffset: 2092 -- name: 'Hummus Metaphor: Quick prototype versus crafted product' - startOffset: 2092 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2092 - endOffset: 2193 -- name: 'Non-technical Stakeholders: Explain effort in plain language' - startOffset: 2193 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2193 - endOffset: 2384 -- name: 'Cross-team Empathy: Breaking silos through co-working and lunches' - startOffset: 2384 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2384 - endOffset: 2575 -- name: 'Remote Collaboration: Chat-driven triggers and selective meeting use' - startOffset: 2575 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2575 - endOffset: 2729 -- name: 'Product Perspectives: Consumers vs engineers — the hummus analogy' - startOffset: 2729 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=2729 - endOffset: 3050 -- name: 'Local Anecdote: Recommended hummus spots in Berlin' - startOffset: 3050 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3050 - endOffset: 3096 -- name: 'Book Overview: Purpose of "Data is Like a Plate of Hummus"' - startOffset: 3096 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3096 - endOffset: 3200 -- name: 'Strategy Foundations: Build a stable data ground before models' - startOffset: 3200 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3200 - endOffset: 3349 -- name: 'Data Chaos: Clearing requests, educating users, and leading 
growth' - startOffset: 3349 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3349 - endOffset: 3460 -- name: 'Resources & Contact: Lior''s LinkedIn, Twitter, and podcast' - startOffset: 3460 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3460 - endOffset: 3481 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3481 - url: https://www.youtube.com/watch?v=gqroEsTyLD0&t=3481 - endOffset: 3443 --- diff --git a/_podcast/s11e03-from-data-science-to-dataops.md b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md similarity index 95% rename from _podcast/s11e03-from-data-science-to-dataops.md rename to _podcast/dataops-and-gitops-best-practices-for-data-teams.md index 7ab1059b..df40aded 100644 --- a/_podcast/s11e03-from-data-science-to-dataops.md +++ b/_podcast/dataops-and-gitops-best-practices-for-data-teams.md @@ -1,20 +1,149 @@ --- +title: "DataOps & GitOps for Data Teams: Onboarding, IaC, Reproducibility & Production Best Practices" +short: "From Data Science to DataOps" +season: 11 episode: 3 guests: - tomaszhinc +image: images/podcast/dataops-and-gitops-best-practices-for-data-teams.jpg ids: anchor: From-Data-Science-to-DataOps---Tomasz-Hinc-e1p7sjb youtube: lem7knxqNzg -image: images/podcast/s11e03-from-data-science-to-dataops.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Data-Science-to-DataOps---Tomasz-Hinc-e1p7sjb apple: https://podcasts.apple.com/us/podcast/from-data-science-to-dataops-tomasz-hinc/id1541710331?i=1000583457504 spotify: https://open.spotify.com/episode/6jLgdl59sVCdVNJezdIqJY?si=NXasnXtFQVO0KAcCFbvUtQ youtube: https://www.youtube.com/watch?v=lem7knxqNzg -season: 11 -short: From Data Science to DataOps -title: 'DataOps & GitOps for Data Teams: Onboarding, IaC, Reproducibility & Production - Best Practices' + +description: "Master DataOps, GitOps and IaC best practices for reproducibility, onboarding and production reliability — actionable Git workflows, Terraform, Docker tips" +intro: "How do you make 
data work less fragile and easier to onboard while keeping production safe and reproducible? In this episode, Tomasz Hinc, a DataOps practitioner from Poznań with roots in econometrics, product analytics, data engineering and ML, walks through practical DataOps and GitOps patterns for data teams. We cover platform onboarding (requesting infra vs. merge requests), Infrastructure as Code with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis dry-run and apply. Tomasz explains reproducibility strategies—fixed versions, Docker, dependency management—and common production pitfalls like silent failures and Airflow caveats. You’ll hear about reducing onboarding friction for data scientists, the minimal operational skills every data role benefits from (Git, CLI, IAM), and platform team responsibilities for review, enablement and proactive support. If you’re focused on Infrastructure as Code, GitOps, reproducible pipelines, or practical production best practices for batch workloads and CI migrations, this episode delivers hands-on advice, learning paths and tooling choices to make your data work faster, safer and more maintainable" +topics: +- DataOps +- GitOps +- data teams +- tools +dateadded: 2022-10-22 + +duration: PT01H05M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=0 + endOffset: 100 +- name: Guest Introduction & Episode Overview + startOffset: 100 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=100 + endOffset: 145 +- name: 'Career Journey: Econometrics → ML Trainee → Data Roles' + startOffset: 145 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=145 + endOffset: 271 +- name: 'Early Experience: OLX, Government Statistics, Academia' + startOffset: 271 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=271 + endOffset: 320 +- name: 'ML Education: Multi-Dimensional Analysis to Machine Learning' + startOffset: 320 + url: 
https://www.youtube.com/watch?v=lem7knxqNzg&t=320 + endOffset: 394 +- name: 'Behavioral Analysis & Product Analytics: Clickstream Modeling' + startOffset: 394 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=394 + endOffset: 428 +- name: 'Operational Realities: ETL Failures, Production Constraints' + startOffset: 428 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=428 + endOffset: 760 +- name: 'Platform Onboarding: Requesting Infra vs. Doing a Merge Request' + startOffset: 760 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=760 + endOffset: 787 +- name: 'Platform Teams’ Role: Review, Enablement, and Safe Practices' + startOffset: 787 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=787 + endOffset: 852 +- name: 'Motivation Shift: From Model-Centric to Data-Centric Work' + startOffset: 852 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=852 + endOffset: 1139 +- name: 'Defining DataOps: Enabling Faster, Less Scary Data Work (DataOps, DevOps)' + startOffset: 1139 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1139 + endOffset: 1256 +- name: 'DataOps & Infra: SQL, Secrets, GitOps, and Developer Enablement' + startOffset: 1256 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1256 + endOffset: 1384 +- name: 'GitOps & IaC Overview: Terraform, Terragrunt, Atlantis' + startOffset: 1384 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1384 + endOffset: 1422 +- name: 'Infrastructure as Code: Declarative Configurations & Reproducibility' + startOffset: 1422 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1422 + endOffset: 1581 +- name: 'GitOps Workflow: Branch, Merge Request, Atlantis Dry Run, Apply' + startOffset: 1581 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1581 + endOffset: 1654 +- name: 'Onboarding Friction: Tooling Challenges for Data Scientists' + startOffset: 1654 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1654 + endOffset: 1774 +- name: 'Learning Path: Narrow Scope, Hands-On Mentorship, Roadmap Advice' + 
startOffset: 1774 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1774 + endOffset: 2155 +- name: 'Terminal Comfort: Shell Setup, Autocomplete, and Productivity Tweaks' + startOffset: 2155 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2155 + endOffset: 2300 +- name: 'Learning Resources: YouTube, Articles, and CLI Tutorials' + startOffset: 2300 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2300 + endOffset: 2444 +- name: 'DataOps vs Data Engineering: Support & Communication vs Pipeline Coding' + startOffset: 2444 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2444 + endOffset: 2512 +- name: 'Proactive Support: Monitoring, Onboarding, and Cross-Team Education' + startOffset: 2512 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2512 + endOffset: 2663 +- name: 'Suitable Backgrounds: Any Data Role; Log Reading & Troubleshooting' + startOffset: 2663 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2663 + endOffset: 2875 +- name: 'Minimal Operational Skills: Git, Command Line, IAM, Password Managers' + startOffset: 2875 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2875 + endOffset: 3277 +- name: 'Distinction from Management: Cross-Team Enablement vs Team Leads' + startOffset: 3277 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3277 + endOffset: 3404 +- name: 'Infrastructure Choices for Data: Batch Workloads, ECS/AWS Batch vs Kubernetes' + startOffset: 3404 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3404 + endOffset: 3506 +- name: 'Company-Scale Migration: Jenkins → GitLab CI and Broad Collaboration' + startOffset: 3506 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3506 + endOffset: 3687 +- name: 'Reproducibility & Dependencies: Fixed Versions, Docker, Silent Failures' + startOffset: 3687 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3687 + endOffset: 3748 +- name: 'Confidence in Data: Pragmatic Edge-Case Checks & Airflow Caveats' + startOffset: 3748 + url: 
https://www.youtube.com/watch?v=lem7knxqNzg&t=3748 + endOffset: 3941 +- name: Closing Remarks, Resources, and Subscribe Call to Action + startOffset: 3941 + url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3941 + endOffset: 3909 + transcript: - header: Podcast Introduction - header: Guest Introduction & Episode Overview @@ -78,7 +207,7 @@ transcript: sec: 303 time: '5:03' who: Alexey -- header: 'ML Education: Multi‑Dimensional Analysis to Machine Learning' +- header: 'ML Education: Multi-Dimensional Analysis to Machine Learning' - line: Sure. I mentioned “by accident” because I haven't actually searched for it specifically. Yeah, I definitely wanted to be into machine learning and stuff. I just received a link that said, “Hey, some company is hiring for a machine learning @@ -204,7 +333,7 @@ transcript: sec: 848 time: '14:08' who: Tomasz -- header: 'Motivation Shift: From Model‑Centric to Data‑Centric Work' +- header: 'Motivation Shift: From Model-Centric to Data-Centric Work' - line: So when did you realize that you actually enjoy doing this stuff more than your work as a data scientist? How did it happen? sec: 852 @@ -426,7 +555,7 @@ transcript: sec: 1743 time: '29:03' who: Tomasz -- header: 'Learning Path: Narrow Scope, Hands‑On Mentorship, Roadmap Advice' +- header: 'Learning Path: Narrow Scope, Hands-On Mentorship, Roadmap Advice' - line: Okay. One of the questions I wanted to ask you is – how did you actually learn this thing? How did you become a DataOps? But I think from what I understood is, you just simply had to do this and then you had a Zoom call with some sort of @@ -606,7 +735,7 @@ transcript: sec: 2505 time: '41:45' who: Alexey -- header: 'Proactive Support: Monitoring, Onboarding, and Cross‑Team Education' +- header: 'Proactive Support: Monitoring, Onboarding, and Cross-Team Education' - line: Exactly. Most often, honestly, live coding, designing some solutions. 
If you think about which domains DataOps touches, it's essentially past – meaning absorbing the technical debt. The present – meaning handling the users’ requests, like daily @@ -802,7 +931,7 @@ transcript: sec: 3269 time: '54:29' who: Tomasz -- header: 'Distinction from Management: Cross‑Team Enablement vs Team Leads' +- header: 'Distinction from Management: Cross-Team Enablement vs Team Leads' - line: While you were away, I was trying to keep people on the call entertained. One Adonis mentioned is that what we talked about largely sounded like a data management role – all these Zoom calls, all this support in Slack, all this trying @@ -858,7 +987,7 @@ transcript: sec: 3496 time: '58:16' who: Tomasz -- header: 'Company‑Scale Migration: Jenkins → GitLab CI and Broad Collaboration' +- header: 'Company-Scale Migration: Jenkins → GitLab CI and Broad Collaboration' - line: Okay, we have a few questions. One of the questions is, “What was your most interesting project and why?” sec: 3506 @@ -917,7 +1046,7 @@ transcript: sec: 3747 time: '1:02:27' who: Tomasz -- header: 'Confidence in Data: Pragmatic Edge‑Case Checks & Airflow Caveats' +- header: 'Confidence in Data: Pragmatic Edge-Case Checks & Airflow Caveats' - line: Okay. Last question for today. At the beginning, you told us a story when you worked in analytics and somebody from management asked you how confident you were in the results. So how do you usually answer this question? @@ -980,142 +1109,6 @@ transcript: sec: 4009 time: '1:06:49' who: Alexey -description: Master DataOps, GitOps and IaC best practices for reproducibility, onboarding - and production reliability — actionable Git workflows, Terraform, Docker tips. -intro: How do you make data work less fragile and easier to onboard while keeping - production safe and reproducible? 
In this episode, Tomasz Hinc, a DataOps practitioner - from Poznań with roots in econometrics, product analytics, data engineering and - ML, walks through practical DataOps and GitOps patterns for data teams. We cover - platform onboarding (requesting infra vs. merge requests), Infrastructure as Code - with Terraform, Terragrunt and Atlantis, and a GitOps workflow from branch to Atlantis - dry‑run and apply. Tomasz explains reproducibility strategies—fixed versions, Docker, - dependency management—and common production pitfalls like silent failures and Airflow - caveats. You’ll hear about reducing onboarding friction for data scientists, the - minimal operational skills every data role benefits from (Git, CLI, IAM), and platform - team responsibilities for review, enablement and proactive support. If you’re focused - on Infrastructure as Code, GitOps, reproducible pipelines, or practical production - best practices for batch workloads and CI migrations, this episode delivers hands‑on - advice, learning paths and tooling choices to make your data work faster, safer - and more maintainable. 
-dateadded: '2022-10-22' -duration: PT01H05M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=0 - endOffset: 100 -- name: Guest Introduction & Episode Overview - startOffset: 100 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=100 - endOffset: 145 -- name: 'Career Journey: Econometrics → ML Trainee → Data Roles' - startOffset: 145 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=145 - endOffset: 271 -- name: 'Early Experience: OLX, Government Statistics, Academia' - startOffset: 271 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=271 - endOffset: 320 -- name: 'ML Education: Multi‑Dimensional Analysis to Machine Learning' - startOffset: 320 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=320 - endOffset: 394 -- name: 'Behavioral Analysis & Product Analytics: Clickstream Modeling' - startOffset: 394 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=394 - endOffset: 428 -- name: 'Operational Realities: ETL Failures, Production Constraints' - startOffset: 428 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=428 - endOffset: 760 -- name: 'Platform Onboarding: Requesting Infra vs. 
Doing a Merge Request' - startOffset: 760 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=760 - endOffset: 787 -- name: 'Platform Teams’ Role: Review, Enablement, and Safe Practices' - startOffset: 787 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=787 - endOffset: 852 -- name: 'Motivation Shift: From Model‑Centric to Data‑Centric Work' - startOffset: 852 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=852 - endOffset: 1139 -- name: 'Defining DataOps: Enabling Faster, Less Scary Data Work (DataOps, DevOps)' - startOffset: 1139 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1139 - endOffset: 1256 -- name: 'DataOps & Infra: SQL, Secrets, GitOps, and Developer Enablement' - startOffset: 1256 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1256 - endOffset: 1384 -- name: 'GitOps & IaC Overview: Terraform, Terragrunt, Atlantis' - startOffset: 1384 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1384 - endOffset: 1422 -- name: 'Infrastructure as Code: Declarative Configurations & Reproducibility' - startOffset: 1422 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1422 - endOffset: 1581 -- name: 'GitOps Workflow: Branch, Merge Request, Atlantis Dry Run, Apply' - startOffset: 1581 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1581 - endOffset: 1654 -- name: 'Onboarding Friction: Tooling Challenges for Data Scientists' - startOffset: 1654 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1654 - endOffset: 1774 -- name: 'Learning Path: Narrow Scope, Hands‑On Mentorship, Roadmap Advice' - startOffset: 1774 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=1774 - endOffset: 2155 -- name: 'Terminal Comfort: Shell Setup, Autocomplete, and Productivity Tweaks' - startOffset: 2155 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2155 - endOffset: 2300 -- name: 'Learning Resources: YouTube, Articles, and CLI Tutorials' - startOffset: 2300 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2300 - endOffset: 2444 -- name: 
'DataOps vs Data Engineering: Support & Communication vs Pipeline Coding' - startOffset: 2444 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2444 - endOffset: 2512 -- name: 'Proactive Support: Monitoring, Onboarding, and Cross‑Team Education' - startOffset: 2512 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2512 - endOffset: 2663 -- name: 'Suitable Backgrounds: Any Data Role; Log Reading & Troubleshooting' - startOffset: 2663 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2663 - endOffset: 2875 -- name: 'Minimal Operational Skills: Git, Command Line, IAM, Password Managers' - startOffset: 2875 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=2875 - endOffset: 3277 -- name: 'Distinction from Management: Cross‑Team Enablement vs Team Leads' - startOffset: 3277 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3277 - endOffset: 3404 -- name: 'Infrastructure Choices for Data: Batch Workloads, ECS/AWS Batch vs Kubernetes' - startOffset: 3404 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3404 - endOffset: 3506 -- name: 'Company‑Scale Migration: Jenkins → GitLab CI and Broad Collaboration' - startOffset: 3506 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3506 - endOffset: 3687 -- name: 'Reproducibility & Dependencies: Fixed Versions, Docker, Silent Failures' - startOffset: 3687 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3687 - endOffset: 3748 -- name: 'Confidence in Data: Pragmatic Edge‑Case Checks & Airflow Caveats' - startOffset: 3748 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3748 - endOffset: 3941 -- name: Closing Remarks, Resources, and Subscribe Call to Action - startOffset: 3941 - url: https://www.youtube.com/watch?v=lem7knxqNzg&t=3941 - endOffset: 3909 --- Links: diff --git a/_podcast/s08e05-storytime-for-dataops.md b/_podcast/dataops-automation-and-reliable-data-pipelines.md similarity index 97% rename from _podcast/s08e05-storytime-for-dataops.md rename to 
_podcast/dataops-automation-and-reliable-data-pipelines.md index 1c6eb2ee..80341010 100644 --- a/_podcast/s08e05-storytime-for-dataops.md +++ b/_podcast/dataops-automation-and-reliable-data-pipelines.md @@ -1,39 +1,151 @@ --- +title: "Mastering DataOps: Automation, Observability & CI/CD for Reliable Data Pipelines" +short: "Storytime for DataOps" season: 8 episode: 5 -title: 'Mastering DataOps: Automation, Observability & CI/CD for Reliable Data Pipelines' -short: Storytime for DataOps -description: 'Master DataOps: automate pipelines, data observability and CI/CD to - cut errors, speed deployments, and deliver reliable, testable data pipelines.' guests: - christopherbergh -intro: 'How do you build reliable data pipelines that move fast without breaking production? - In this episode, Christopher Bergh — CEO and Head Chef at DataKitchen, co-author - of the DataOps Cookbook and Manifesto, and a 25+-year veteran across research, engineering, - analytics, and leadership — walks through practical approaches to mastering DataOps: - automation, observability, and CI/CD for dependable data delivery.

We cover - core targets like error reduction, deployment cycle time, and team productivity; - the role of data observability and monitoring in catching production errors; and - the trade-offs between “done” and “good.” Chris explains the shift from runbooks - to automated playbooks, an automation-first mindset (“code that acts on data”), - and seven practical steps for healthier pipelines—VC, tests, CI/CD, and more. He - contrasts DataOps with MLOps, argues for end-to-end versioning, and discusses tooling - choices including dbt, Great Expectations, and SQL tests, plus platform orchestration - and governance.

Listen to learn concrete tactics for improving data quality, - shrinking incident toil, proving systems with end-to-end testing, and where to focus - time and tooling to accelerate reliable analytics delivery.' +image: images/podcast/dataops-automation-and-reliable-data-pipelines.jpg ids: anchor: Storytime-for-DataOps---Christopher-Bergh-e1hgl0m youtube: 0Fx5PCoLkf4 -image: images/podcast/s08e05-storytime-for-dataops.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Storytime-for-DataOps---Christopher-Bergh-e1hgl0m apple: https://podcasts.apple.com/us/podcast/storytime-for-dataops-christopher-bergh/id1541710331?i=1000558399936 spotify: https://open.spotify.com/episode/2PcBsHslUVnjXFhC9hv6zk youtube: https://www.youtube.com/watch?v=0Fx5PCoLkf4 + +description: "Master DataOps: automate pipelines, data observability and CI/CD to cut errors, speed deployments, and deliver reliable, testable data pipelines." +intro: "How do you build reliable data pipelines that move fast without breaking production? In this episode, Christopher Bergh — CEO and Head Chef at DataKitchen, co-author of the DataOps Cookbook and Manifesto, and a 25+-year veteran across research, engineering, analytics, and leadership — walks through practical approaches to mastering DataOps: automation, observability, and CI/CD for dependable data delivery.

We cover core targets like error reduction, deployment cycle time, and team productivity; the role of data observability and monitoring in catching production errors; and the trade-offs between “done” and “good.” Chris explains the shift from runbooks to automated playbooks, an automation-first mindset (“code that acts on data”), and seven practical steps for healthier pipelines—VC, tests, CI/CD, and more. He contrasts DataOps with MLOps, argues for end-to-end versioning, and discusses tooling choices including dbt, Great Expectations, and SQL tests, plus platform orchestration and governance.

Listen to learn concrete tactics for improving data quality, shrinking incident toil, proving systems with end-to-end testing, and where to focus time and tooling to accelerate reliable analytics delivery." topics: - dataops - practices +dateadded: 2022-04-23 + +duration: PT01H02M23S + +quotableClips: +- name: 'Opening banter: "Father of DataOps" anecdote' + startOffset: 1 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1 + endOffset: 80 +- name: Chris Bergh background and career pivot to data leadership + startOffset: 80 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=80 + endOffset: 121 +- name: 'Transition: from software engineer to managing data teams; factory metaphor' + startOffset: 121 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=121 + endOffset: 255 +- name: 'Factory + Agile: balancing production stability and rapid change' + startOffset: 255 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=255 + endOffset: 402 +- name: 'Core targets: error reduction, deployment cycle time, and team productivity' + startOffset: 402 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=402 + endOffset: 442 +- name: Data observability & monitoring for data quality and production errors + startOffset: 442 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=442 + endOffset: 711 +- name: 'Production quality consequences: detecting and remediating simple failures' + startOffset: 711 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=711 + endOffset: 742 +- name: 'Processes vs tools: leadership, automation, and organizational focus' + startOffset: 742 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=742 + endOffset: 800 +- name: 'Naming the movement: choosing "DataOps" and the DevOps analogy' + startOffset: 800 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=800 + endOffset: 1094 +- name: 'Human impact: stress, blame culture, and owning the process' + startOffset: 1094 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1094 + endOffset: 1196 +- 
name: 'Defining "done" vs "good": readiness criteria and trade-offs' + startOffset: 1196 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1196 + endOffset: 1262 +- name: 'Heroism vs feedback: early releases and customer iteration' + startOffset: 1262 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1262 + endOffset: 1499 +- name: 'Two iteration loops: customer validation and data/model validity' + startOffset: 1499 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1499 + endOffset: 1694 +- name: 'Optimizing value streams: breaking silos across teams and governance' + startOffset: 1694 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1694 + endOffset: 1883 +- name: 'Deferred-value traps: data lake/cloud hype and postponed outcomes' + startOffset: 1883 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1883 + endOffset: 2027 +- name: 'Seven practical steps for healthier data pipelines: VC, tests, CI/CD' + startOffset: 2027 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2027 + endOffset: 2077 +- name: 'Runbooks to automation: move from checklists to automated playbooks' + startOffset: 2077 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2077 + endOffset: 2233 +- name: 'Automation-first mindset: "code that acts on data" beyond labels' + startOffset: 2233 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2233 + endOffset: 2281 +- name: 'Replaceability: handoffs, documentation, and on-call reduction' + startOffset: 2281 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2281 + endOffset: 2429 +- name: 'Hairball anti-pattern: technical debt, maintainability, and refactoring' + startOffset: 2429 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2429 + endOffset: 2586 +- name: 'Adoption barriers: proving systems with end-to-end testing and data' + startOffset: 2586 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2586 + endOffset: 2652 +- name: Test environments & test data challenges; recommend ~15% time for process + startOffset: 
2652 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2652 + endOffset: 2905 +- name: 'Tooling for DataOps: dbt, Great Expectations, SQL tests, and strategies' + startOffset: 2905 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2905 + endOffset: 3042 +- name: 'DataOps vs MLOps: shared DevOps principles applied to models and pipelines' + startOffset: 3042 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3042 + endOffset: 3081 +- name: 'End-to-end versioning: code, models, visualizations, governance as one unit' + startOffset: 3081 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3081 + endOffset: 3213 +- name: 'DataKitchen snapshot: company mission, "Head Chef" role, and team focus' + startOffset: 3213 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3213 + endOffset: 3392 +- name: 'Platform overview: orchestrating environments, tests, and observability' + startOffset: 3392 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3392 + endOffset: 3400 +- name: 'Market context: DataOps vendor landscape and funding trends' + startOffset: 3400 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3400 + endOffset: 3627 +- name: 'Learning resources: DataOps Cookbook, manifesto, courses, and manager guide' + startOffset: 3627 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3627 + endOffset: 3708 +- name: 'Closing remarks: adoption outlook and links to resources' + startOffset: 3708 + url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3708 + endOffset: 3743 + transcript: - header: 'Opening banter: "Father of DataOps" anecdote' - line: Some people call you the Father of DataOps. 
@@ -1102,129 +1214,6 @@ transcript: sec: 3744 time: '1:02:24' who: Alexey -dateadded: '2022-04-23' -duration: PT01H02M23S -quotableClips: -- name: 'Opening banter: "Father of DataOps" anecdote' - startOffset: 1 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1 - endOffset: 80 -- name: Chris Bergh background and career pivot to data leadership - startOffset: 80 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=80 - endOffset: 121 -- name: 'Transition: from software engineer to managing data teams; factory metaphor' - startOffset: 121 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=121 - endOffset: 255 -- name: 'Factory + Agile: balancing production stability and rapid change' - startOffset: 255 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=255 - endOffset: 402 -- name: 'Core targets: error reduction, deployment cycle time, and team productivity' - startOffset: 402 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=402 - endOffset: 442 -- name: Data observability & monitoring for data quality and production errors - startOffset: 442 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=442 - endOffset: 711 -- name: 'Production quality consequences: detecting and remediating simple failures' - startOffset: 711 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=711 - endOffset: 742 -- name: 'Processes vs tools: leadership, automation, and organizational focus' - startOffset: 742 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=742 - endOffset: 800 -- name: 'Naming the movement: choosing "DataOps" and the DevOps analogy' - startOffset: 800 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=800 - endOffset: 1094 -- name: 'Human impact: stress, blame culture, and owning the process' - startOffset: 1094 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1094 - endOffset: 1196 -- name: 'Defining "done" vs "good": readiness criteria and trade-offs' - startOffset: 1196 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1196 - endOffset: 1262 
-- name: 'Heroism vs feedback: early releases and customer iteration' - startOffset: 1262 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1262 - endOffset: 1499 -- name: 'Two iteration loops: customer validation and data/model validity' - startOffset: 1499 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1499 - endOffset: 1694 -- name: 'Optimizing value streams: breaking silos across teams and governance' - startOffset: 1694 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1694 - endOffset: 1883 -- name: 'Deferred-value traps: data lake/cloud hype and postponed outcomes' - startOffset: 1883 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=1883 - endOffset: 2027 -- name: 'Seven practical steps for healthier data pipelines: VC, tests, CI/CD' - startOffset: 2027 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2027 - endOffset: 2077 -- name: 'Runbooks to automation: move from checklists to automated playbooks' - startOffset: 2077 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2077 - endOffset: 2233 -- name: 'Automation-first mindset: "code that acts on data" beyond labels' - startOffset: 2233 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2233 - endOffset: 2281 -- name: 'Replaceability: handoffs, documentation, and on-call reduction' - startOffset: 2281 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2281 - endOffset: 2429 -- name: 'Hairball anti-pattern: technical debt, maintainability, and refactoring' - startOffset: 2429 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2429 - endOffset: 2586 -- name: 'Adoption barriers: proving systems with end-to-end testing and data' - startOffset: 2586 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2586 - endOffset: 2652 -- name: Test environments & test data challenges; recommend ~15% time for process - startOffset: 2652 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2652 - endOffset: 2905 -- name: 'Tooling for DataOps: dbt, Great Expectations, SQL tests, and strategies' - 
startOffset: 2905 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=2905 - endOffset: 3042 -- name: 'DataOps vs MLOps: shared DevOps principles applied to models and pipelines' - startOffset: 3042 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3042 - endOffset: 3081 -- name: 'End-to-end versioning: code, models, visualizations, governance as one unit' - startOffset: 3081 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3081 - endOffset: 3213 -- name: 'DataKitchen snapshot: company mission, "Head Chef" role, and team focus' - startOffset: 3213 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3213 - endOffset: 3392 -- name: 'Platform overview: orchestrating environments, tests, and observability' - startOffset: 3392 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3392 - endOffset: 3400 -- name: 'Market context: DataOps vendor landscape and funding trends' - startOffset: 3400 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3400 - endOffset: 3627 -- name: 'Learning resources: DataOps Cookbook, manifesto, courses, and manager guide' - startOffset: 3627 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3627 - endOffset: 3708 -- name: 'Closing remarks: adoption outlook and links to resources' - startOffset: 3708 - url: https://www.youtube.com/watch?v=0Fx5PCoLkf4&t=3708 - endOffset: 3743 --- Links: diff --git a/_podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.md b/_podcast/dataops-for-data-engineering.md similarity index 88% rename from _podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.md rename to _podcast/dataops-for-data-engineering.md index 2bb2e108..bb2bda4a 100644 --- a/_podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.md +++ b/_podcast/dataops-for-data-engineering.md @@ -1,37 +1,122 @@ --- +title: "DataOps for Data Engineering: Automation, Observability, CI/CD & Reliable ML Deployments" +short: "DataOps, Observability, and The Cure for Data Team Blues" +season: 18 episode: 9 guests: - 
christopherbergh -description: Learn DataOps best practices for observability, CI/CD and deployment - automation to reduce rework, boost model reliability and speed analytics delivery. -intro: How do you move data teams from fragile, firefighting workloads to reliable, - automated production? In this episode, Christopher Bergh of DataKitchen walks through - his career journey from software engineering to data entrepreneurship and tackles - that exact challenge through the lens of DataOps.

You’ll hear a clear definition - of DataOps and why it matters—covering pre-cloud data engineering pain points, early - DevOps lessons, and workforce burnout tied to poor deployment culture. Key topics - include core DataOps practices (automation, observability, productivity), operational - lifecycle thinking (Day One/Two/Three), model reliability and on‑call readiness - for data science, CI/CD pipelines, regression testing and test data for analytics, - and data versioning strategies. The conversation also addresses MLOps and LLMs, - the limits of AI generation versus process improvement, containers versus serverless - tradeoffs, and how observability-first monitoring drives real change.

Listeners - will come away with practical starting steps for individual contributors and leaders - to reduce rework and cycle time, improve deployment automation, and create sustainable - data engineering and ML practices that lower turnover and increase reliability. +image: images/podcast/dataops-for-data-engineering.jpg ids: - anchor: atatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f + anchor: datatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f youtube: HzGpIxV8HtA -image: images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataOps--Observability--and-The-Cure-for-Data-Team-Blues---Christopher-Bergh-e2n775f apple: https://podcasts.apple.com/us/podcast/dataops-observability-and-the-cure-for-data-team/id1541710331?i=1000665429770 spotify: https://open.spotify.com/episode/02VoOk5UkMcvfq7VkSOegb youtube: https://www.youtube.com/watch?v=HzGpIxV8HtA -season: 18 -short: DataOps, Observability, and The Cure for Data Team Blues -title: 'DataOps for Data Engineering: Automation, Observability, CI/CD & Reliable - ML Deployments' +description: "Master DataOps, data engineering, and CI/CD to deploy reliable ML, cut cycle time, reduce rework, and build production-ready tests for on-call readiness." +topics: +- DataOps +- MLOps +- data engineering +- production +- practices +intro: "How do you transform fragile data pipelines and unreliable ML deployments into automated, observable, production-ready systems? In this episode Christopher Bergh, CEO of DataKitchen and co-author of the DataOps Cookbook and DataOps Manifesto, walks through practical DataOps for data engineering—drawing on 25+ years across research, software engineering, and analytics.

We trace his career from pre-cloud SQL Server scaling challenges to early DevOps lessons, then dig into what DataOps means for teams facing burnout, deployment fear, and inconsistent processes. Key topics include automation, observability, CI/CD pipelines, regression tests and test data for analytics, model reliability and on-call readiness, end-to-end deployment automation, data versioning, and the differences between containers and serverless. The episode also clarifies MLOps and LLM buzzwords, explores day-one/day-two/day-three operational lifecycle practices, and outlines concrete steps to reduce rework and cycle time.

If you’re a data engineer, data scientist, or engineering leader looking to improve analytics delivery, this conversation offers actionable guidance on implementing DataOps practices — automation, monitoring, CI/CD, and culture changes — to make ML deployments more reliable and repeatable." +dateadded: 2024-09-04 +duration: PT01H01M55S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=0 + endOffset: 132 +- name: 'Guest Introduction: Christopher Bergh & DataKitchen' + startOffset: 132 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=132 + endOffset: 245 +- name: 'Career Journey: From Software Engineering to Data Entrepreneurship' + startOffset: 245 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=245 + endOffset: 366 +- name: Pre-cloud Data Engineering Challenges (SQL Server, scaling) + startOffset: 366 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=366 + endOffset: 509 +- name: DevOps Adoption Timeline and Early Lessons + startOffset: 509 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=509 + endOffset: 713 +- name: DataOps Definition and Workforce Burnout Statistics + startOffset: 713 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=713 + endOffset: 807 +- name: 'Deployment Culture: Fear vs. 
Heroism in Data Teams' + startOffset: 807 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=807 + endOffset: 952 +- name: 'Core DataOps Practices: Automation, Observability, and Productivity' + startOffset: 952 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=952 + endOffset: 1126 +- name: 'DataOps Today: MLOps, LLMs, and Buzzword Clarification' + startOffset: 1126 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1126 + endOffset: 1436 +- name: 'Operational Lifecycle: Day One, Day Two, Day Three' + startOffset: 1436 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1436 + endOffset: 1573 +- name: Model Reliability and On-call Readiness for Data Science + startOffset: 1573 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1573 + endOffset: 1855 +- name: CI/CD Pipelines, Regression Tests, and Test Data for Analytics + startOffset: 1855 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1855 + endOffset: 2053 +- name: Reducing Rework and Cycle Time in Data Workflows + startOffset: 2053 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2053 + endOffset: 2344 +- name: AI Tools and the Limits of Generation vs. Process Improvement + startOffset: 2344 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2344 + endOffset: 2559 +- name: 'End-to-End Deployment Automation: Version Control and Tests' + startOffset: 2559 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2559 + endOffset: 2670 +- name: 'Variable Adoption: Pockets of Best Practice and Integration Gaps' + startOffset: 2670 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2670 + endOffset: 3029 +- name: 'Observability-First Approach: Monitoring Production to Drive Change' + startOffset: 3029 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3029 + endOffset: 3162 +- name: 'Containers vs. 
Serverless: Docker, Kubernetes, and Alternatives' + startOffset: 3162 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3162 + endOffset: 3245 +- name: 'Data Versioning Strategy: Immutability and Versioning Code' + startOffset: 3245 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3245 + endOffset: 3495 +- name: 'Culture and Leadership: Lowering Turnover with Better Processes' + startOffset: 3495 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3495 + endOffset: 3514 +- name: Practical Starting Steps for Individual Contributors + startOffset: 3514 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3514 + endOffset: 3680 +- name: Closing Summary and Next Steps + startOffset: 3680 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3680 + endOffset: 3847 +- name: Episode End + startOffset: 3847 + url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3847 + endOffset: 3715 transcript: - header: Podcast Introduction - header: 'Guest Introduction: Christopher Bergh & DataKitchen' @@ -218,7 +303,7 @@ transcript: sec: 1464 time: '24:24' who: Christopher -- header: Model Reliability and On‑call Readiness for Data Science +- header: Model Reliability and On-call Readiness for Data Science - line: Let's take a data scientist as an example. They pull data, do some transformations, and build a model. Day one is about getting that initial version ready. What happens on day two? 
@@ -375,99 +460,12 @@ transcript: sec: 3847 time: '1:04:07' who: Alexey -dateadded: '2024-09-04' -duration: PT01H01M55S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=0 - endOffset: 132 -- name: 'Guest Introduction: Christopher Bergh & DataKitchen' - startOffset: 132 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=132 - endOffset: 245 -- name: 'Career Journey: From Software Engineering to Data Entrepreneurship' - startOffset: 245 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=245 - endOffset: 366 -- name: Pre-cloud Data Engineering Challenges (SQL Server, scaling) - startOffset: 366 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=366 - endOffset: 509 -- name: DevOps Adoption Timeline and Early Lessons - startOffset: 509 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=509 - endOffset: 713 -- name: DataOps Definition and Workforce Burnout Statistics - startOffset: 713 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=713 - endOffset: 807 -- name: 'Deployment Culture: Fear vs. 
Heroism in Data Teams' - startOffset: 807 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=807 - endOffset: 952 -- name: 'Core DataOps Practices: Automation, Observability, and Productivity' - startOffset: 952 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=952 - endOffset: 1126 -- name: 'DataOps Today: MLOps, LLMs, and Buzzword Clarification' - startOffset: 1126 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1126 - endOffset: 1436 -- name: 'Operational Lifecycle: Day One, Day Two, Day Three' - startOffset: 1436 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1436 - endOffset: 1573 -- name: Model Reliability and On‑call Readiness for Data Science - startOffset: 1573 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1573 - endOffset: 1855 -- name: CI/CD Pipelines, Regression Tests, and Test Data for Analytics - startOffset: 1855 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=1855 - endOffset: 2053 -- name: Reducing Rework and Cycle Time in Data Workflows - startOffset: 2053 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2053 - endOffset: 2344 -- name: AI Tools and the Limits of Generation vs. Process Improvement - startOffset: 2344 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2344 - endOffset: 2559 -- name: 'End-to-End Deployment Automation: Version Control and Tests' - startOffset: 2559 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2559 - endOffset: 2670 -- name: 'Variable Adoption: Pockets of Best Practice and Integration Gaps' - startOffset: 2670 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=2670 - endOffset: 3029 -- name: 'Observability-First Approach: Monitoring Production to Drive Change' - startOffset: 3029 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3029 - endOffset: 3162 -- name: 'Containers vs. 
Serverless: Docker, Kubernetes, and Alternatives' - startOffset: 3162 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3162 - endOffset: 3245 -- name: 'Data Versioning Strategy: Immutability and Versioning Code' - startOffset: 3245 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3245 - endOffset: 3495 -- name: 'Culture and Leadership: Lowering Turnover with Better Processes' - startOffset: 3495 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3495 - endOffset: 3514 -- name: Practical Starting Steps for Individual Contributors - startOffset: 3514 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3514 - endOffset: 3680 -- name: Closing Summary and Next Steps - startOffset: 3680 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3680 - endOffset: 3847 -- name: Episode End - startOffset: 3847 - url: https://www.youtube.com/watch?v=HzGpIxV8HtA&t=3847 - endOffset: 3715 +context: 'DataOps is the episode’s unifying idea: treating data and ML work as engineered, + production-ready products by applying software best practices—automation, CI/CD, + testing and test data, immutable versioning, and observability—plus cultural change + and leadership to remove fear, reduce rework and burnout, and shorten cycle time. + The through-line argues that operationalizing the full lifecycle (day-one provisioning + through day-two reliability and day-three evolution) turns pockets of heroic, ad-hoc + data work into consistent, reliable delivery that enables safe, scalable use of + AI and analytics.' 
--- diff --git a/_podcast/s02e11-dataops.md b/_podcast/dataops-principles-and-scalable-data-platforms.md similarity index 95% rename from _podcast/s02e11-dataops.md rename to _podcast/dataops-principles-and-scalable-data-platforms.md index 0a55ce5d..cd90c42b 100644 --- a/_podcast/s02e11-dataops.md +++ b/_podcast/dataops-principles-and-scalable-data-platforms.md @@ -1,12 +1,11 @@ --- -title: 'DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self‑Service - Lakehouse & Reproducibility' -short: DataOps 101 -guests: -- larsalbertsson -image: images/podcast/s02e11-dataops.jpg +title: "DataOps 101 for Scaling Data Platforms: Immutable Pipelines, Self-Service Lakehouse & Reproducibility" +short: "DataOps 101" season: 2 episode: 11 +guests: +- larsalbertsson +image: images/podcast/dataops-principles-and-scalable-data-platforms.jpg ids: youtube: vyF3yGsF6UY anchor: DataOps-101---Lars-Albertsson-ethsp1 @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/DataOps-101---Lars-Albertsson-ethsp1 spotify: https://open.spotify.com/episode/5c2m4FVq4KPCfSXndCAzNd apple: https://podcasts.apple.com/us/podcast/dataops-101-lars-albertsson/id1541710331?i=1000514542438 + +description: "Discover DataOps strategies, immutable pipelines & a self-service lakehouse to boost reproducibility, scale data platforms, enable analysts and speed delivery" +intro: "How do you scale a data platform that supports self-service analytics while keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, founder of Scling and former Google, Spotify and Schibsted engineer, walks through pragmatic DataOps principles for building scalable data platforms.

We dig into building self-service at Spotify, orchestration with Luigi, and the core platform components—storage, compute and workflow engines—plus compute choices like Spark, Flink, containers and managed services. Lars explains immutable, functional pipeline design to solve reproducibility problems, contrasts data lakes and warehouses (raw dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, CDC and database versioning strategies. He also explores batch vs streaming trade-offs, micro-batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, and risks around data mesh and decentralization.

Listeners will come away with concrete architectural trade-offs, patterns for immutable pipelines and self-service lakehouse design, and recommended readings from the Scling list to deepen expertise in DataOps, lineage, versioning and practical data engineering." +topics: +- DataOps +- data engineering +- MLOps +dateadded: 2021-03-27 + +duration: PT01H09M27S + +quotableClips: +- name: Episode Opening & Guest Introduction + startOffset: 159 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=159 + endOffset: 218 +- name: 'Career Journey: Google, Spotify, Consulting and Scling' + startOffset: 218 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=218 + endOffset: 472 +- name: 'Scaling Data Teams: Building Self-Service at Spotify' + startOffset: 472 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=472 + endOffset: 648 +- name: 'Orchestration Spotlight: Luigi as a Data Build System' + startOffset: 648 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=648 + endOffset: 710 +- name: 'DataOps Defined: Enablement, Workflows and People Alignment' + startOffset: 710 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=710 + endOffset: 1002 +- name: 'Data Platform Principles: Immutability & Functional Architecture' + startOffset: 1002 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1002 + endOffset: 1212 +- name: 'Reproducibility Problems: Mutable ETL vs Immutable Pipelines' + startOffset: 1212 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1212 + endOffset: 1289 +- name: 'Data Lake vs Data Warehouse: Raw Data, Aggregates & Use Cases' + startOffset: 1289 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1289 + endOffset: 1409 +- name: 'Data Lake Fundamentals: Object Storage, Governance & Raw Dumps' + startOffset: 1409 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1409 + endOffset: 1702 +- name: 'Ingress & Egress: Offline Processing and Self-Service SQL' + startOffset: 1702 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1702 + endOffset: 1834 
+- name: 'Core Platform Components: Storage, Compute & Workflow Engine' + startOffset: 1834 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1834 + endOffset: 1878 +- name: 'Compute Options: Spark, Flink, Containers and Managed Services' + startOffset: 1878 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1878 + endOffset: 2157 +- name: 'Cloud Trade-offs: Prepackaged Platforms vs DIY Assembly' + startOffset: 2157 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2157 + endOffset: 2397 +- name: 'Recommended Reading: Lambda Architecture, Practical DataOps & Scling List' + startOffset: 2397 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2397 + endOffset: 2513 +- name: 'Batch vs Streaming: Latency Tradeoffs and Typical Use Cases' + startOffset: 2513 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2513 + endOffset: 2711 +- name: 'Micro-batching vs Streaming: Dependency Management & Predictability' + startOffset: 2711 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2711 + endOffset: 2812 +- name: 'DataOps Maturity: Test-Certified Practices, Quality & Schema Automation' + startOffset: 2812 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2812 + endOffset: 3013 +- name: 'Enabling Self-Service Analytics: Embedding Engineers with Analysts' + startOffset: 3013 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3013 + endOffset: 3211 +- name: 'MLOps vs DataOps: Shared Principles and ML-Specific Requirements' + startOffset: 3211 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3211 + endOffset: 3466 +- name: 'Data Mesh Overview: Decentralization, Ownership & Governance Risks' + startOffset: 3466 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3466 + endOffset: 3782 +- name: 'Splitting the Platform: When to Decentralize vs Centralize' + startOffset: 3782 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3782 + endOffset: 3858 +- name: 'Lineage & Versioning: Code-Defined Pipelines vs Catalog Tools' + startOffset: 3858 + url: 
https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3858 + endOffset: 3961 +- name: 'Database Versioning: Full Dumps, CDC (Change Data Capture) Strategies' + startOffset: 3961 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3961 + endOffset: 4072 +- name: 'Lakehouse Architecture: Warehouse Features Layered on Data Lake' + startOffset: 4072 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4072 + endOffset: 4261 +- name: 'Further Resources: Scling Reading List & Presentations' + startOffset: 4261 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4261 + endOffset: 4326 +- name: Episode Closing + startOffset: 4326 + url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4326 + endOffset: 4167 + transcript: - header: Episode Opening & Guest Introduction - line: This week, we'll talk about data Ops — what is this and how is it different @@ -86,7 +202,7 @@ transcript: sec: 468 time: '7:48' who: Lars -- header: 'Scaling Data Teams: Building Self‑Service at Spotify' +- header: 'Scaling Data Teams: Building Self-Service at Spotify' - line: You said, you started with DataOps, even before it became a thing — in 2013. How was it called there? How did you come up with this? sec: 472 @@ -344,7 +460,7 @@ transcript: sec: 1586 time: '26:26' who: Lars -- header: 'Ingress & Egress: Offline Processing and Self‑Service SQL' +- header: 'Ingress & Egress: Offline Processing and Self-Service SQL' - line: 'At Spotify, we had songs that have been played and then we joined with the user so we know what product they were or what country they''re in. Then these pipelines fan out, these popular data sets are used for many purposes: for reporting @@ -410,7 +526,7 @@ transcript: sec: 1878 time: '31:18' who: Lars -- header: 'Cloud Trade‑offs: Prepackaged Platforms vs DIY Assembly' +- header: 'Cloud Trade-offs: Prepackaged Platforms vs DIY Assembly' - line: Then you need compute. You need some way to perform these transformations. There are scalable things like Spark and Flink. 
For most companies, horizontal scalability is actually not necessary. You can get 12 terabyte memory machines @@ -545,7 +661,7 @@ transcript: sec: 2549 time: '42:29' who: Lars -- header: 'Micro‑batching vs Streaming: Dependency Management & Predictability' +- header: 'Micro-batching vs Streaming: Dependency Management & Predictability' - line: 'Then you have batch, where things can be really slow, like reporting, or you''re making analytics or business insights. You can wait for an hour and that''s fine. Then you have streaming. It takes care of the window in-between. Then the @@ -588,7 +704,7 @@ transcript: sec: 2719 time: '45:19' who: Lars -- header: 'DataOps Maturity: Test‑Certified Practices, Quality & Schema Automation' +- header: 'DataOps Maturity: Test-Certified Practices, Quality & Schema Automation' - line: Makes sense. Thank you. I also wanted to talk about maturity levels, and you briefly touched on them. What are the maturity levels of an organization? When an organization is ready for DataOps? And what are the different levels of readiness? @@ -603,7 +719,7 @@ transcript: sec: 2833 time: '47:13' who: Lars -- header: 'Enabling Self‑Service Analytics: Embedding Engineers with Analysts' +- header: 'Enabling Self-Service Analytics: Embedding Engineers with Analysts' - line: Regarding the maturity levels, I don't have a super great definition of maturity levels. There was an interesting development at Spotify. When I was at Google, if we traced back to that time, we had a maturity ladder in terms of DevOps — @@ -668,7 +784,7 @@ transcript: sec: 3155 time: '52:35' who: Lars -- header: 'MLOps vs DataOps: Shared Principles and ML‑Specific Requirements' +- header: 'MLOps vs DataOps: Shared Principles and ML-Specific Requirements' - line: This reminds me that at the beginning of our chat, we wanted to ask you about different “something-Ops”. We already talked about the difference between DevOps and DataOps. 
Correct me if I'm wrong, but in case of DataOps, you have the same @@ -815,7 +931,7 @@ transcript: sec: 3856 time: '1:04:16' who: Lars -- header: 'Lineage & Versioning: Code‑Defined Pipelines vs Catalog Tools' +- header: 'Lineage & Versioning: Code-Defined Pipelines vs Catalog Tools' - line: How do you keep track of all the transformations that have been undertaken between each newly created data set within the data platform? sec: 3858 @@ -918,130 +1034,6 @@ transcript: sec: 4326 time: '1:12:06' who: Lars -description: Discover DataOps strategies, immutable pipelines & a self-service lakehouse - to boost reproducibility, scale data platforms, enable analysts and speed delivery. -intro: How do you scale a data platform that supports self‑service analytics while - keeping pipelines reproducible and maintainable? In this episode, Lars Albertsson, - founder of Scling and former Google, Spotify and Schibsted engineer, walks through - pragmatic DataOps principles for building scalable data platforms.

We dig - into building self‑service at Spotify, orchestration with Luigi, and the core platform - components—storage, compute and workflow engines—plus compute choices like Spark, - Flink, containers and managed services. Lars explains immutable, functional pipeline - design to solve reproducibility problems, contrasts data lakes and warehouses (raw - dumps vs aggregates), and covers object storage, governance, ingress/egress patterns, - CDC and database versioning strategies. He also explores batch vs streaming trade‑offs, - micro‑batching, DataOps maturity (tests, schema automation), MLOps vs DataOps overlaps, - and risks around data mesh and decentralization.

Listeners will come away - with concrete architectural trade‑offs, patterns for immutable pipelines and self‑service - lakehouse design, and recommended readings from the Scling list to deepen expertise - in DataOps, lineage, versioning and practical data engineering. -dateadded: '2021-03-27' -duration: PT01H09M27S -quotableClips: -- name: Episode Opening & Guest Introduction - startOffset: 159 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=159 - endOffset: 218 -- name: 'Career Journey: Google, Spotify, Consulting and Scling' - startOffset: 218 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=218 - endOffset: 472 -- name: 'Scaling Data Teams: Building Self‑Service at Spotify' - startOffset: 472 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=472 - endOffset: 648 -- name: 'Orchestration Spotlight: Luigi as a Data Build System' - startOffset: 648 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=648 - endOffset: 710 -- name: 'DataOps Defined: Enablement, Workflows and People Alignment' - startOffset: 710 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=710 - endOffset: 1002 -- name: 'Data Platform Principles: Immutability & Functional Architecture' - startOffset: 1002 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1002 - endOffset: 1212 -- name: 'Reproducibility Problems: Mutable ETL vs Immutable Pipelines' - startOffset: 1212 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1212 - endOffset: 1289 -- name: 'Data Lake vs Data Warehouse: Raw Data, Aggregates & Use Cases' - startOffset: 1289 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1289 - endOffset: 1409 -- name: 'Data Lake Fundamentals: Object Storage, Governance & Raw Dumps' - startOffset: 1409 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1409 - endOffset: 1702 -- name: 'Ingress & Egress: Offline Processing and Self‑Service SQL' - startOffset: 1702 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1702 - endOffset: 1834 -- name: 'Core Platform Components: Storage, 
Compute & Workflow Engine' - startOffset: 1834 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1834 - endOffset: 1878 -- name: 'Compute Options: Spark, Flink, Containers and Managed Services' - startOffset: 1878 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=1878 - endOffset: 2157 -- name: 'Cloud Trade‑offs: Prepackaged Platforms vs DIY Assembly' - startOffset: 2157 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2157 - endOffset: 2397 -- name: 'Recommended Reading: Lambda Architecture, Practical DataOps & Scling List' - startOffset: 2397 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2397 - endOffset: 2513 -- name: 'Batch vs Streaming: Latency Tradeoffs and Typical Use Cases' - startOffset: 2513 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2513 - endOffset: 2711 -- name: 'Micro‑batching vs Streaming: Dependency Management & Predictability' - startOffset: 2711 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2711 - endOffset: 2812 -- name: 'DataOps Maturity: Test‑Certified Practices, Quality & Schema Automation' - startOffset: 2812 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=2812 - endOffset: 3013 -- name: 'Enabling Self‑Service Analytics: Embedding Engineers with Analysts' - startOffset: 3013 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3013 - endOffset: 3211 -- name: 'MLOps vs DataOps: Shared Principles and ML‑Specific Requirements' - startOffset: 3211 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3211 - endOffset: 3466 -- name: 'Data Mesh Overview: Decentralization, Ownership & Governance Risks' - startOffset: 3466 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3466 - endOffset: 3782 -- name: 'Splitting the Platform: When to Decentralize vs Centralize' - startOffset: 3782 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3782 - endOffset: 3858 -- name: 'Lineage & Versioning: Code‑Defined Pipelines vs Catalog Tools' - startOffset: 3858 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3858 - 
endOffset: 3961 -- name: 'Database Versioning: Full Dumps, CDC (Change Data Capture) Strategies' - startOffset: 3961 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=3961 - endOffset: 4072 -- name: 'Lakehouse Architecture: Warehouse Features Layered on Data Lake' - startOffset: 4072 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4072 - endOffset: 4261 -- name: 'Further Resources: Scling Reading List & Presentations' - startOffset: 4261 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4261 - endOffset: 4326 -- name: Episode Closing - startOffset: 4326 - url: https://www.youtube.com/watch?v=vyF3yGsF6UY&t=4326 - endOffset: 4167 --- We talked about: diff --git a/_podcast/s07e01-datatalksclub-behind-the-scenes.md b/_podcast/datatalksclub-building-scaling-data-community.md similarity index 97% rename from _podcast/s07e01-datatalksclub-behind-the-scenes.md rename to _podcast/datatalksclub-building-scaling-data-community.md index ca0e28a3..c290ba4a 100644 --- a/_podcast/s07e01-datatalksclub-behind-the-scenes.md +++ b/_podcast/datatalksclub-building-scaling-data-community.md @@ -1,13 +1,12 @@ --- -title: 'DataTalks.Club Behind the Scenes: Alexey Grigorev on Scaling and Growing the - Community' -short: DataTalks.Club Behind the Scenes +title: "DataTalks.Club Behind the Scenes: Alexey Grigorev on Scaling and Growing the Community" +short: "DataTalks.Club Behind the Scenes" +season: 7 +episode: 1 guests: - eugeneyan - alexeygrigorev -image: images/podcast/s07e01-datatalksclub-behind-the-scenes.jpg -season: 7 -episode: 1 +image: images/podcast/datatalksclub-building-scaling-data-community.jpg ids: youtube: IxTyq96juVE anchor: DataTalks-Club-Behind-the-Scenes---Eugene-Yan--Alexey-Grigorev-e1d4567 @@ -16,6 +15,116 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/DataTalks-Club-Behind-the-Scenes---Eugene-Yan--Alexey-Grigorev-e1d4567 spotify: https://open.spotify.com/episode/3ltAxUsCE8EAf0pRb9zxDK apple: 
https://podcasts.apple.com/us/podcast/datatalks-club-behind-the-scenes-eugene-yan-alexey/id1541710331?i=1000548608967 + +description: "Discover how to scale a 9k+ data science community, automate events, and advance your machine learning career with deployment, mentorship and growth tactics." +intro: "How do you scale a grassroots machine learning community from a few forum posts to thousands of active members? In this episode, Alexey Grigorev — founder of DataTalks.Club — sits down with Eugene Yan to walk through the real-world steps behind scaling and growing a machine learning community. Alexey shares his origins (forums, landing page, early events), the growth inflection that led to ~9k members, and practical event formats that work: Open Source Spotlight, Minis, Book of the Week, live coding and office hours.

We cover tactical topics listeners can apply: content production and automation (planning, Zapier, Eventbrite), monetization and sponsorship models, and how project-based offerings like ML Bookcamp and Machine Learning Zoomcamp emphasize end-to-end deployment (Flask, AWS Lambda, Kubernetes, Kubeflow). Alexey also discusses community management lessons — mentoring, product mindset, avoiding tool churn — plus career and productivity advice such as learning-by-projects, public deadlines, and maintaining motivation.

If you’re building or scaling a machine learning community, this episode offers concrete strategies for community growth, event design, content automation, and running project-focused training." +topics: +- community building +- machine learning +- data science +- data engineering +- MLOps +dateadded: 2022-01-23 + +duration: PT00H56M57S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=0 + endOffset: 9 +- name: 'Career Transition: Java to Machine Learning (Coursera, Andrew Ng)' + startOffset: 9 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=9 + endOffset: 86 +- name: Freelancing, Master's, and first data-science roles; building data pipelines + startOffset: 86 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=86 + endOffset: 306 +- name: 'Career Lessons: step outside comfort zone; product mindset; prefer simple + models' + startOffset: 306 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=306 + endOffset: 387 +- name: 'Principal Data Scientist Role: internal consulting, architecture, mentoring' + startOffset: 387 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=387 + endOffset: 576 +- name: 'Motivation to Start the Community: early interactions and LinkedIn outreach' + startOffset: 576 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=576 + endOffset: 605 +- name: 'Community Origins: forums, landing page, first events and format inspiration' + startOffset: 605 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=605 + endOffset: 1014 +- name: 'Community Growth & Events: conference boost and scaling to ~9k members' + startOffset: 1014 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1014 + endOffset: 1222 +- name: 'Content Production & Automation: planning, scheduling, Zapier, Eventbrite' + startOffset: 1222 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1222 + endOffset: 1478 +- name: 'Event Formats: Open Source Spotlight, Minis, Book of the Week' + startOffset: 1478 + 
url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1478 + endOffset: 1671 +- name: 'Notable Guests & Popular Episodes: Martin Kleppmann, Elena Samuylova, Santiago' + startOffset: 1671 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1671 + endOffset: 1897 +- name: 'Monetization & Sponsorship: costs, TopCoder, Toloka crowdsourcing workshop' + startOffset: 1897 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1897 + endOffset: 2302 +- name: 'ML Bookcamp & Machine Learning Zoomcamp: project-based, end-to-end learning' + startOffset: 2302 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2302 + endOffset: 2346 +- name: 'Deployment Focus in the Book/Course: Flask, AWS Lambda, Kubernetes, Kubeflow' + startOffset: 2346 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2346 + endOffset: 2569 +- name: 'Career Advice: join communities, answer questions, find mentors' + startOffset: 2569 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2569 + endOffset: 2635 +- name: 'Motivation & Persistence: handling frustration and sustaining interest' + startOffset: 2635 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2635 + endOffset: 2740 +- name: 'Tool Evaluation Strategy: avoid tool churn, follow lasting trends, Kedro + curiosity' + startOffset: 2740 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2740 + endOffset: 2936 +- name: 'Productivity & Workflow: public deadlines, accountability, batching work' + startOffset: 2936 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2936 + endOffset: 3031 +- name: 'Learning by Projects & Notes: just-in-time learning, Notion, READMEs, GitHub + docs' + startOffset: 3031 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3031 + endOffset: 3184 +- name: 'Community Inspiration & Format Ideas: borrowing from ML Ops and JavaRanch' + startOffset: 3184 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3184 + endOffset: 3307 +- name: 'Interactive Formats: live coding, office hours, ML Zoomcamp sessions' + startOffset: 
3307 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3307 + endOffset: 3356 +- name: Community Thanks & Future Plans + startOffset: 3356 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3356 + endOffset: 3410 +- name: Podcast Closing + startOffset: 3410 + url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3410 + endOffset: 3417 + transcript: - header: Podcast Introduction - header: 'Career Transition: Java to Machine Learning (Coursera, Andrew Ng)' @@ -929,122 +1038,6 @@ transcript: sec: 3426 time: '57:06' who: Alexey -description: 'Discover a machine learning career from Java: build data pipelines, - finish ML Zoomcamp projects, deploy with Flask & Kubernetes, and grow a 9k community.' -intro: 'How do you move from a Java background into a sustainable machine learning - career while building production-ready data pipelines and scaling a learning community? - In this episode, Alexey Grigorev, founder of DataTalks.Club, and Eugene Yan, Applied - Scientist at Amazon, walk through real-world steps for that transition—from taking - Andrew Ng’s Coursera course to first data-science roles, freelancing, and master’s - programs.

They discuss building pragmatic data pipelines, adopting a product - mindset, and why simple models often win in production. Learn what a principal data - scientist actually does—internal consulting, architecture, and mentoring—and how - that perspective shaped community efforts that grew to roughly 9,000 members. Alexey - explains the origins and formats of DataTalks.Club events, conference-driven growth, - and the automation behind content production (Zapier, Eventbrite).

You’ll - also get an inside look at project-based learning with ML Bookcamp / Machine Learning - Zoomcamp, deployment focus (Flask, AWS Lambda, Kubernetes, Kubeflow), monetization - and sponsorship realities, and practical career tactics: joining communities, finding - mentors, and learning by projects. Tune in for actionable guidance on machine learning - career development, ML in production, and community building.' -dateadded: '2022-01-23' -duration: PT00H56M57S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=0 - endOffset: 9 -- name: 'Career Transition: Java to Machine Learning (Coursera, Andrew Ng)' - startOffset: 9 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=9 - endOffset: 86 -- name: Freelancing, Master's, and first data-science roles; building data pipelines - startOffset: 86 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=86 - endOffset: 306 -- name: 'Career Lessons: step outside comfort zone; product mindset; prefer simple - models' - startOffset: 306 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=306 - endOffset: 387 -- name: 'Principal Data Scientist Role: internal consulting, architecture, mentoring' - startOffset: 387 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=387 - endOffset: 576 -- name: 'Motivation to Start the Community: early interactions and LinkedIn outreach' - startOffset: 576 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=576 - endOffset: 605 -- name: 'Community Origins: forums, landing page, first events and format inspiration' - startOffset: 605 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=605 - endOffset: 1014 -- name: 'Community Growth & Events: conference boost and scaling to ~9k members' - startOffset: 1014 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1014 - endOffset: 1222 -- name: 'Content Production & Automation: planning, scheduling, Zapier, Eventbrite' - startOffset: 1222 - url: 
https://www.youtube.com/watch?v=IxTyq96juVE&t=1222 - endOffset: 1478 -- name: 'Event Formats: Open Source Spotlight, Minis, Book of the Week' - startOffset: 1478 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1478 - endOffset: 1671 -- name: 'Notable Guests & Popular Episodes: Martin Kleppmann, Elena Samuylova, Santiago' - startOffset: 1671 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1671 - endOffset: 1897 -- name: 'Monetization & Sponsorship: costs, TopCoder, Toloka crowdsourcing workshop' - startOffset: 1897 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=1897 - endOffset: 2302 -- name: 'ML Bookcamp & Machine Learning Zoomcamp: project-based, end-to-end learning' - startOffset: 2302 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2302 - endOffset: 2346 -- name: 'Deployment Focus in the Book/Course: Flask, AWS Lambda, Kubernetes, Kubeflow' - startOffset: 2346 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2346 - endOffset: 2569 -- name: 'Career Advice: join communities, answer questions, find mentors' - startOffset: 2569 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2569 - endOffset: 2635 -- name: 'Motivation & Persistence: handling frustration and sustaining interest' - startOffset: 2635 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2635 - endOffset: 2740 -- name: 'Tool Evaluation Strategy: avoid tool churn, follow lasting trends, Kedro - curiosity' - startOffset: 2740 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2740 - endOffset: 2936 -- name: 'Productivity & Workflow: public deadlines, accountability, batching work' - startOffset: 2936 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=2936 - endOffset: 3031 -- name: 'Learning by Projects & Notes: just-in-time learning, Notion, READMEs, GitHub - docs' - startOffset: 3031 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3031 - endOffset: 3184 -- name: 'Community Inspiration & Format Ideas: borrowing from ML Ops and JavaRanch' - startOffset: 3184 - url: 
https://www.youtube.com/watch?v=IxTyq96juVE&t=3184 - endOffset: 3307 -- name: 'Interactive Formats: live coding, office hours, ML Zoomcamp sessions' - startOffset: 3307 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3307 - endOffset: 3356 -- name: Community Thanks & Future Plans - startOffset: 3356 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3356 - endOffset: 3410 -- name: Podcast Closing - startOffset: 3410 - url: https://www.youtube.com/watch?v=IxTyq96juVE&t=3410 - endOffset: 3417 --- Links: diff --git a/_podcast/s16e01-datatalks-club-anniversary-interview.md b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md similarity index 96% rename from _podcast/s16e01-datatalks-club-anniversary-interview.md rename to _podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md index 726d768f..0b32fa3d 100644 --- a/_podcast/s16e01-datatalks-club-anniversary-interview.md +++ b/_podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.md @@ -1,36 +1,114 @@ --- +title: "Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth and Evolution" +short: "DataTalks.Club Anniversary Interview" +season: 16 episode: 1 guests: - alexeygrigorev - johannabayer -intro: How do you build and sustain a data community that helps people switch into - machine learning careers while adapting to rapid AI change? In this anniversary - episode of DataTalks.Club, contributors who transitioned from roles like Java development - into machine learning and Python—and who now work full‑time on community and engineering - efforts—reflect on that exact challenge. They cover practical topics including sustainability - and monetization strategies, the roles of community and marketing leads, and decisions - around building courses (LLM/AI content versus volatile material).

You’ll - hear a detailed discussion of GPTs and LLMs and their effects on data workflows, - hiring and take‑home tests; community programs like Project of the Week, competitions, - and portfolio building; moderation and safety practices; and the evolution from - a Slack community to instructor‑led Zoomcamps and a Machine Learning Bookcamp. The - episode also examines outcomes—career switches, internships, and student success—plus - metrics that matter (newsletter performance, active users, sponsors). Listen to - learn concrete ideas for running a community‑driven learning program, designing - resilient courses in an AI era, and measuring long‑term impact. +image: images/podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.jpg ids: - anchor: atatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo + anchor: datatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo youtube: nCqwZT9zA0M -image: images/podcast/s16e01-datatalks-club-anniversary-interview.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataTalks-Club-Anniversary-Interview---Alexey-Grigorev--Johanna-Bayer-e2a5cqo apple: https://podcasts.apple.com/us/podcast/datatalks-club-anniversary-interview-alexey-grigorev/id1541710331?i=1000631114088 spotify: https://open.spotify.com/episode/0j1eKj9NbK3oAXHXHyaNae?si=M7rw9WixTvWw-BfKPXPwVg youtube: https://www.youtube.com/watch?v=nCqwZT9zA0M -season: 16 -short: DataTalks.Club Anniversary Interview -title: How DataTalks.Club Built a Thriving Data Community, ML/LLM Courses & Career - Switches +intro: "How do you build a sustainable data community that endures beyond meetup hype and founder energy? 
In this episode Alexey Grigorev, founder of DataTalks.Club, and Johanna Bayer, a researcher about to complete her PhD in machine learning for clinical neuroimaging at the University of Melbourne, discuss three years of community growth and evolution.

Alexey brings the perspective of launching and running a global data community, while Johanna contributes her background in psychology, computational neuroscience, and research software engineering, plus advocacy for open source and open science. Together they explore core topics around sustainable data community building: membership growth, volunteer and contributor roles, the intersection of research software engineering with community practice, and how open source and open science principles support longevity.

Listeners will come away with concrete considerations for creating and maintaining a data-focused community—practical lessons on community governance, contributor engagement, and aligning technical and social infrastructure—making this episode valuable for anyone building a DataTalks-style group, open source project, or research software community." +topics: +- community building +- machine learning +- data science +- data engineering +- MLOps +dateadded: 2023-10-16 +duration: PT01H02M57S +quotableClips: +- name: Episode Opening & DataTalks.Club 3rd Anniversary + startOffset: 0 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=0 + endOffset: 77 +- name: 'Career Shift: From Java Developer to Machine Learning & Python' + startOffset: 77 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=77 + endOffset: 251 +- name: 'Transition: Full-time on DataTalks.Club; engineering-heavy roles' + startOffset: 251 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=251 + endOffset: 355 +- name: 'Sustainability Strategy: Monetization and Sponsorship Focus' + startOffset: 355 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=355 + endOffset: 363 +- name: 'Team & Operations: Roles of Francis (community) and Valeria (marketing)' + startOffset: 363 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=363 + endOffset: 442 +- name: 'Course Planning: Considering LLM/AI Courses vs. 
Rapidly Changing Content' + startOffset: 442 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=442 + endOffset: 670 +- name: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take-home Tests' + startOffset: 670 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=670 + endOffset: 896 +- name: 'Community Participation: Slack Engagement, TAs, and Webinar Contributions' + startOffset: 896 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=896 + endOffset: 1223 +- name: 'Community Programs: Project of the Week, Competitions, and Portfolios' + startOffset: 1223 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1223 + endOffset: 1398 +- name: 'Community Lessons: Finding a Niche and Moderation Challenges' + startOffset: 1398 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1398 + endOffset: 1895 +- name: 'Origin Story: Launching the Slack Community and Early Organic Growth' + startOffset: 1895 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1895 + endOffset: 2026 +- name: 'Course Model: Creating Zoomcamps Inspired by Community-Driven Courses' + startOffset: 2026 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2026 + endOffset: 2131 +- name: 'Book & Course Synergy: Developing the Machine Learning Bookcamp' + startOffset: 2131 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2131 + endOffset: 2224 +- name: 'COVID Effect: Timing, Online Momentum, and Community Persistence' + startOffset: 2224 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2224 + endOffset: 2351 +- name: 'Typical Outcomes: Career Switches, Internships, and Student Successes' + startOffset: 2351 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2351 + endOffset: 2572 +- name: 'Content Strategy: Choosing Guests and Avoiding Hype-Chasing' + startOffset: 2572 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2572 + endOffset: 2748 +- name: 'Safety & Moderation: Handling Unsolicited Messages and Community Safety' + startOffset: 2748 + url: 
https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2748 + endOffset: 2908 +- name: 'Publishing Journey: Writing, Reviewing, and Working with Publishers' + startOffset: 2908 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2908 + endOffset: 3328 +- name: 'Backburner Projects: Hackathons, Event Recommender Ideas, and Event Tools' + startOffset: 3328 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3328 + endOffset: 3559 +- name: 'Success Metrics: Newsletter Performance, Active Users, and Sponsors' + startOffset: 3559 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3559 + endOffset: 3726 +- name: 'Closing Remarks: Next Steps and Continuing Community Growth' + startOffset: 3726 + url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3726 + endOffset: 3777 transcript: - header: Episode Opening & DataTalks.Club 3rd Anniversary - line: Welcome everyone. My name is Johanna and Welcome to DataTalks.Club. DataTalks.Club @@ -119,7 +197,7 @@ transcript: sec: 250 time: '4:10' who: Johanna -- header: 'Transition: Full‑time on DataTalks.Club; engineering-heavy roles' +- header: 'Transition: Full-time on DataTalks.Club; engineering-heavy roles' - line: Yeah. I was mostly doing the engineering stuff there, even though my title was “data scientist”. It's a fun fact. Now, since April, I'm fully focused on DataTalks.Club. @@ -240,7 +318,7 @@ transcript: sec: 605 time: '10:05' who: Johanna -- header: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take‑home Tests' +- header: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take-home Tests' - line: Let's actually move to the next question, which kind of touches on what we've just discussed. 
“How do you think the other jobs will change as different GPT-like services come into play and extend the skills of data professionals?” @@ -588,7 +666,7 @@ transcript: sec: 1981 time: '33:01' who: Alexey -- header: 'Course Model: Creating Zoomcamps Inspired by Community‑Driven Courses' +- header: 'Course Model: Creating Zoomcamps Inspired by Community-Driven Courses' - line: Yeah, yeah. sec: 2026 time: '33:46' @@ -746,7 +824,7 @@ transcript: sec: 2525 time: '42:05' who: Johanna -- header: 'Content Strategy: Choosing Guests and Avoiding Hype‑Chasing' +- header: 'Content Strategy: Choosing Guests and Avoiding Hype-Chasing' - line: What topics or trends in the data world are you most excited about exploring in upcoming club events or interviews? sec: 2572 @@ -1126,91 +1204,7 @@ transcript: sec: 3777 time: '1:02:57' who: Johanna -dateadded: '2023-10-16' -duration: PT01H02M57S -quotableClips: -- name: Episode Opening & DataTalks.Club 3rd Anniversary - startOffset: 0 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=0 - endOffset: 77 -- name: 'Career Shift: From Java Developer to Machine Learning & Python' - startOffset: 77 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=77 - endOffset: 251 -- name: 'Transition: Full‑time on DataTalks.Club; engineering-heavy roles' - startOffset: 251 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=251 - endOffset: 355 -- name: 'Sustainability Strategy: Monetization and Sponsorship Focus' - startOffset: 355 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=355 - endOffset: 363 -- name: 'Team & Operations: Roles of Francis (community) and Valeria (marketing)' - startOffset: 363 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=363 - endOffset: 442 -- name: 'Course Planning: Considering LLM/AI Courses vs. 
Rapidly Changing Content' - startOffset: 442 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=442 - endOffset: 670 -- name: 'GPT and LLMs: Impact on Data Workflows, Hiring, and Take‑home Tests' - startOffset: 670 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=670 - endOffset: 896 -- name: 'Community Participation: Slack Engagement, TAs, and Webinar Contributions' - startOffset: 896 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=896 - endOffset: 1223 -- name: 'Community Programs: Project of the Week, Competitions, and Portfolios' - startOffset: 1223 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1223 - endOffset: 1398 -- name: 'Community Lessons: Finding a Niche and Moderation Challenges' - startOffset: 1398 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1398 - endOffset: 1895 -- name: 'Origin Story: Launching the Slack Community and Early Organic Growth' - startOffset: 1895 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=1895 - endOffset: 2026 -- name: 'Course Model: Creating Zoomcamps Inspired by Community‑Driven Courses' - startOffset: 2026 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2026 - endOffset: 2131 -- name: 'Book & Course Synergy: Developing the Machine Learning Bookcamp' - startOffset: 2131 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2131 - endOffset: 2224 -- name: 'COVID Effect: Timing, Online Momentum, and Community Persistence' - startOffset: 2224 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2224 - endOffset: 2351 -- name: 'Typical Outcomes: Career Switches, Internships, and Student Successes' - startOffset: 2351 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2351 - endOffset: 2572 -- name: 'Content Strategy: Choosing Guests and Avoiding Hype‑Chasing' - startOffset: 2572 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2572 - endOffset: 2748 -- name: 'Safety & Moderation: Handling Unsolicited Messages and Community Safety' - startOffset: 2748 - url: 
https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2748 - endOffset: 2908 -- name: 'Publishing Journey: Writing, Reviewing, and Working with Publishers' - startOffset: 2908 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=2908 - endOffset: 3328 -- name: 'Backburner Projects: Hackathons, Event Recommender Ideas, and Event Tools' - startOffset: 3328 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3328 - endOffset: 3559 -- name: 'Success Metrics: Newsletter Performance, Active Users, and Sponsors' - startOffset: 3559 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3559 - endOffset: 3726 -- name: 'Closing Remarks: Next Steps and Continuing Community Growth' - startOffset: 3726 - url: https://www.youtube.com/watch?v=nCqwZT9zA0M&t=3726 - endOffset: 3777 +context: 'Building a Sustainable Data Community: 3 Years of DataTalks.Club Growth + and Evolution' +description: "Discover DataTalks.Club's 3-year playbook to build a sustainable data community—scaling, engagement & retention tactics that grow your network." --- diff --git a/_podcast/s19e03-datatalks-club-anniversary-podcast.md b/_podcast/datatalksclub-scaling-and-free-courses.md similarity index 94% rename from _podcast/s19e03-datatalks-club-anniversary-podcast.md rename to _podcast/datatalksclub-scaling-and-free-courses.md index e512551e..6663799c 100644 --- a/_podcast/s19e03-datatalks-club-anniversary-podcast.md +++ b/_podcast/datatalksclub-scaling-and-free-courses.md @@ -1,34 +1,154 @@ --- +title: "Inside Scaling DataTalks.Club: How We Built Free Data Engineering, MLOps & LLM Courses" +short: "DataTalks.Club Anniversary Podcast" +season: 19 episode: 3 guests: - alexeygrigorev -description: How do you grow an open, free-to-learn data community into a sustainable - education platform? 
In this episode, Alexey Grigorev — who founded DataTalks.Club - during COVID and later transitioned to running it full-time — walks through the - practical decisions and trade-offs behind building courses, community, and a product. - We cover the course portfolio (machine learning, data engineering, MLOps, LLMs, - stock analytics), organic growth from the Data Engineering Zoomcamp, and the technical - stack for scaling (a Django-based course platform). Johanna shares lessons on sponsorship - dynamics, prepaid tax realities in Germany, protecting community safety from scams, - and staying technical through pet projects, LLM experiments and an automated storytelling - pipeline. You’ll also hear about launching an LLM course and RAG experiments, early - validation and product-market fit, scaling challenges and loneliness, plus concrete - ways to contribute—guesting, mentoring, or joining project weeks. Listen for actionable - insights on running an online data community, course productization, community moderation, - and practical next steps if you want to build or support a data education ecosystem. 
+image: images/podcast/datatalksclub-scaling-and-free-courses.jpg ids: - anchor: atatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 + anchor: datatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 youtube: GHbeXIKnkLQ -image: images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/DataTalks-Club-4th-Anniversary-AMA-Podcast--Alexey-Grigorev-and-Johanna-Bayer-e2q3ch2 apple: https://podcasts.apple.com/us/podcast/datatalks-club-4th-anniversary-ama-podcast-alexey-grigorev/id1541710331?i=1000674473200 spotify: https://open.spotify.com/episode/50wIZxjq6goREu9pwXYITP?si=mPW0v5fBQxuBpg622CpCEA youtube: https://www.youtube.com/watch?v=GHbeXIKnkLQ -season: 19 -short: DataTalks.Club Anniversary Podcast -title: 'Inside Scaling DataTalks.Club: How We Built Free Data Engineering, MLOps & - LLM Courses' +description: "Discover how DataTalks.Club built free Data Engineering, MLOps & LLM courses: scaling open-source curriculum, community growth, and career-ready projects." 
+topics: +- MLOps +- LLMs +- data engineering +- machine learning +- career transition +- community building +- teaching +dateadded: 2024-11-08 +duration: PT01H03M17S +quotableClips: +- name: Podcast Welcome & AMA Format (community links and live questions) + startOffset: 0 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=0 + endOffset: 95 +- name: 'Host Intro: Johanna as special host' + startOffset: 95 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=95 + endOffset: 149 +- name: 'Origin Story: Founding DataTalks.Club during COVID' + startOffset: 149 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=149 + endOffset: 232 +- name: 'Career Shift: Transition to running DataTalks.Club full-time' + startOffset: 232 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=232 + endOffset: 246 +- name: 'Financial Decision: Leaving corporate work and early sustainability' + startOffset: 246 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=246 + endOffset: 307 +- name: 'Course Portfolio: Machine Learning, Data Engineering, MLOps, LLMs, Stock + Analytics' + startOffset: 307 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=307 + endOffset: 493 +- name: 'Organic Growth: Word-of-mouth success of Data Engineering Zoomcamp' + startOffset: 493 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=493 + endOffset: 562 +- name: 'Community Safety: Upwork scam awareness and moderation tips' + startOffset: 562 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=562 + endOffset: 724 +- name: 'Mission: Free-to-learn education inspired by Open Data Science' + startOffset: 724 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=724 + endOffset: 987 +- name: 'Community Impact: Student success stories and donations' + startOffset: 987 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=987 + endOffset: 1076 +- name: 'Sponsorship Dynamics: Revenue volatility and runway management' + startOffset: 1076 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1076 + endOffset: 1214 
+- name: 'Taxes & Cashflow: Prepaid tax system in Germany' + startOffset: 1214 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1214 + endOffset: 1443 +- name: 'Staying Technical: Pet projects, LLM experiments, and automated storytelling + pipeline' + startOffset: 1443 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1443 + endOffset: 1603 +- name: 'Product Work: Building the course platform in Django to scale courses' + startOffset: 1603 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1603 + endOffset: 1754 +- name: 'LLMs & RAG: From skepticism to launching an LLM course' + startOffset: 1754 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1754 + endOffset: 1910 +- name: 'Life Update: Reflections on full-time community work and no regrets' + startOffset: 1910 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1910 + endOffset: 2020 +- name: 'Early Validation: First event success and finding product-market fit' + startOffset: 2020 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2020 + endOffset: 2197 +- name: 'Community Longevity: Active engagement, investment, and self-organization' + startOffset: 2197 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2197 + endOffset: 2354 +- name: 'AI and Roles: Impact of AutoML/LLMs on data analysts and data scientists' + startOffset: 2354 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2354 + endOffset: 2544 +- name: 'AI in Healthcare: Human touch versus automated assistance' + startOffset: 2544 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2544 + endOffset: 2744 +- name: 'Scaling Challenges: Time investment, loneliness, and rejecting acquisition + offers' + startOffset: 2744 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2744 + endOffset: 2882 +- name: 'Networking Benefits: Masterminds, meetups, and personal connections' + startOffset: 2882 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2882 + endOffset: 2989 +- name: 'Growth Objectives: More sponsors, new courses, and 
instructor autonomy' + startOffset: 2989 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2989 + endOffset: 3098 +- name: 'How to Help: Be a guest, mentor in Slack, and join Project of the Week' + startOffset: 3098 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3098 + endOffset: 3226 +- name: 'Events Roadmap: Competitions, future hackathons, and ML course contests' + startOffset: 3226 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3226 + endOffset: 3329 +- name: 'Course Schedule: Stock market analytics rerun and upcoming workshops' + startOffset: 3329 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3329 + endOffset: 3401 +- name: 'Podcast Workflow: Guest research, question prep, and interview process' + startOffset: 3401 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3401 + endOffset: 3527 +- name: 'Career Advice: Starting in data science now and junior hiring realities' + startOffset: 3527 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3527 + endOffset: 3670 +- name: 'Personal Reads: Book recommendations and current reading' + startOffset: 3670 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3670 + endOffset: 3761 +- name: Closing Remarks & Thank You + startOffset: 3761 + url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3761 + endOffset: 3797 transcript: - header: Podcast Welcome & AMA Format (community links and live questions) - line: Hi, everyone. Welcome to our event. This event is brought to you by DataTalks.Club, @@ -196,7 +316,7 @@ transcript: - line: 'Yeah, I didn’t know about the Upwork scam. That’s really good to know. Alright, let’s take some questions from the community. Here’s one: "Why did you decide to create a free-to-learn community? What keeps you motivated, and have you ever - thought about stopping or leaving the community?"' + thought about stopping or leaving the community?"' 
sec: 694 time: '11:34' who: Johanna @@ -1073,130 +1193,16 @@ transcript: sec: 3797 time: '1:03:17' who: Johanna -dateadded: '2024-11-08' -duration: PT01H03M17S -quotableClips: -- name: Podcast Welcome & AMA Format (community links and live questions) - startOffset: 0 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=0 - endOffset: 95 -- name: 'Host Intro: Johanna as special host' - startOffset: 95 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=95 - endOffset: 149 -- name: 'Origin Story: Founding DataTalks.Club during COVID' - startOffset: 149 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=149 - endOffset: 232 -- name: 'Career Shift: Transition to running DataTalks.Club full-time' - startOffset: 232 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=232 - endOffset: 246 -- name: 'Financial Decision: Leaving corporate work and early sustainability' - startOffset: 246 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=246 - endOffset: 307 -- name: 'Course Portfolio: Machine Learning, Data Engineering, MLOps, LLMs, Stock - Analytics' - startOffset: 307 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=307 - endOffset: 493 -- name: 'Organic Growth: Word-of-mouth success of Data Engineering Zoomcamp' - startOffset: 493 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=493 - endOffset: 562 -- name: 'Community Safety: Upwork scam awareness and moderation tips' - startOffset: 562 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=562 - endOffset: 724 -- name: 'Mission: Free-to-learn education inspired by Open Data Science' - startOffset: 724 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=724 - endOffset: 987 -- name: 'Community Impact: Student success stories and donations' - startOffset: 987 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=987 - endOffset: 1076 -- name: 'Sponsorship Dynamics: Revenue volatility and runway management' - startOffset: 1076 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1076 - endOffset: 1214 -- name: 
'Taxes & Cashflow: Prepaid tax system in Germany' - startOffset: 1214 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1214 - endOffset: 1443 -- name: 'Staying Technical: Pet projects, LLM experiments, and automated storytelling - pipeline' - startOffset: 1443 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1443 - endOffset: 1603 -- name: 'Product Work: Building the course platform in Django to scale courses' - startOffset: 1603 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1603 - endOffset: 1754 -- name: 'LLMs & RAG: From skepticism to launching an LLM course' - startOffset: 1754 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1754 - endOffset: 1910 -- name: 'Life Update: Reflections on full-time community work and no regrets' - startOffset: 1910 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=1910 - endOffset: 2020 -- name: 'Early Validation: First event success and finding product-market fit' - startOffset: 2020 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2020 - endOffset: 2197 -- name: 'Community Longevity: Active engagement, investment, and self-organization' - startOffset: 2197 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2197 - endOffset: 2354 -- name: 'AI and Roles: Impact of AutoML/LLMs on data analysts and data scientists' - startOffset: 2354 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2354 - endOffset: 2544 -- name: 'AI in Healthcare: Human touch versus automated assistance' - startOffset: 2544 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2544 - endOffset: 2744 -- name: 'Scaling Challenges: Time investment, loneliness, and rejecting acquisition - offers' - startOffset: 2744 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2744 - endOffset: 2882 -- name: 'Networking Benefits: Masterminds, meetups, and personal connections' - startOffset: 2882 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2882 - endOffset: 2989 -- name: 'Growth Objectives: More sponsors, new courses, and instructor 
autonomy' - startOffset: 2989 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=2989 - endOffset: 3098 -- name: 'How to Help: Be a guest, mentor in Slack, and join Project of the Week' - startOffset: 3098 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3098 - endOffset: 3226 -- name: 'Events Roadmap: Competitions, future hackathons, and ML course contests' - startOffset: 3226 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3226 - endOffset: 3329 -- name: 'Course Schedule: Stock market analytics rerun and upcoming workshops' - startOffset: 3329 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3329 - endOffset: 3401 -- name: 'Podcast Workflow: Guest research, question prep, and interview process' - startOffset: 3401 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3401 - endOffset: 3527 -- name: 'Career Advice: Starting in data science now and junior hiring realities' - startOffset: 3527 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3527 - endOffset: 3670 -- name: 'Personal Reads: Book recommendations and current reading' - startOffset: 3670 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3670 - endOffset: 3761 -- name: Closing Remarks & Thank You - startOffset: 3761 - url: https://www.youtube.com/watch?v=GHbeXIKnkLQ&t=3761 - endOffset: 3797 +context: 'Context: Born during COVID as a volunteer meetup, DataTalks.Club scaled + organically into a free-to-learn, community-first education platform—driven by practical + courses (data engineering, ML, MLOps, LLMs), events, mentorship, and hands-on student + success—while the founders stayed technically engaged and navigated financial, operational, + and growth trade-offs. 
+ + Core: The unifying idea is that sustainable, founder-led communities that combine + technical experimentation and product-building with human-centered connection (mentorship, + events, accessible learning) create lasting impact and resilience—allowing thoughtful + stewardship to adapt to AI-driven change, achieve product-market fit, and scale + education without sacrificing community values.' +intro: "How do you scale a volunteer-run learning community into a sustainable platform offering free data engineering, MLOps, and LLM courses? In this episode Alexey Grigorev, founder of DataTalks.Club, walks through the origin story of the project, the leap to running it full-time, and the practical tradeoffs of building free data engineering courses at scale.

Alexey’s background as the founder guides discussions on course portfolio decisions (Machine Learning, Data Engineering, MLOps, LLMs, Stock Analytics), organic growth strategies like Zoomcamp word-of-mouth, and technical choices—building the course platform in Django. We cover community safety and moderation, revenue volatility from sponsorships, tax and cashflow considerations in Germany, and how staying technical through pet projects and LLM experiments informed their curriculum (including RAG and LLM course development).

Listeners will get concrete takeaways on scaling online education, community-driven learning, course product work, and practical ways to help—mentoring, guesting, or joining projects and events. Useful for educators, course builders, and data practitioners wondering how to create and sustain free, high-quality data science and MLOps training." --- diff --git a/_podcast/s15e03-llms-for-everyone.md b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md similarity index 95% rename from _podcast/s15e03-llms-for-everyone.md rename to _podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md index 2dcb4930..b1e72485 100644 --- a/_podcast/s15e03-llms-for-everyone.md +++ b/_podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.md @@ -1,20 +1,150 @@ --- +title: "Deploying LLMs in Production: Fine-Tuning, Retrieval & Open-Source vs API Tradeoffs" +short: "LLMs for Everyone" +season: 15 episode: 3 guests: - meryemarik +image: images/podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.jpg ids: - anchor: atatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf + anchor: datatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf youtube: 6dn6uZFkk04 -image: images/podcast/s15e03-llms-for-everyone.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/LLMs-for-Everyone---Meryem-Arik-e27bouf apple: https://podcasts.apple.com/us/podcast/llms-for-everyone-meryem-arik/id1541710331?i=1000622675129 spotify: https://open.spotify.com/episode/0tmi2ytNk1bEPldcbhkvhN?si=DtU2OM3RTFmPBdY8sFCv5g youtube: https://www.youtube.com/watch?v=6dn6uZFkk04 -season: 15 -short: LLMs for Everyone -title: 'Deploying LLMs in Production: Fine-Tuning, Retrieval & Open-Source vs API - Tradeoffs' + +description: "Discover LLM deployment tactics: fine-tuning, retrieval and open-source vs API tradeoffs to cut latency, control costs, and ground production models." 
+topics: +- LLMs +- MLOps +- open-source +- production +- retrieval-augmented generation +intro: "How do you take large language models from experiment to reliable production—balancing fine-tuning, retrieval strategies, and the tradeoffs between open-source models and API services? In this episode, Meryem Arik, a recovering physicist and co-founder of TitanML, walks through practical choices for LLM deployment based on her pivot from computer vision to building tools that make models smaller, cheaper, and easier to run in production.

We cover model fundamentals and selection (classification vs generative tasks), open-source model options like LLaMA, FLAN-T5, Falcon and MPT, and the operational realities of serving: model size, compression, inference optimization, latency and cost tradeoffs. Meryem explains when to prototype with GPT-3.5/4 APIs versus self-hosting, the risks of API model drift, and why fine-tuning or retrieval-augmented generation often beats continuous retraining. You’ll also get a clear breakdown of retrieval patterns, vector databases for semantic search, dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing architectures, reducing costs, and grounding answers reliably with retrieval." +dateadded: 2023-07-29 + +duration: PT00H59M31S + +quotableClips: +- name: 'Episode Introduction: LLMs for Everyone' + startOffset: 0 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=0 + endOffset: 67 +- name: 'Guest Introduction: Meryem Arik and TitanML' + startOffset: 67 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=67 + endOffset: 105 +- name: 'Career Journey: Theoretical Physics → Banking → Tech' + startOffset: 105 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=105 + endOffset: 133 +- name: 'Founding TitanML: pivot from computer vision to LLM deployability' + startOffset: 133 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=133 + endOffset: 289 +- name: 'Startup Realities: co-founder roles, operations, and tradeoffs' + startOffset: 289 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=289 + endOffset: 402 +- name: 'Early LLM Interest: customer-driven pivot and GPT-3 experience' + startOffset: 402 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=402 + endOffset: 557 +- name: 'ChatGPT Breakthrough: conversational interface and accessibility' + startOffset: 557 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=557 + endOffset: 624 +- name: 
'LLM Fundamentals: generative vs. non-generative models and transformers' + startOffset: 624 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=624 + endOffset: 704 +- name: 'Model Selection: classification tasks vs. generative tasks' + startOffset: 704 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=704 + endOffset: 825 +- name: 'Open-source Model Landscape: LLaMA, FLAN-T5, Falcon, MPT' + startOffset: 825 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=825 + endOffset: 885 +- name: 'Why LLMs Matter: handling unstructured text at scale' + startOffset: 885 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=885 + endOffset: 1008 +- name: 'Open-source vs API Models: control, privacy, and fine-tuning benefits' + startOffset: 1008 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1008 + endOffset: 1126 +- name: 'Model Drift & API Risk: hidden model changes and production impact' + startOffset: 1126 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1126 + endOffset: 1417 +- name: 'TitanML Product Suite: Train, Optimized, and Takeoff server' + startOffset: 1417 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1417 + endOffset: 1526 +- name: 'Serving Challenges: model size, compression, and inference optimization' + startOffset: 1526 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1526 + endOffset: 1590 +- name: 'Fine-tuning Purpose: specialization, domain adaptation, and tone' + startOffset: 1590 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1590 + endOffset: 1898 +- name: 'Fine-tuning Generative Models: data formats and end-task considerations' + startOffset: 1898 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1898 + endOffset: 2038 +- name: 'Workforce Impact: productivity gains and job disruption scenarios' + startOffset: 2038 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2038 + endOffset: 2446 +- name: 'Dealing with Changing Knowledge: retrieval over continuous retraining' + startOffset: 2446 + url: 
https://www.youtube.com/watch?v=6dn6uZFkk04&t=2446 + endOffset: 2522 +- name: 'Grounding Answers: indexing docs and retrieval-augmented responses' + startOffset: 2522 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2522 + endOffset: 2802 +- name: 'Retrieval Patterns: injecting passages, summarizers, and grounding layers' + startOffset: 2802 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2802 + endOffset: 2881 +- name: 'Vector Databases Explained: embeddings, indexing, and semantic search' + startOffset: 2881 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2881 + endOffset: 2984 +- name: 'Prototyping vs Production: when to use GPT-3.5/4 APIs vs open-source LLMs' + startOffset: 2984 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2984 + endOffset: 3095 +- name: 'Latency & Cost Tradeoffs: self-hosting performance and hardware choices' + startOffset: 3095 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3095 + endOffset: 3214 +- name: 'Data Quality Metrics: gold-standard examples and output-driven evaluation' + startOffset: 3214 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3214 + endOffset: 3332 +- name: 'Dataset Expansion: LLM-assisted augmentation for training data' + startOffset: 3332 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3332 + endOffset: 3399 +- name: 'Evaluation & Benchmarking: classification vs generative metrics and human review' + startOffset: 3399 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3399 + endOffset: 3548 +- name: 'Learning Resources: Hugging Face, Cohere LLM University, community content' + startOffset: 3548 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3548 + endOffset: 3638 +- name: Episode Close and Final Remarks + startOffset: 3638 + url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3638 + endOffset: 3571 + transcript: - header: 'Episode Introduction: LLMs for Everyone' - header: 'Guest Introduction: Meryem Arik and TitanML' @@ -150,7 +280,7 @@ transcript: sec: 386 time: '6:26' who: 
Meryem -- header: 'Early LLM Interest: customer-driven pivot and GPT‑3 experience' +- header: 'Early LLM Interest: customer-driven pivot and GPT-3 experience' - line: I know we're kind of late to the party in terms of speaking about the LLM because, as I mentioned at the beginning, this is actually our first event ever about LLMs (where we explicitly talk about them). LLMs are large language models, @@ -218,7 +348,7 @@ transcript: sec: 619 time: '10:19' who: Alexey -- header: 'LLM Fundamentals: generative vs. non‑generative models and transformers' +- header: 'LLM Fundamentals: generative vs. non-generative models and transformers' - line: Yeah. LLMs are large language models. I would kind of distinguish large language models into two things – quite often we conflate these ideas. Large language models, as we typically talk about them, are generative models. What these are, are models @@ -288,7 +418,7 @@ transcript: sec: 808 time: '13:28' who: Alexey -- header: 'Open‑source Model Landscape: LLaMA, FLAN‑T5, Falcon, MPT' +- header: 'Open-source Model Landscape: LLaMA, FLAN-T5, Falcon, MPT' - line: Yeah. There's a whole range and ecosystem of language models and they're good at different things. For example, there's the Google FLAN-T5 range, which is able to generate text. But what that's particularly good at is translation and summarization. @@ -361,7 +491,7 @@ transcript: sec: 979 time: '16:19' who: Alexey -- header: 'Open‑source vs API Models: control, privacy, and fine‑tuning benefits' +- header: 'Open-source vs API Models: control, privacy, and fine-tuning benefits' - line: Sure. There are a whole bunch of open source language models, and they're getting better and better month by month. 
I think only two days ago, Meta released LLaMA 2, which is a massively improved version from LLaMA 1, trained on 40% more @@ -534,7 +664,7 @@ transcript: sec: 1563 time: '26:03' who: Alexey -- header: 'Fine‑tuning Purpose: specialization, domain adaptation, and tone' +- header: 'Fine-tuning Purpose: specialization, domain adaptation, and tone' - line: Sure. When you take a model off the shelf, what it has and what it's very, very good at, is general language knowledge and understanding. Your model will speak English or speak whatever language it was trained in, and it'll have reasonably @@ -636,7 +766,7 @@ transcript: sec: 1877 time: '31:17' who: Alexey -- header: 'Fine‑tuning Generative Models: data formats and end‑task considerations' +- header: 'Fine-tuning Generative Models: data formats and end-task considerations' - line: Yeah. This kind of changes depending on the end task that you want it to get it to do. But in cases that we've done, you can literally just have strings of documents, you can just have raw text that you can fine-tune on. So you don't @@ -825,7 +955,7 @@ transcript: sec: 2521 time: '42:01' who: Alexey -- header: 'Grounding Answers: indexing docs and retrieval‑augmented responses' +- header: 'Grounding Answers: indexing docs and retrieval-augmented responses' - line: Yeah, exactly, a huge knowledge base. And I think most companies have those kinds of knowledge bases, whether in Confluence, or Notion, etc. What you can do is embed all of that documentation and reinvent it every single time it changes @@ -958,7 +1088,7 @@ transcript: sec: 2970 time: '49:30' who: Meryem -- header: 'Prototyping vs Production: when to use GPT‑3.5/4 APIs vs open‑source LLMs' +- header: 'Prototyping vs Production: when to use GPT-3.5/4 APIs vs open-source LLMs' - line: For this task, do you know if we should go with an open source LLM or go with GPT-3.5 or 4? Are there any pros and cons? 
sec: 2984 @@ -989,7 +1119,7 @@ transcript: sec: 3074 time: '51:14' who: Alexey -- header: 'Latency & Cost Tradeoffs: self‑hosting performance and hardware choices' +- header: 'Latency & Cost Tradeoffs: self-hosting performance and hardware choices' - line: I mean, they are really fast. They're really, really fast, because they're hosted on very expensive hardware. If you were to host your model on the same hardware, using good techniques – using something like the Titan Takeoff server @@ -1023,7 +1153,7 @@ transcript: sec: 3177 time: '52:57' who: Meryem -- header: 'Data Quality Metrics: gold‑standard examples and output‑driven evaluation' +- header: 'Data Quality Metrics: gold-standard examples and output-driven evaluation' - line: We have a few interesting questions from Tara. The first question he's asking is, “How can you measure if the data you feed into an LLM is good enough?” Do you even think about these things or are you just saying, “This is the data I @@ -1054,7 +1184,7 @@ transcript: sec: 3312 time: '55:12' who: Alexey -- header: 'Dataset Expansion: LLM‑assisted augmentation for training data' +- header: 'Dataset Expansion: LLM-assisted augmentation for training data' - line: Yeah, it's super similar. A very basic example is – if I have a dataset where one example is “the pig is pink,” I might get my LLM to say “the cat is black”. It just kind of switches words out, but it's semantically similar. Another way @@ -1164,143 +1294,6 @@ transcript: sec: 3638 time: '1:00:38' who: Alexey -description: 'Discover LLM deployment tactics: fine-tuning, retrieval and open-source - vs API tradeoffs to cut latency, control costs, and ground production models.' -intro: 'How do you take large language models from experiment to reliable production—balancing - fine-tuning, retrieval strategies, and the tradeoffs between open‑source models - and API services? 
In this episode, Meryem Arik, a recovering physicist and co‑founder - of TitanML, walks through practical choices for LLM deployment based on her pivot - from computer vision to building tools that make models smaller, cheaper, and easier - to run in production.

We cover model fundamentals and selection (classification - vs generative tasks), open‑source model options like LLaMA, FLAN‑T5, Falcon and - MPT, and the operational realities of serving: model size, compression, inference - optimization, latency and cost tradeoffs. Meryem explains when to prototype with - GPT‑3.5/4 APIs versus self‑hosting, the risks of API model drift, and why fine‑tuning - or retrieval‑augmented generation often beats continuous retraining. You’ll also - get a clear breakdown of retrieval patterns, vector databases for semantic search, - dataset expansion and evaluation strategies, and TitanML’s Train/Optimized/Takeoff - product approach. Listen to gain actionable guidance for deploying LLMs in production—choosing - architectures, reducing costs, and grounding answers reliably with retrieval.' -dateadded: '2023-07-29' -duration: PT00H59M31S -quotableClips: -- name: 'Episode Introduction: LLMs for Everyone' - startOffset: 0 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=0 - endOffset: 67 -- name: 'Guest Introduction: Meryem Arik and TitanML' - startOffset: 67 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=67 - endOffset: 105 -- name: 'Career Journey: Theoretical Physics → Banking → Tech' - startOffset: 105 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=105 - endOffset: 133 -- name: 'Founding TitanML: pivot from computer vision to LLM deployability' - startOffset: 133 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=133 - endOffset: 289 -- name: 'Startup Realities: co-founder roles, operations, and tradeoffs' - startOffset: 289 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=289 - endOffset: 402 -- name: 'Early LLM Interest: customer-driven pivot and GPT‑3 experience' - startOffset: 402 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=402 - endOffset: 557 -- name: 'ChatGPT Breakthrough: conversational interface and accessibility' - startOffset: 557 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=557 - 
endOffset: 624 -- name: 'LLM Fundamentals: generative vs. non‑generative models and transformers' - startOffset: 624 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=624 - endOffset: 704 -- name: 'Model Selection: classification tasks vs. generative tasks' - startOffset: 704 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=704 - endOffset: 825 -- name: 'Open‑source Model Landscape: LLaMA, FLAN‑T5, Falcon, MPT' - startOffset: 825 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=825 - endOffset: 885 -- name: 'Why LLMs Matter: handling unstructured text at scale' - startOffset: 885 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=885 - endOffset: 1008 -- name: 'Open‑source vs API Models: control, privacy, and fine‑tuning benefits' - startOffset: 1008 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1008 - endOffset: 1126 -- name: 'Model Drift & API Risk: hidden model changes and production impact' - startOffset: 1126 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1126 - endOffset: 1417 -- name: 'TitanML Product Suite: Train, Optimized, and Takeoff server' - startOffset: 1417 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1417 - endOffset: 1526 -- name: 'Serving Challenges: model size, compression, and inference optimization' - startOffset: 1526 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1526 - endOffset: 1590 -- name: 'Fine‑tuning Purpose: specialization, domain adaptation, and tone' - startOffset: 1590 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1590 - endOffset: 1898 -- name: 'Fine‑tuning Generative Models: data formats and end‑task considerations' - startOffset: 1898 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=1898 - endOffset: 2038 -- name: 'Workforce Impact: productivity gains and job disruption scenarios' - startOffset: 2038 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2038 - endOffset: 2446 -- name: 'Dealing with Changing Knowledge: retrieval over continuous retraining' - startOffset: 2446 - 
url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2446 - endOffset: 2522 -- name: 'Grounding Answers: indexing docs and retrieval‑augmented responses' - startOffset: 2522 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2522 - endOffset: 2802 -- name: 'Retrieval Patterns: injecting passages, summarizers, and grounding layers' - startOffset: 2802 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2802 - endOffset: 2881 -- name: 'Vector Databases Explained: embeddings, indexing, and semantic search' - startOffset: 2881 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2881 - endOffset: 2984 -- name: 'Prototyping vs Production: when to use GPT‑3.5/4 APIs vs open‑source LLMs' - startOffset: 2984 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=2984 - endOffset: 3095 -- name: 'Latency & Cost Tradeoffs: self‑hosting performance and hardware choices' - startOffset: 3095 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3095 - endOffset: 3214 -- name: 'Data Quality Metrics: gold‑standard examples and output‑driven evaluation' - startOffset: 3214 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3214 - endOffset: 3332 -- name: 'Dataset Expansion: LLM‑assisted augmentation for training data' - startOffset: 3332 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3332 - endOffset: 3399 -- name: 'Evaluation & Benchmarking: classification vs generative metrics and human - review' - startOffset: 3399 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3399 - endOffset: 3548 -- name: 'Learning Resources: Hugging Face, Cohere LLM University, community content' - startOffset: 3548 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3548 - endOffset: 3638 -- name: Episode Close and Final Remarks - startOffset: 3638 - url: https://www.youtube.com/watch?v=6dn6uZFkk04&t=3638 - endOffset: 3571 --- Links: diff --git a/_podcast/s03e07-market-yourself.md b/_podcast/developer-personal-brand-learn-in-public.md similarity index 97% rename from 
_podcast/s03e07-market-yourself.md rename to _podcast/developer-personal-brand-learn-in-public.md index 973d6d85..240ca073 100644 --- a/_podcast/s03e07-market-yourself.md +++ b/_podcast/developer-personal-brand-learn-in-public.md @@ -1,12 +1,11 @@ --- -title: 'Learn in Public: Personal Branding & Career Marketing for Developers' -short: 'Learn in Public: Personal Branding & Career Marketing for Developers' -guests: -- swyx -image: images/podcast/s03e07-market-yourself.jpg +title: "Learn in Public: Personal Branding & Career Marketing for Developers" +short: "Learn in Public: Personal Branding & Career Marketing for Developers" season: 3 episode: 7 -date: 2025-11-07 +guests: +- swyx +image: images/podcast/developer-personal-brand-learn-in-public.jpg ids: youtube: tkBCPqWKCL8 anchor: How-to-Market-Yourself-without-Being-a-Celebrity---Shawn-Swyx-Wang-e11ai8t @@ -15,6 +14,136 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/How-to-Market-Yourself-without-Being-a-Celebrity---Shawn-Swyx-Wang-e11ai8t spotify: https://open.spotify.com/episode/6uLyKxpVZv0wItCNyGPdAN apple: https://podcasts.apple.com/us/podcast/how-to-market-yourself-without-being-celebrity-shawn/id1541710331?i=1000522670386 + +description: "Discover personal branding & career marketing for devs: learn-in-public tactics, niche choice and internal promotion to boost visibility and land promotions." +intro: "How do developers build visibility, earn promotions, and steer their careers by learning in public? In this episode, Shawn Swyx Wang — Senior Developer Advocate for AWS Amplify, author of The Coding Career Handbook, and former engineer at Netlify and Temporal — walks through a practical framework for personal branding and career marketing for developers. We unpack why self-marketing matters beyond job hunting and the five-part personal marketing framework: brand, domain, value, skills, and channel.

You'll hear concrete guidance on choosing and validating a niche (meetups, conferences, community signals), building an owned platform (blog, newsletter, mailing list), and distribution tactics from early social growth to the engagement move \"pick up what they put down.\" Swyx also covers career transition strategies, hiring portfolios and case studies, internal pathways like lateral moves and signature initiatives, and creating reusable talks and demos. Practical tools discussed include brag documents, demos for internal promotion, and open knowledge projects as visibility builders. Tune in to get actionable steps to craft a developer personal brand, grow influence, and apply learn-in-public tactics to advance your career and job opportunities." +topics: +- personal brand +- career growth +- career transition +dateadded: 2021-05-22 +date: 2025-11-07 + +duration: PT01H02M41S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=0 + endOffset: 69 +- name: 'Guest Overview: Swyx and the learn in public movement' + startOffset: 69 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=69 + endOffset: 144 +- name: 'Career Journey: finance to coding, Netlify, AWS, Temporal' + startOffset: 144 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=144 + endOffset: 376 +- name: 'Why Self-Marketing Matters: recognition, promotions, opportunities' + startOffset: 376 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=376 + endOffset: 513 +- name: 'Marketing Beyond Job Hunting: open source and internal persuasion' + startOffset: 513 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=513 + endOffset: 626 +- name: 'Personal Marketing Framework: brand, domain, value, skills, channel' + startOffset: 626 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=626 + endOffset: 756 +- name: 'Personal Brand for Non-star Developers: find distinctiveness' + startOffset: 756 + url: 
https://www.youtube.com/watch?v=tkBCPqWKCL8&t=756 + endOffset: 787 +- name: 'Brand Consistency: photo, name, and repeated impressions' + startOffset: 787 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=787 + endOffset: 1123 +- name: 'Domain Selection: choosing topics to write and speak about' + startOffset: 1123 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1123 + endOffset: 1272 +- name: 'Niche Strategy: choosing the right level of specialization' + startOffset: 1272 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1272 + endOffset: 1352 +- name: 'Validating a Niche: meetups, conferences, and community signals' + startOffset: 1352 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1352 + endOffset: 1433 +- name: 'Learn in Public: honest progress, corrections, and earned expertise' + startOffset: 1433 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1433 + endOffset: 1554 +- name: 'Owned Platforms & Blogging: mailing lists, newsletters, and personal site' + startOffset: 1554 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1554 + endOffset: 1632 +- name: 'Starting Distribution: social media to drive people to your site' + startOffset: 1632 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1632 + endOffset: 1827 +- name: 'Engagement Tactic: Pick up what they put down to get noticed' + startOffset: 1827 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1827 + endOffset: 1962 +- name: 'Early Social Media Growth: tactics for initial visibility' + startOffset: 1962 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1962 + endOffset: 1991 +- name: 'Career Transition Strategies: students, career changers, mutual value exchange' + startOffset: 1991 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1991 + endOffset: 2195 +- name: 'Focused Applications: targeted research over mass applying' + startOffset: 2195 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2195 + endOffset: 2310 +- name: 'Hiring Portfolio: unsolicited redesigns, 
product clones, and case studies' + startOffset: 2310 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2310 + endOffset: 2540 +- name: 'Internal Pathways: lateral entry and internal transfers' + startOffset: 2540 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2540 + endOffset: 2597 +- name: 'Work-safe Content Ideas: war stories, industry problems, and summaries' + startOffset: 2597 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2597 + endOffset: 2743 +- name: 'Process Summary: discover, learn in public, work, and iterate' + startOffset: 2743 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2743 + endOffset: 2834 +- name: 'Open Knowledge Projects: collaborative docs and cheat-sheets as visibility' + startOffset: 2834 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2834 + endOffset: 3070 +- name: 'Internal Promotion Tools: brag document, demos, and networking' + startOffset: 3070 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3070 + endOffset: 3256 +- name: 'Signature Initiative: company-wide projects that build influence' + startOffset: 3256 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3256 + endOffset: 3429 +- name: 'Internal Content Strategy: applying external marketing tactics inside' + startOffset: 3429 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3429 + endOffset: 3544 +- name: 'Public Speaking: creating reusable talks and practicing communication' + startOffset: 3544 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3544 + endOffset: 3717 +- name: 'Book & Resources: The Coding Career Handbook, newsletter, and discount' + startOffset: 3717 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3717 + endOffset: 3791 +- name: 'Final Takeaway: non-technical skills dominate engineering ladders' + startOffset: 3791 + url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3791 + endOffset: 3761 + transcript: - header: 'Guest Overview: Swyx and the learn in public movement' - line: This week we will talk about marketing 
ourselves. We have a special guest @@ -137,8 +266,8 @@ transcript: sec: 358 time: '5:58' who: Swyx -- header: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' -- header: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' +- header: 'Why Self-Marketing Matters: recognition, promotions, opportunities' +- header: 'Why Self-Marketing Matters: recognition, promotions, opportunities' - line: About marketing ourselves. You are one of the people I follow on Twitter for stuff like marketing ourselves and learning in public. So I wanted to ask you, why should we market ourselves? Why is it important for our careers? You have @@ -276,12 +405,12 @@ transcript: stuff and cover your bases. So, there are just so many details here, like we can go into any one of them. who: Swyx -- header: 'Personal Brand for Non‑star Developers: find distinctiveness' +- header: 'Personal Brand for Non-star Developers: find distinctiveness' - line: Let’s say I already have some skills. I’m not the top developer but I know how to code. Let’s say I know Python or JavaScript, or I’m good at data science. How do I find my personal brand as an average data scientist or as an average software engineer? -- header: 'Personal Brand for Non‑star Developers: find distinctiveness' +- header: 'Personal Brand for Non-star Developers: find distinctiveness' - line: Let’s say I already have some skills. I’m not the top developer but I know how to code. Let’s say I know Python or JavaScript, or I’m good at data science. How do I find my personal brand as an average data scientist or as an average @@ -1039,11 +1168,11 @@ transcript: it is just more about, do they like you or do they trust you that are it and everything else can be taught. who: Swyx -- header: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' +- header: 'Work-safe Content Ideas: war stories, industry problems, and summaries' - line: I imagine it takes a lot of time to build a spotify clone. 
Let’s say I work already. I am already experienced. I want to find a new job in the same domain. In this case what do I share? What do I write about? Stuff that I do at work? -- header: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' +- header: 'Work-safe Content Ideas: war stories, industry problems, and summaries' - line: I imagine it takes a lot of time to build a spotify clone. Let’s say I work already. I am already experienced. I want to find a new job in the same domain. In this case what do I share? What do I write about? Stuff that I do at work? @@ -1113,12 +1242,12 @@ transcript: sec: 2801 time: '46:41' who: Alexey -- header: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' +- header: 'Open Knowledge Projects: collaborative docs and cheat-sheets as visibility' - line: Yes. It’s nice, especially if you have your own highlights from a book or a blog post. Then you can go on Google and type your domain name and then that search word. You can use Google as your own personal search engine for your notes. It’s really helpful when you are trying to look up something to reference people. -- header: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' +- header: 'Open Knowledge Projects: collaborative docs and cheat-sheets as visibility' - line: Yes. It’s nice, especially if you have your own highlights from a book or a blog post. Then you can go on Google and type your domain name and then that search word. You can use Google as your own personal search engine for your notes. @@ -1264,12 +1393,12 @@ transcript: sec: 3240 time: '54:00' who: Alexey -- header: 'Signature Initiative: company‑wide projects that build influence' +- header: 'Signature Initiative: company-wide projects that build influence' - line: There is more than that. You can also do a signature initiative. This is a term that I picked up at AWS. It’s a big project that you hit on your own. That’s what you are known for. 
It gives you a chance to win outside of your team, to show individual accomplishment and leadership. -- header: 'Signature Initiative: company‑wide projects that build influence' +- header: 'Signature Initiative: company-wide projects that build influence' - line: There is more than that. You can also do a signature initiative. This is a term that I picked up at AWS. It’s a big project that you hit on your own. That’s what you are known for. It gives you a chance to win outside of your team, to @@ -1511,12 +1640,12 @@ transcript: sec: 3781 time: '1:03:01' who: Alexey -- header: 'Final Takeaway: non‑technical skills dominate engineering ladders' +- header: 'Final Takeaway: non-technical skills dominate engineering ladders' - line: I don’t get to talk about the marketing chapter enough. There is so much to career development. I always want to invite people to have a discussion about this. We don’t talk about it enough. We always talk about code. We should talk about the 75% of the engineering ladder criteria that is not technical. -- header: 'Final Takeaway: non‑technical skills dominate engineering ladders' +- header: 'Final Takeaway: non-technical skills dominate engineering ladders' - line: I don’t get to talk about the marketing chapter enough. There is so much to career development. I always want to invite people to have a discussion about this. We don’t talk about it enough. We always talk about code. We should talk @@ -1544,142 +1673,6 @@ transcript: sec: 3830 time: '1:03:50' who: Alexey -intro: 'How do developers build visibility, earn promotions, and steer their careers - by learning in public? In this episode, Shawn Swyx Wang — Senior Developer Advocate - for AWS Amplify, author of The Coding Career Handbook, and former engineer at Netlify - and Temporal — walks through a practical framework for personal branding and career - marketing for developers. 
We unpack why self-marketing matters beyond job hunting - and the five-part personal marketing framework: brand, domain, value, skills, and - channel.

You''ll hear concrete guidance on choosing and validating a niche - (meetups, conferences, community signals), building an owned platform (blog, newsletter, - mailing list), and distribution tactics from early social growth to the engagement - move "pick up what they put down." Swyx also covers career transition strategies, - hiring portfolios and case studies, internal pathways like lateral moves and signature - initiatives, and creating reusable talks and demos. Practical tools discussed include - brag documents, demos for internal promotion, and open knowledge projects as visibility - builders. Tune in to get actionable steps to craft a developer personal brand, grow - influence, and apply learn-in-public tactics to advance your career and job opportunities.' -description: 'Discover personal branding & career marketing for devs: learn-in-public - tactics, niche choice and internal promotion to boost visibility and land promotions.' -dateadded: '2021-05-22' -duration: PT01H02M41S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=0 - endOffset: 69 -- name: 'Guest Overview: Swyx and the learn in public movement' - startOffset: 69 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=69 - endOffset: 144 -- name: 'Career Journey: finance to coding, Netlify, AWS, Temporal' - startOffset: 144 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=144 - endOffset: 376 -- name: 'Why Self‑Marketing Matters: recognition, promotions, opportunities' - startOffset: 376 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=376 - endOffset: 513 -- name: 'Marketing Beyond Job Hunting: open source and internal persuasion' - startOffset: 513 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=513 - endOffset: 626 -- name: 'Personal Marketing Framework: brand, domain, value, skills, channel' - startOffset: 626 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=626 - endOffset: 756 -- name: 'Personal Brand for Non‑star 
Developers: find distinctiveness' - startOffset: 756 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=756 - endOffset: 787 -- name: 'Brand Consistency: photo, name, and repeated impressions' - startOffset: 787 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=787 - endOffset: 1123 -- name: 'Domain Selection: choosing topics to write and speak about' - startOffset: 1123 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1123 - endOffset: 1272 -- name: 'Niche Strategy: choosing the right level of specialization' - startOffset: 1272 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1272 - endOffset: 1352 -- name: 'Validating a Niche: meetups, conferences, and community signals' - startOffset: 1352 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1352 - endOffset: 1433 -- name: 'Learn in Public: honest progress, corrections, and earned expertise' - startOffset: 1433 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1433 - endOffset: 1554 -- name: 'Owned Platforms & Blogging: mailing lists, newsletters, and personal site' - startOffset: 1554 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1554 - endOffset: 1632 -- name: 'Starting Distribution: social media to drive people to your site' - startOffset: 1632 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1632 - endOffset: 1827 -- name: 'Engagement Tactic: Pick up what they put down to get noticed' - startOffset: 1827 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1827 - endOffset: 1962 -- name: 'Early Social Media Growth: tactics for initial visibility' - startOffset: 1962 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1962 - endOffset: 1991 -- name: 'Career Transition Strategies: students, career changers, mutual value exchange' - startOffset: 1991 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=1991 - endOffset: 2195 -- name: 'Focused Applications: targeted research over mass applying' - startOffset: 2195 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2195 - endOffset: 
2310 -- name: 'Hiring Portfolio: unsolicited redesigns, product clones, and case studies' - startOffset: 2310 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2310 - endOffset: 2540 -- name: 'Internal Pathways: lateral entry and internal transfers' - startOffset: 2540 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2540 - endOffset: 2597 -- name: 'Work‑safe Content Ideas: war stories, industry problems, and summaries' - startOffset: 2597 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2597 - endOffset: 2743 -- name: 'Process Summary: discover, learn in public, work, and iterate' - startOffset: 2743 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2743 - endOffset: 2834 -- name: 'Open Knowledge Projects: collaborative docs and cheat‑sheets as visibility' - startOffset: 2834 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=2834 - endOffset: 3070 -- name: 'Internal Promotion Tools: brag document, demos, and networking' - startOffset: 3070 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3070 - endOffset: 3256 -- name: 'Signature Initiative: company‑wide projects that build influence' - startOffset: 3256 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3256 - endOffset: 3429 -- name: 'Internal Content Strategy: applying external marketing tactics inside' - startOffset: 3429 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3429 - endOffset: 3544 -- name: 'Public Speaking: creating reusable talks and practicing communication' - startOffset: 3544 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3544 - endOffset: 3717 -- name: 'Book & Resources: The Coding Career Handbook, newsletter, and discount' - startOffset: 3717 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3717 - endOffset: 3791 -- name: 'Final Takeaway: non‑technical skills dominate engineering ladders' - startOffset: 3791 - url: https://www.youtube.com/watch?v=tkBCPqWKCL8&t=3791 - endOffset: 3761 --- Links: diff --git a/_podcast/s02e02-developer-advocacy.md 
b/_podcast/devrel-data-science-open-source-tools.md similarity index 96% rename from _podcast/s02e02-developer-advocacy.md rename to _podcast/devrel-data-science-open-source-tools.md index 44df949e..b542f4c2 100644 --- a/_podcast/s02e02-developer-advocacy.md +++ b/_podcast/devrel-data-science-open-source-tools.md @@ -1,11 +1,11 @@ --- -title: 'DevRel for Data Science: Build Community, Create Content, and Grow Your Career' -short: Developer Advocacy for Data Science -guests: -- elleobrien -image: images/podcast/s02e02-developer-advocacy.jpg +title: "DevRel for Data Science: Build Community, Create Content, and Grow Your Career" +short: "Developer Advocacy for Data Science" season: 2 episode: 2 +guests: +- elleobrien +image: images/podcast/devrel-data-science-open-source-tools.jpg ids: youtube: jv5W4jXk4P4 anchor: Developer-Advocacy-for-Data-Science---Elle-OBrien-epcbak @@ -14,6 +14,132 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Developer-Advocacy-for-Data-Science---Elle-OBrien-epcbak spotify: https://open.spotify.com/episode/6Hq0ZGPTkDk1h8orfCU78I apple: https://podcasts.apple.com/us/podcast/developer-advocacy-for-data-science-elle-obrien/id1541710331?i=1000506315396 + +description: "Discover DevRel tactics for Data Science: community growth, reproducibility, and content strategy—practical metrics, safety practices, and career growth tips." +intro: "How do you practice developer relations for data science while balancing reproducibility, community growth, and content strategy? In this episode, Elle O’Brien — a data scientist at Iterative (working on DVC and CML) and a lecturer at the University of Michigan with a PhD in neuroscience and computational modeling from UW — walks through practical DevRel for data-focused tools and teaching.

We cover her shift from a viral StyleGAN project into DevRel, the scope of a solo developer advocate (product work, docs, PRs, videos, hiring), and how she prioritizes releases versus evergreen content. Elle shares promotion tactics (Hacker News, Reddit, social), approaches to community safety and moderation, and the emotional realities of online work. She explains community metrics, role distinctions between DevRel/advocate/evangelist, and core skills like technical credibility and rapid learning. We also dig into content strategy for teaching—curriculum design, reusable video content, recording lectures as open educational resources, and practical ways to get started blogging and building a developer portfolio.

Listen to gain actionable guidance on community growth, reproducibility best practices, content planning, and the trade-offs of DevRel work in open source data science." +topics: +- developer relations +- data science +- machine learning +- open-source +dateadded: 2021-02-23 + +duration: PT00H55M15S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=0 + endOffset: 177 +- name: 'Introduction: Developer Advocacy for Data Science — Elle O''Brien (Iterative, + DVC, CML)' + startOffset: 177 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=177 + endOffset: 250 +- name: 'Background: Neuroscience research, PhD, and computational modeling' + startOffset: 250 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=250 + endOffset: 470 +- name: 'Teaching focus: Applied Data Science curriculum & research reproducibility' + startOffset: 470 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=470 + endOffset: 573 +- name: 'Career pivot: Viral StyleGAN project to DevRel role at Iterative' + startOffset: 573 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=573 + endOffset: 740 +- name: 'Role scope: product work, CML, docs, PRs, videos, and hiring' + startOffset: 740 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=740 + endOffset: 902 +- name: 'Prioritization as a solo DevRel: scheduling releases vs evergreen content' + startOffset: 902 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=902 + endOffset: 1004 +- name: 'Release promotion: real-time engagement on Hacker News, Reddit, and social + media' + startOffset: 1004 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1004 + endOffset: 1074 +- name: 'Managing toxicity: choosing communities and setting boundaries' + startOffset: 1074 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1074 + endOffset: 1187 +- name: 'DevRel job realities: content creation, community management, and support + trade-offs' + startOffset: 1187 + url: 
https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1187 + endOffset: 1431 +- name: 'Community feedback: being the product signal and user insight channel' + startOffset: 1431 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1431 + endOffset: 1561 +- name: 'Community metrics: signals, analytics, and full-time analysis potential' + startOffset: 1561 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1561 + endOffset: 1624 +- name: 'Role distinctions: DevRel, developer advocate, and evangelist explained' + startOffset: 1624 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1624 + endOffset: 1735 +- name: 'Risks of DevRel: online abuse, burnout, and public scrutiny' + startOffset: 1735 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1735 + endOffset: 1885 +- name: 'Safety practices: anonymity, moderation, and peer solidarity' + startOffset: 1885 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1885 + endOffset: 2068 +- name: 'Rewards of DevRel: visibility, speaking invites, and career opportunities' + startOffset: 2068 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2068 + endOffset: 2211 +- name: 'Core skills: technical credibility, rapid learning, and clear communication' + startOffset: 2211 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2211 + endOffset: 2371 +- name: 'Nontechnical pathways: learning in public and building a portfolio' + startOffset: 2371 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2371 + endOffset: 2532 +- name: 'Getting started: blogging, tutorials, Twitter, and content examples' + startOffset: 2532 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2532 + endOffset: 2605 +- name: 'Backgrounds & personality: creativity, humor, and relatability in DevRel' + startOffset: 2605 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2605 + endOffset: 2715 +- name: 'Assessing fit: hobby vs job, community inclination, and time commitment' + startOffset: 2715 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2715 + 
endOffset: 2886 +- name: 'Audience growth: metrics, growth-hacking versus sustainable strategies' + startOffset: 2886 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2886 + endOffset: 3019 +- name: 'Branding & engagement: mascots, tone, and consistent visuals (Divi owl)' + startOffset: 3019 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3019 + endOffset: 3126 +- name: 'Teaching & DevRel synergy: curriculum design and reusable video content' + startOffset: 3126 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3126 + endOffset: 3286 +- name: 'University plans: recording lectures and open educational resources on YouTube' + startOffset: 3286 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3286 + endOffset: 3399 +- name: 'Closing thoughts: encouraging diverse DevRels and where to follow Elle (Twitter, + YouTube, LinkedIn)' + startOffset: 3399 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3399 + endOffset: 3492 +- name: Episode Wrap-up and Farewell + startOffset: 3492 + url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3492 + endOffset: 3315 + transcript: - header: Podcast Introduction - header: 'Introduction: Developer Advocacy for Data Science — Elle O''Brien (Iterative, @@ -244,7 +370,7 @@ transcript: sec: 932 time: '15:32' who: Elle -- header: 'Release promotion: real‑time engagement on Hacker News, Reddit, and social +- header: 'Release promotion: real-time engagement on Hacker News, Reddit, and social media' - line: So it's more like a feeling. Plus, if there is a certain release, you know that you will need to prepare for this release – prepare some supporting material, @@ -298,7 +424,7 @@ transcript: time: '18:15' who: Elle - header: 'DevRel job realities: content creation, community management, and support - trade‑offs' + trade-offs' - line: Yeah, thank you. Since I was pretty curious about this role, I decided to just take a random job description of a developer advocate for a company and see what it says. 
I found this in some Slack, and it said in the responsibilities @@ -390,7 +516,7 @@ transcript: sec: 1431 time: '23:51' who: Elle -- header: 'Community metrics: signals, analytics, and full‑time analysis potential' +- header: 'Community metrics: signals, analytics, and full-time analysis potential' - line: Yeah, it makes sense. So basically, it's more a description of a full stack role, right? For example, in full stack data science, we have somebody who can talk to stakeholders, build data pipelines, train a model, roll the model out @@ -777,7 +903,7 @@ transcript: sec: 2772 time: '46:12' who: Elle -- header: 'Audience growth: metrics, growth‑hacking versus sustainable strategies' +- header: 'Audience growth: metrics, growth-hacking versus sustainable strategies' - line: You mentioned that people don't consider that it will involve a lot of growth hacking and things like that. But it actually does involve these things, right? sec: 2886 @@ -959,144 +1085,11 @@ transcript: sec: 3407 time: '56:47' who: Elle -- header: Episode Wrap‑up and Farewell +- header: Episode Wrap-up and Farewell - line: Yes. Thanks a lot for being here today and sharing your experience. And thanks everyone else for being here as well. Let's see each other again next week. Thanks, Elle. Goodbye. sec: 3492 time: '58:12' who: Alexey -description: 'Discover DevRel tactics for Data Science: community growth, reproducibility, - and content strategy—practical metrics, safety practices, and career growth tips.' -intro: How do you practice developer relations for data science while balancing reproducibility, - community growth, and content strategy? In this episode, Elle O’Brien — a data scientist - at Iterative (working on DVC and CML) and a lecturer at the University of Michigan - with a PhD in neuroscience and computational modeling from UW — walks through practical - DevRel for data-focused tools and teaching.

We cover her shift from a viral - StyleGAN project into DevRel, the scope of a solo developer advocate (product work, - docs, PRs, videos, hiring), and how she prioritizes releases versus evergreen content. - Elle shares promotion tactics (Hacker News, Reddit, social), approaches to community - safety and moderation, and the emotional realities of online work. She explains - community metrics, role distinctions between DevRel/advocate/evangelist, and core - skills like technical credibility and rapid learning. We also dig into content strategy - for teaching—curriculum design, reusable video content, recording lectures as open - educational resources, and practical ways to get started blogging and building a - developer portfolio.

Listen to gain actionable guidance on community growth, - reproducibility best practices, content planning, and the trade-offs of DevRel work - in open source data science. -dateadded: '2021-02-23' -duration: PT00H55M15S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=0 - endOffset: 177 -- name: 'Introduction: Developer Advocacy for Data Science — Elle O''Brien (Iterative, - DVC, CML)' - startOffset: 177 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=177 - endOffset: 250 -- name: 'Background: Neuroscience research, PhD, and computational modeling' - startOffset: 250 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=250 - endOffset: 470 -- name: 'Teaching focus: Applied Data Science curriculum & research reproducibility' - startOffset: 470 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=470 - endOffset: 573 -- name: 'Career pivot: Viral StyleGAN project to DevRel role at Iterative' - startOffset: 573 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=573 - endOffset: 740 -- name: 'Role scope: product work, CML, docs, PRs, videos, and hiring' - startOffset: 740 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=740 - endOffset: 902 -- name: 'Prioritization as a solo DevRel: scheduling releases vs evergreen content' - startOffset: 902 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=902 - endOffset: 1004 -- name: 'Release promotion: real‑time engagement on Hacker News, Reddit, and social - media' - startOffset: 1004 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1004 - endOffset: 1074 -- name: 'Managing toxicity: choosing communities and setting boundaries' - startOffset: 1074 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1074 - endOffset: 1187 -- name: 'DevRel job realities: content creation, community management, and support - trade‑offs' - startOffset: 1187 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1187 - endOffset: 1431 -- name: 'Community feedback: 
being the product signal and user insight channel' - startOffset: 1431 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1431 - endOffset: 1561 -- name: 'Community metrics: signals, analytics, and full‑time analysis potential' - startOffset: 1561 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1561 - endOffset: 1624 -- name: 'Role distinctions: DevRel, developer advocate, and evangelist explained' - startOffset: 1624 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1624 - endOffset: 1735 -- name: 'Risks of DevRel: online abuse, burnout, and public scrutiny' - startOffset: 1735 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1735 - endOffset: 1885 -- name: 'Safety practices: anonymity, moderation, and peer solidarity' - startOffset: 1885 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=1885 - endOffset: 2068 -- name: 'Rewards of DevRel: visibility, speaking invites, and career opportunities' - startOffset: 2068 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2068 - endOffset: 2211 -- name: 'Core skills: technical credibility, rapid learning, and clear communication' - startOffset: 2211 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2211 - endOffset: 2371 -- name: 'Nontechnical pathways: learning in public and building a portfolio' - startOffset: 2371 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2371 - endOffset: 2532 -- name: 'Getting started: blogging, tutorials, Twitter, and content examples' - startOffset: 2532 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2532 - endOffset: 2605 -- name: 'Backgrounds & personality: creativity, humor, and relatability in DevRel' - startOffset: 2605 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2605 - endOffset: 2715 -- name: 'Assessing fit: hobby vs job, community inclination, and time commitment' - startOffset: 2715 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2715 - endOffset: 2886 -- name: 'Audience growth: metrics, growth‑hacking versus sustainable strategies' - 
startOffset: 2886 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=2886 - endOffset: 3019 -- name: 'Branding & engagement: mascots, tone, and consistent visuals (Divi owl)' - startOffset: 3019 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3019 - endOffset: 3126 -- name: 'Teaching & DevRel synergy: curriculum design and reusable video content' - startOffset: 3126 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3126 - endOffset: 3286 -- name: 'University plans: recording lectures and open educational resources on YouTube' - startOffset: 3286 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3286 - endOffset: 3399 -- name: 'Closing thoughts: encouraging diverse DevRels and where to follow Elle (Twitter, - YouTube, LinkedIn)' - startOffset: 3399 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3399 - endOffset: 3492 -- name: Episode Wrap‑up and Farewell - startOffset: 3492 - url: https://www.youtube.com/watch?v=jv5W4jXk4P4&t=3492 - endOffset: 3315 --- diff --git a/_podcast/s14e06-data-developer-relations.md b/_podcast/devrel-open-source-machine-learning.md similarity index 95% rename from _podcast/s14e06-data-developer-relations.md rename to _podcast/devrel-open-source-machine-learning.md index d3e82846..86a3aa44 100644 --- a/_podcast/s14e06-data-developer-relations.md +++ b/_podcast/devrel-open-source-machine-learning.md @@ -1,22 +1,138 @@ --- +title: "DevRel Role for Machine Learning: ML Ecosystems, Open-Source Governance & Developer Experience with Metaflow" +short: "DevRel Role for Machine Learning" +season: 14 episode: 6 guests: - hugobowneanderson +image: images/podcast/devrel-open-source-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/Data-Developer-Relations---Hugo-Bowne-Anderson-e25q88q youtube: z7BvslwVRbQ -image: images/podcast/s14e06-data-developer-relations.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Data-Developer-Relations---Hugo-Bowne-Anderson-e25q88q apple: 
https://podcasts.apple.com/us/podcast/data-developer-relations-hugo-bowne-anderson/id1541710331?i=1000617298688 spotify: https://open.spotify.com/episode/7bVCKqn9fLt6ETq8hxId5V?si=GZSC3NbvRuyXD85iOQo51Q youtube: https://www.youtube.com/watch?v=z7BvslwVRbQ -season: 14 -short: Data Developer Relations -title: 'Master Full-Stack ML with Metaflow: DevRel, Open-Source Governance & AI Trends' + +description: "Explore the role of developer relations for machine learning: ML ecosystems, open-source governance and developer experience with Metaflow." +intro: "How do you build effective developer relations for machine learning ecosystems while navigating open-source governance and enhancing developer experience? In this episode, Hugo Bowne-Anderson — Head of Developer Relations at Outerbounds, longtime educator and podcast host — demonstrates Metaflow's capabilities and shares practical guidance for building reproducible ML workflows. Drawing on his background at Coiled and DataCamp and his experience teaching and creating courses, Hugo explores ML ecosystem integrations (AWS, Kubernetes, Argo), interoperability considerations, and company support models for open-source projects like Dask and Metaflow.

We dive into the DevRel career path, essential skills (technical fluency, writing, community building), organizational structures, and how developer feedback and dogfooding enhance documentation and reproducibility. Hugo discusses generative AI's impact on ML infrastructure and DevRel practices, AI-assisted content creation tools like Whisper and ChatGPT, and strategic approaches to tutorials, blogs, and conference talks. Listen to gain actionable insights on ML ecosystem development, improving developer experience with Metaflow, and aligning DevRel strategies with open-source governance to support scalable machine learning infrastructure." +dateadded: 2023-06-17 +topics: +- developer relations +- machine learning +- open-source +duration: PT00H57M52S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=0 + endOffset: 93 +- name: 'Guest Introduction: Hugo Bowne-Anderson, Outerbounds & Metaflow' + startOffset: 93 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=93 + endOffset: 134 +- name: Metaflow Sandbox Demo & Full-Stack Machine Learning Spotlight + startOffset: 134 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=134 + endOffset: 216 +- name: 'Career Path: From Biophysics Research to Data Science Education' + startOffset: 216 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=216 + endOffset: 226 +- name: Building Courses, Open-Source Collaboration & DataCamp Impact + startOffset: 226 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=226 + endOffset: 647 +- name: 'Open-Source Governance: Company Support for Projects (Dask, Metaflow)' + startOffset: 647 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=647 + endOffset: 832 +- name: 'Metaflow Integrations: AWS, Kubernetes, Argo & ML Interoperability' + startOffset: 832 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=832 + endOffset: 874 +- name: 'Path to DevRel: Education to Developer Advocacy' + startOffset: 874 + url: 
https://www.youtube.com/watch?v=z7BvslwVRbQ&t=874 + endOffset: 1083 +- name: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' + startOffset: 1083 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1083 + endOffset: 1372 +- name: 'DevRel Organizational Models: Reporting Lines & Technical Alignment' + startOffset: 1372 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1372 + endOffset: 1517 +- name: 'Developer Collaboration: Feedback Loops, Documentation & Dogfooding' + startOffset: 1517 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1517 + endOffset: 1570 +- name: 'Generative AI Trends: Impacts on ML Infrastructure & DevRel' + startOffset: 1570 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1570 + endOffset: 1637 +- name: 'Marketing Partnership: SEO, Content Strategy & Audience Targeting' + startOffset: 1637 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1637 + endOffset: 1901 +- name: 'Core DevRel Skills: Technical Fluency, Writing & Community Building' + startOffset: 1901 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1901 + endOffset: 2090 +- name: 'Role Trade-offs: Content Work vs Internal Data Science' + startOffset: 2090 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2090 + endOffset: 2187 +- name: 'Teaching Reproducibility: Dogfooding and Simplifying Workflows' + startOffset: 2187 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2187 + endOffset: 2241 +- name: 'Improving Writing: Practice, Collaboration & Editorial Feedback' + startOffset: 2241 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2241 + endOffset: 2417 +- name: 'AI-Assisted Drafting: Whisper, ChatGPT & Productivity Tools' + startOffset: 2417 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2417 + endOffset: 2594 +- name: 'Content Design: Audience, Goals & Structural Outlines for Tutorials' + startOffset: 2594 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2594 + endOffset: 2769 +- name: 'Content Goals: Awareness, Support & 
Open-Source Strategy Decisions' + startOffset: 2769 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2769 + endOffset: 2923 +- name: 'Choosing Media: Blog Posts, Talks, Videos, Conferences & ROI' + startOffset: 2923 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2923 + endOffset: 3102 +- name: 'Scope of DevRel: Internal Enablement, External Outreach & Partnerships' + startOffset: 3102 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3102 + endOffset: 3271 +- name: 'Career Advice: GitHub Portfolios, Meetups & Experimenting in DevRel' + startOffset: 3271 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3271 + endOffset: 3372 +- name: 'Long-Form Conversations: Vanishing Gradients Podcast Overview' + startOffset: 3372 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3372 + endOffset: 3503 +- name: 'Contact & Resources: Hugo Online, Outerbounds Slack & Links' + startOffset: 3503 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3503 + endOffset: 3565 +- name: Closing Remarks & Episode Wrap-Up + startOffset: 3565 + url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3565 + endOffset: 3472 + transcript: - header: Podcast Introduction -- header: 'Guest Introduction: Hugo Bowne‑Anderson, Outerbounds & Metaflow' +- header: 'Guest Introduction: Hugo Bowne-Anderson, Outerbounds & Metaflow' - line: This week, we'll talk about developer advocacy. We have a special guest today – very special – Hugo. Hugo is the Head of Developer Relations at Outerbounds. He's also a co-host of the Vanishing Gradients podcast. He's a data scientist, @@ -28,7 +144,7 @@ transcript: sec: 93 time: '1:33' who: Alexey -- header: Metaflow Sandbox Demo & Full‑Stack Machine Learning Spotlight +- header: Metaflow Sandbox Demo & Full-Stack Machine Learning Spotlight - line: Thank you so much for having me here. It's a great honor to be here. 
For those who don't know, this will go live soon, but we also recorded an open-source demo of Metaflow and full-stack machine learning using the sandbox we've built recently, @@ -70,7 +186,7 @@ transcript: sec: 216 time: '3:36' who: Alexey -- header: Building Courses, Open‑Source Collaboration & DataCamp Impact +- header: Building Courses, Open-Source Collaboration & DataCamp Impact - line: Yeah, and feel free to stop me at any point. I'm not quite sure what level of granularity to go into. But I'll kind of give a bit of background that's relevant to what we're talking about today, as well. My background is in scientific research @@ -177,7 +293,7 @@ transcript: sec: 633 time: '10:33' who: Hugo -- header: 'Open‑Source Governance: Company Support for Projects (Dask, Metaflow)' +- header: 'Open-Source Governance: Company Support for Projects (Dask, Metaflow)' - line: I still don't know how exactly it works. Sometimes people can post links, sometimes they can't. Most of the time, they cannot. Only if the host can, which I guess makes sense because otherwise people might come and post spam. I'm really @@ -303,7 +419,7 @@ transcript: sec: 917 time: '15:17' who: Hugo -- header: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' +- header: 'DevRel Explained: Education, Documentation & the "Wisdom Layer"' - line: What is actually DevRel? What is it? sec: 1083 time: '18:03' @@ -588,7 +704,7 @@ transcript: sec: 2087 time: '34:47' who: Alexey -- header: 'Role Trade‑offs: Content Work vs Internal Data Science' +- header: 'Role Trade-offs: Content Work vs Internal Data Science' - line: I was just gonna say, the other thing worth mentioning – and this is always a challenge for a lot of data scientists and machine learning engineers who are thinking of getting into DevRel. It's not clear in a lot of organizations. 
Once @@ -694,7 +810,7 @@ transcript: sec: 2264 time: '37:44' who: Hugo -- header: 'AI‑Assisted Drafting: Whisper, ChatGPT & Productivity Tools' +- header: 'AI-Assisted Drafting: Whisper, ChatGPT & Productivity Tools' - line: What helped me is having an editor who would point out, “Hey, look. This paragraph is completely not understandable. I tried to read it three times. I still don't get it. Let's work on this paragraph to really understand what you meant here @@ -805,7 +921,7 @@ transcript: sec: 2607 time: '43:27' who: Hugo -- header: 'Content Goals: Awareness, Support & Open‑Source Strategy Decisions' +- header: 'Content Goals: Awareness, Support & Open-Source Strategy Decisions' - line: How do you understand what kind of goal you have? Maybe somebody comes to you and says, “Hey, we don't have any posts in our blog. Let's create blog posts.” And then you're like “Okay, let's create.” Then you think, “Okay. What is actually @@ -985,7 +1101,7 @@ transcript: sec: 3353 time: '55:53' who: Alexey -- header: 'Long‑Form Conversations: Vanishing Gradients Podcast Overview' +- header: 'Long-Form Conversations: Vanishing Gradients Podcast Overview' - line: Well, you also have a podcast. We still have 3 minutes. Maybe you can tell us about that podcast before we finish? sec: 3372 @@ -1055,136 +1171,12 @@ transcript: sec: 3556 time: '59:16' who: Hugo -- header: Closing Remarks & Episode Wrap‑Up +- header: Closing Remarks & Episode Wrap-Up - line: Well, have a nice weekend. And for those who are not in Australia, have a nice Friday, and then a great weekend. sec: 3565 time: '59:25' who: Alexey -description: 'Master Metaflow, DevRel and full-stack ML: demo, AWS/Kubernetes integrations, - open-source governance and career tips to build reproducible ML pipelines.' -intro: How do you master full‑stack machine learning with Metaflow while balancing - open‑source governance, developer relations, and fast‑moving AI trends? 
In this - episode Hugo Bowne‑Anderson — Head of Developer Relations at Outerbounds, longtime - educator and podcast host — walks through a Metaflow sandbox demo and practical - guidance for building reproducible machine learning workflows. Drawing on his background - at Coiled and DataCamp and his experience teaching and creating courses, Hugo breaks - down Metaflow integrations (AWS, Kubernetes, Argo), interoperability considerations, - and company support models for projects like Dask and Metaflow.

We also - explore the path to DevRel, core DevRel skills (technical fluency, writing, community - building), organizational models, and how developer feedback and dogfooding improve - documentation and reproducibility. Hugo discusses generative AI’s impacts on ML - infrastructure and DevRel, AI‑assisted drafting tools like Whisper and ChatGPT, - and content strategy for tutorials, blogs, and talks. Listen to gain concrete takeaways - on mastering full‑stack ML with Metaflow, improving reproducibility, and aligning - DevRel and open‑source governance to support scalable machine learning infrastructure. -dateadded: '2023-06-17' -duration: PT00H57M52S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=0 - endOffset: 93 -- name: 'Guest Introduction: Hugo Bowne‑Anderson, Outerbounds & Metaflow' - startOffset: 93 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=93 - endOffset: 134 -- name: Metaflow Sandbox Demo & Full‑Stack Machine Learning Spotlight - startOffset: 134 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=134 - endOffset: 216 -- name: 'Career Path: From Biophysics Research to Data Science Education' - startOffset: 216 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=216 - endOffset: 226 -- name: Building Courses, Open‑Source Collaboration & DataCamp Impact - startOffset: 226 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=226 - endOffset: 647 -- name: 'Open‑Source Governance: Company Support for Projects (Dask, Metaflow)' - startOffset: 647 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=647 - endOffset: 832 -- name: 'Metaflow Integrations: AWS, Kubernetes, Argo & ML Interoperability' - startOffset: 832 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=832 - endOffset: 874 -- name: 'Path to DevRel: Education to Developer Advocacy' - startOffset: 874 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=874 - endOffset: 1083 -- name: 'DevRel Explained: Education, 
Documentation & the "Wisdom Layer"' - startOffset: 1083 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1083 - endOffset: 1372 -- name: 'DevRel Organizational Models: Reporting Lines & Technical Alignment' - startOffset: 1372 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1372 - endOffset: 1517 -- name: 'Developer Collaboration: Feedback Loops, Documentation & Dogfooding' - startOffset: 1517 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1517 - endOffset: 1570 -- name: 'Generative AI Trends: Impacts on ML Infrastructure & DevRel' - startOffset: 1570 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1570 - endOffset: 1637 -- name: 'Marketing Partnership: SEO, Content Strategy & Audience Targeting' - startOffset: 1637 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1637 - endOffset: 1901 -- name: 'Core DevRel Skills: Technical Fluency, Writing & Community Building' - startOffset: 1901 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=1901 - endOffset: 2090 -- name: 'Role Trade‑offs: Content Work vs Internal Data Science' - startOffset: 2090 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2090 - endOffset: 2187 -- name: 'Teaching Reproducibility: Dogfooding and Simplifying Workflows' - startOffset: 2187 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2187 - endOffset: 2241 -- name: 'Improving Writing: Practice, Collaboration & Editorial Feedback' - startOffset: 2241 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2241 - endOffset: 2417 -- name: 'AI‑Assisted Drafting: Whisper, ChatGPT & Productivity Tools' - startOffset: 2417 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2417 - endOffset: 2594 -- name: 'Content Design: Audience, Goals & Structural Outlines for Tutorials' - startOffset: 2594 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2594 - endOffset: 2769 -- name: 'Content Goals: Awareness, Support & Open‑Source Strategy Decisions' - startOffset: 2769 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2769 - 
endOffset: 2923 -- name: 'Choosing Media: Blog Posts, Talks, Videos, Conferences & ROI' - startOffset: 2923 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=2923 - endOffset: 3102 -- name: 'Scope of DevRel: Internal Enablement, External Outreach & Partnerships' - startOffset: 3102 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3102 - endOffset: 3271 -- name: 'Career Advice: GitHub Portfolios, Meetups & Experimenting in DevRel' - startOffset: 3271 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3271 - endOffset: 3372 -- name: 'Long‑Form Conversations: Vanishing Gradients Podcast Overview' - startOffset: 3372 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3372 - endOffset: 3503 -- name: 'Contact & Resources: Hugo Online, Outerbounds Slack & Links' - startOffset: 3503 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3503 - endOffset: 3565 -- name: Closing Remarks & Episode Wrap‑Up - startOffset: 3565 - url: https://www.youtube.com/watch?v=z7BvslwVRbQ&t=3565 - endOffset: 3472 --- Links: diff --git a/_podcast/s19e09-linguistics-and-fairness.md b/_podcast/fairness-in-ai-ml-engineering.md similarity index 91% rename from _podcast/s19e09-linguistics-and-fairness.md rename to _podcast/fairness-in-ai-ml-engineering.md index 64e0bfc0..7f0c4011 100644 --- a/_podcast/s19e09-linguistics-and-fairness.md +++ b/_podcast/fairness-in-ai-ml-engineering.md @@ -1,20 +1,164 @@ --- +title: "Fairness in AI/ML Engineering: Interpretability, Metrics and Sociotechnical Design" +short: "Linguistics and Fairness" +season: 19 episode: 9 guests: - tamaraatanasoska +image: images/podcast/fairness-in-ai-ml-engineering.jpg ids: - anchor: atalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 + anchor: datatalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 youtube: sXU9vMDBjmk -image: images/podcast/s19e09-linguistics-and-fairness.jpg links: anchor: 
https://creators.spotify.com/pod/show/datatalksclub/episodes/Linguistics-and-Fairness---Tamara-Atanasoska-e2thdk0 apple: https://podcasts.apple.com/us/podcast/linguistics-and-fairness-tamara-atanasoska/id1541710331?i=1000684411354 spotify: https://open.spotify.com/episode/6S4a85iiRzl7NU1HykXeKT?si=FNoDtj74T2ujQKzKdDWwzA youtube: https://www.youtube.com/watch?v=sXU9vMDBjmk -season: 19 -short: Linguistics and Fairness -title: 'Fairness in AI: Using Fairlearn to Mitigate Credit Scoring Bias & Build Explainable - Models' +description: "Learn fairness, interpretability, and metrics in AI/ML engineering—practical sociotechnical design steps to evaluate bias, improve transparency, protect users." +topics: +- machine learning +- LLMs +- open-source +- tools +- data governance +- fairness +intro: "How do you reduce bias in credit scoring models without sacrificing explainability? In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., Fairlearn maintainer, and contributor to scikit-learn and skops with a background in software engineering and computational linguistics — walks through practical approaches to fairness in AI. We dig into a real credit scoring use case, empirical findings on gender disparities, and the societal harms of biased models such as debt and repossession.

Tamara explains Fairlearn’s group fairness tools, visualization and mitigation methods, and the tradeoffs between false positives, false negatives, and demographic parity. She discusses how to choose sensitive groups in domain-specific settings, the limits of automation, the need for human-in-the-loop systems, and who in an organization should decide fairness tradeoffs. The episode also covers interpretability and explainable models — inspection tools, partial dependence, and cross-library integration with scikit-learn and estimator APIs — plus practical concerns like secure model serialization and community contribution paths.

Listen to learn actionable guidance on auditing and mitigating credit scoring bias, building explainable models, and integrating Fairlearn into real-world ML workflows." +dateadded: 2025-02-24 +duration: PT00H59M14S +quotableClips: +- name: Podcast Introduction & Episode Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=0 + endOffset: 151 +- name: 'Guest Introduction: Tamara’s Open-Source Roles (Fairlearn, scikit-learn, + Skope-Rules)' + startOffset: 151 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=151 + endOffset: 198 +- name: 'Career Overview: Software Engineering to Computational Linguistics' + startOffset: 198 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=198 + endOffset: 277 +- name: 'Music Tech Experience: Ableton and Push 2 Instrument Design' + startOffset: 277 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=277 + endOffset: 401 +- name: 'Device Architecture: Laptop Computation vs Standalone Hardware' + startOffset: 401 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=401 + endOffset: 536 +- name: 'Transition to NLP & AI: Academic Interests and Motivation' + startOffset: 536 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=536 + endOffset: 604 +- name: 'Cognitive Systems Studies: Language, Neuroscience, and ML' + startOffset: 604 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=604 + endOffset: 710 +- name: 'Research Path: Returning to Study and New Projects' + startOffset: 710 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=710 + endOffset: 761 +- name: 'Music as Hobby: Balancing Creative Work and Research' + startOffset: 761 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=761 + endOffset: 824 +- name: 'Sociotechnical Framing: Modeling Language in Social Context' + startOffset: 824 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=824 + endOffset: 892 +- name: 'Fairness in AI: Credit Scoring Use Case and Real-World Impact' + startOffset: 892 + url: 
https://www.youtube.com/watch?v=sXU9vMDBjmk&t=892 + endOffset: 910 +- name: 'Empirical Findings: Gender Disparities in Credit Models (Fairlearn Study)' + startOffset: 910 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=910 + endOffset: 1094 +- name: 'Societal Harms: Debt, Repossession, and Downstream Consequences' + startOffset: 1094 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1094 + endOffset: 1291 +- name: 'Fairlearn Tools: Group Fairness, Visualization, and Mitigation Methods' + startOffset: 1291 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1291 + endOffset: 1444 +- name: 'Sensitive Group Selection: Domain-Specific Decisions in Credit Models' + startOffset: 1444 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1444 + endOffset: 1581 +- name: 'Limits of Automation: Human Judgment in Fairness Assessments' + startOffset: 1581 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1581 + endOffset: 1732 +- name: 'Metric Tradeoffs: False Positives vs False Negatives and Demographic Parity' + startOffset: 1732 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1732 + endOffset: 1893 +- name: 'Organizational Responsibility: Who Decides Fairness Tradeoffs?' 
+ startOffset: 1893 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1893 + endOffset: 1991 +- name: 'Practitioner Education: Frameworks, Ambiguity, and Learning Objectives' + startOffset: 1991 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1991 + endOffset: 2123 +- name: 'Moderation Case Study: Cross-Functional Teams and Domain Expertise' + startOffset: 2123 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2123 + endOffset: 2233 +- name: 'Human-in-the-Loop: Essential Component for Fair AI Systems' + startOffset: 2233 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2233 + endOffset: 2358 +- name: 'Joining Probable: From Open-Source Contributions to a Role' + startOffset: 2358 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2358 + endOffset: 2457 +- name: 'Probable Work: Explainability, Language Models, and Library Integration' + startOffset: 2457 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2457 + endOffset: 2574 +- name: 'Interpretability Tools: Inspection Package and Partial Dependence' + startOffset: 2574 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2574 + endOffset: 2694 +- name: 'Cross-Library Compatibility: Fairlearn, scikit-learn, and Estimator APIs' + startOffset: 2694 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2694 + endOffset: 2780 +- name: 'Scopes Library: Secure Model Persistence and Hugging Face Integration' + startOffset: 2780 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2780 + endOffset: 2836 +- name: 'Serialization Risks: Pickle Vulnerabilities and Secure Deserialization' + startOffset: 2836 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2836 + endOffset: 3054 +- name: 'Community Involvement: PyLadies, Sprints, and Fairlearn Events' + startOffset: 3054 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3054 + endOffset: 3130 +- name: 'Contributing to Fairlearn: Discord, Good-First Issues, and Sprints' + startOffset: 3130 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3130 + 
endOffset: 3341 +- name: 'Development Ethos: Testing, Refactoring, and Custom Estimators' + startOffset: 3341 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3341 + endOffset: 3397 +- name: 'Project Updates: Upcoming Fairlearn Release and Maintainer Notes' + startOffset: 3397 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3397 + endOffset: 3442 +- name: 'Practical Quirk: Tokenization Issues Breaking "Fairlearn" in Transcripts' + startOffset: 3442 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3442 + endOffset: 3494 +- name: Closing Remarks, Contact Info, and Final Thoughts + startOffset: 3494 + url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3494 + endOffset: 3554 transcript: - header: Podcast Introduction & Episode Overview - line: This week, we’ll talk about linguistic fairness and a sociotechnical perspective @@ -29,8 +173,8 @@ transcript: sec: 0 time: 0:00 who: Alexey -- header: 'Guest Introduction: Tamara’s Open‑Source Roles (Fairlearn, scikit‑learn, - Skope‑Rules)' +- header: 'Guest Introduction: Tamara’s Open-Source Roles (Fairlearn, scikit-learn, + Skope-Rules)' - line: Yes, that’s correct. sec: 151 time: '2:31' @@ -252,7 +396,7 @@ transcript: sec: 834 time: '13:54' who: Tamara -- header: 'Fairness in AI: Credit Scoring Use Case and Real‑World Impact' +- header: 'Fairness in AI: Credit Scoring Use Case and Real-World Impact' - line: That sounds quite abstract. Can we narrow it down to AI? sec: 892 time: '14:52' @@ -363,7 +507,7 @@ transcript: sec: 1291 time: '21:31' who: Tamara -- header: 'Sensitive Group Selection: Domain‑Specific Decisions in Credit Models' +- header: 'Sensitive Group Selection: Domain-Specific Decisions in Credit Models' - line: Okay, so what exactly does Fairlearn, or similar tools, provide? Let’s take credit scoring as an example. Say we have a model, like a decision tree or logistic regression, that predicts loan decisions. 
Fairlearn then analyzes how the model @@ -497,7 +641,7 @@ transcript: sec: 1991 time: '33:11' who: Tamara -- header: 'Moderation Case Study: Cross‑Functional Teams and Domain Expertise' +- header: 'Moderation Case Study: Cross-Functional Teams and Domain Expertise' - line: I was reflecting on my personal experience with making these decisions. I worked on a moderation team for an online marketplace, deciding whether an item should go live for purchase or be blocked. We discussed factors like the model's @@ -515,7 +659,7 @@ transcript: sec: 2123 time: '35:23' who: Alexey -- header: 'Human‑in‑the‑Loop: Essential Component for Fair AI Systems' +- header: 'Human-in-the-Loop: Essential Component for Fair AI Systems' - line: 'Yes, and you mentioned something really important: the human in the loop. It’s a central component of all AI systems. If we want them to be fair, we need humans in the loop. Before any decision can have a real impact, there has to be @@ -566,7 +710,7 @@ transcript: sec: 2352 time: '39:12' who: Tamara -- header: 'Joining Probable: From Open‑Source Contributions to a Role' +- header: 'Joining Probable: From Open-Source Contributions to a Role' - line: So, how did you get involved in the project? Last time we spoke, Probable didn’t exist yet. How did it happen? You were doing LPiano back then, right? sec: 2358 @@ -650,7 +794,7 @@ transcript: sec: 2640 time: '44:00' who: Tamara -- header: 'Cross‑Library Compatibility: Fairlearn, scikit‑learn, and Estimator APIs' +- header: 'Cross-Library Compatibility: Fairlearn, scikit-learn, and Estimator APIs' - line: The most interesting part of my work has been ensuring cross-library compatibility. This means making all Fairlearn estimators compatible with Psyched Learn, and ensuring compatibility as Psyched Learn transitions to version 1.6. 
People should @@ -781,7 +925,7 @@ transcript: sec: 3054 time: '50:54' who: Tamara -- header: 'Contributing to Fairlearn: Discord, Good‑First Issues, and Sprints' +- header: 'Contributing to Fairlearn: Discord, Good-First Issues, and Sprints' - line: That’s interesting. You read my mind again. I was about to ask how someone can contribute to Fairlearn if they’re interested. If you live in Berlin, they can join the meetup, right? And there will be opportunities to contribute, as @@ -917,163 +1061,22 @@ transcript: sec: 3554 time: '59:14' who: Alexey -description: 'Discover how to use Fairlearn to mitigate credit scoring bias and build - explainable models: practical tools, human-in-the-loop tips, and evaluation tradeoffs.' -intro: How do you reduce bias in credit scoring models without sacrificing explainability? - In this episode, Tamara Atanasoska — an open source software engineer at :probabl.., - Fairlearn maintainer, and contributor to scikit-learn and skops with a background - in software engineering and computational linguistics — walks through practical - approaches to fairness in AI. We dig into a real credit scoring use case, empirical - findings on gender disparities, and the societal harms of biased models such as - debt and repossession.

Tamara explains Fairlearn’s group fairness tools, - visualization and mitigation methods, and the tradeoffs between false positives, - false negatives, and demographic parity. She discusses how to choose sensitive groups - in domain‑specific settings, the limits of automation, the need for human‑in‑the‑loop - systems, and who in an organization should decide fairness tradeoffs. The episode - also covers interpretability and explainable models — inspection tools, partial - dependence, and cross‑library integration with scikit‑learn and estimator APIs — - plus practical concerns like secure model serialization and community contribution - paths.

Listen to learn actionable guidance on auditing and mitigating credit - scoring bias, building explainable models, and integrating Fairlearn into real‑world - ML workflows. -dateadded: '2025-02-24' -duration: PT00H59M14S -quotableClips: -- name: Podcast Introduction & Episode Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=0 - endOffset: 151 -- name: 'Guest Introduction: Tamara’s Open‑Source Roles (Fairlearn, scikit‑learn, - Skope‑Rules)' - startOffset: 151 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=151 - endOffset: 198 -- name: 'Career Overview: Software Engineering to Computational Linguistics' - startOffset: 198 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=198 - endOffset: 277 -- name: 'Music Tech Experience: Ableton and Push 2 Instrument Design' - startOffset: 277 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=277 - endOffset: 401 -- name: 'Device Architecture: Laptop Computation vs Standalone Hardware' - startOffset: 401 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=401 - endOffset: 536 -- name: 'Transition to NLP & AI: Academic Interests and Motivation' - startOffset: 536 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=536 - endOffset: 604 -- name: 'Cognitive Systems Studies: Language, Neuroscience, and ML' - startOffset: 604 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=604 - endOffset: 710 -- name: 'Research Path: Returning to Study and New Projects' - startOffset: 710 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=710 - endOffset: 761 -- name: 'Music as Hobby: Balancing Creative Work and Research' - startOffset: 761 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=761 - endOffset: 824 -- name: 'Sociotechnical Framing: Modeling Language in Social Context' - startOffset: 824 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=824 - endOffset: 892 -- name: 'Fairness in AI: Credit Scoring Use Case and Real‑World Impact' - startOffset: 892 - url: 
https://www.youtube.com/watch?v=sXU9vMDBjmk&t=892 - endOffset: 910 -- name: 'Empirical Findings: Gender Disparities in Credit Models (Fairlearn Study)' - startOffset: 910 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=910 - endOffset: 1094 -- name: 'Societal Harms: Debt, Repossession, and Downstream Consequences' - startOffset: 1094 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1094 - endOffset: 1291 -- name: 'Fairlearn Tools: Group Fairness, Visualization, and Mitigation Methods' - startOffset: 1291 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1291 - endOffset: 1444 -- name: 'Sensitive Group Selection: Domain‑Specific Decisions in Credit Models' - startOffset: 1444 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1444 - endOffset: 1581 -- name: 'Limits of Automation: Human Judgment in Fairness Assessments' - startOffset: 1581 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1581 - endOffset: 1732 -- name: 'Metric Tradeoffs: False Positives vs False Negatives and Demographic Parity' - startOffset: 1732 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1732 - endOffset: 1893 -- name: 'Organizational Responsibility: Who Decides Fairness Tradeoffs?' 
- startOffset: 1893 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1893 - endOffset: 1991 -- name: 'Practitioner Education: Frameworks, Ambiguity, and Learning Objectives' - startOffset: 1991 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=1991 - endOffset: 2123 -- name: 'Moderation Case Study: Cross‑Functional Teams and Domain Expertise' - startOffset: 2123 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2123 - endOffset: 2233 -- name: 'Human‑in‑the‑Loop: Essential Component for Fair AI Systems' - startOffset: 2233 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2233 - endOffset: 2358 -- name: 'Joining Probable: From Open‑Source Contributions to a Role' - startOffset: 2358 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2358 - endOffset: 2457 -- name: 'Probable Work: Explainability, Language Models, and Library Integration' - startOffset: 2457 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2457 - endOffset: 2574 -- name: 'Interpretability Tools: Inspection Package and Partial Dependence' - startOffset: 2574 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2574 - endOffset: 2694 -- name: 'Cross‑Library Compatibility: Fairlearn, scikit‑learn, and Estimator APIs' - startOffset: 2694 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2694 - endOffset: 2780 -- name: 'Scopes Library: Secure Model Persistence and Hugging Face Integration' - startOffset: 2780 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2780 - endOffset: 2836 -- name: 'Serialization Risks: Pickle Vulnerabilities and Secure Deserialization' - startOffset: 2836 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=2836 - endOffset: 3054 -- name: 'Community Involvement: PyLadies, Sprints, and Fairlearn Events' - startOffset: 3054 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3054 - endOffset: 3130 -- name: 'Contributing to Fairlearn: Discord, Good‑First Issues, and Sprints' - startOffset: 3130 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3130 - 
endOffset: 3341 -- name: 'Development Ethos: Testing, Refactoring, and Custom Estimators' - startOffset: 3341 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3341 - endOffset: 3397 -- name: 'Project Updates: Upcoming Fairlearn Release and Maintainer Notes' - startOffset: 3397 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3397 - endOffset: 3442 -- name: 'Practical Quirk: Tokenization Issues Breaking "Fairlearn" in Transcripts' - startOffset: 3442 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3442 - endOffset: 3494 -- name: Closing Remarks, Contact Info, and Final Thoughts - startOffset: 3494 - url: https://www.youtube.com/watch?v=sXU9vMDBjmk&t=3494 - endOffset: 3554 ---- +context: 'Context: This episode follows Tamara’s journey from software and music-tech + engineering into computational linguistics and open-source stewardship, and uses + concrete case studies (credit-scoring fairness, moderation systems) plus tool discussions + (Fairlearn, interpretability packages, secure model serialization) to examine how + technical choices, metrics, and developer practices translate into real societal + outcomes. Recurring threads include tradeoffs in fairness metrics, the necessity + of domain expertise and human-in-the-loop processes, the engineering challenges + of interoperable, secure ML tooling, and the role of community and practitioner + education in shaping responsible ML. + Core: The unifying idea is that building fair, trustworthy AI is a sociotechnical + engineering task: it requires not just algorithms but pragmatic, community-driven + tools, secure software practices, clear interpretability, and organizational processes + that embed human judgment and domain knowledge so technical models produce just, + accountable outcomes in the real world.' 
+--- Links: * [Linkedin](https://www.linkedin.com/in/tamaraatanasoska/){:target="_blank"} diff --git a/_podcast/s05e09-business-acumen.md b/_podcast/feature-engineering-model-monitoring-and-data-governance.md similarity index 97% rename from _podcast/s05e09-business-acumen.md rename to _podcast/feature-engineering-model-monitoring-and-data-governance.md index cb20d1f1..c2058de7 100644 --- a/_podcast/s05e09-business-acumen.md +++ b/_podcast/feature-engineering-model-monitoring-and-data-governance.md @@ -1,12 +1,11 @@ --- -title: 'Practical Data Science & ML: Feature Engineering, Model Monitoring, Data Governance - & Storytelling' -short: Building Business Acumen for Data Professionals -guests: -- thomives -image: images/podcast/s05e09-business-acumen.jpg +title: "Practical Data Science & ML: Feature Engineering, Model Monitoring, Data Governance & Storytelling" +short: "Building Business Acumen for Data Professionals" season: 5 episode: 9 +guests: +- thomives +image: images/podcast/feature-engineering-model-monitoring-and-data-governance.jpg ids: youtube: pImYf9ML95Q anchor: Building-Business-Acumen-for-Data-Professionals---Thom-Ives-e19gq91 @@ -15,6 +14,112 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Building-Business-Acumen-for-Data-Professionals---Thom-Ives-e19gq91 spotify: https://open.spotify.com/episode/4dFbkQI9pF4wUDueZFqxGY apple: https://podcasts.apple.com/us/podcast/building-business-acumen-for-data-professionals-thom-ives/id1541710331?i=1000540181044 + +description: "Master feature engineering, model monitoring & data governance: ML tactics to prevent drift, boost performance, and sharpen data storytelling." +intro: "How do you move from models that look good on paper to reliable machine learning in production—while keeping data clean and stakeholders aligned? 
In this episode Thom Ives, founder of Integrated Machine Learning & AI and a veteran data scientist, walks through practical approaches to feature engineering, model monitoring, data governance, and data storytelling. Thom draws on a career spanning industry roles and mentoring to contrast concept-focused learning versus specialist detail work, and to explain why business acumen and role clarity matter for data teams.

You’ll hear concrete guidance on ETL reliability, closing data collection gaps, and shared responsibility for data governance and literacy. Thom breaks down the ML pipeline—from feature conditioning, scaling, selection, and engineered features to addressing collinearity with PCA and pursuing model parsimony. He also covers model selection trade-offs, spotting data drift and concept drift in production, and the maintenance needed for long-term generalizability. Finally, he emphasizes analytical storytelling and persuasion skills for influencing decisions, plus community resources and mentoring through Integrated ML & AI. Tune in for actionable tactics to improve data quality, monitoring, and stakeholder communication in real-world data science." +topics: +- data science +- machine learning +- ai +- data engineering +dateadded: 2021-10-30 + +duration: PT01H05M17S + +quotableClips: +- name: Episode Introduction & Guest Thom Ives + startOffset: 75 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=75 + endOffset: 110 +- name: Concept-focused learning vs. 
detail specialization + startOffset: 110 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=110 + endOffset: 201 +- name: 'Career journey: naval nuclear program, grad school, early AI' + startOffset: 201 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=201 + endOffset: 311 +- name: 'Industry roles: HP, ON Semiconductor, SaaS AI work' + startOffset: 311 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=311 + endOffset: 532 +- name: 'Mentoring & community building: integrated mentoring origins' + startOffset: 532 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=532 + endOffset: 552 +- name: Why business acumen matters for data professionals + startOffset: 552 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=552 + endOffset: 651 +- name: 'Role clarity: data scientist versus domain expert' + startOffset: 651 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=651 + endOffset: 819 +- name: Rapid delivery & customer-centric feedback (MVP / tracer bullet) + startOffset: 819 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=819 + endOffset: 1172 +- name: ETL reliability, data collection gaps, and advocating for clean data + startOffset: 1172 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1172 + endOffset: 1299 +- name: 'Shared responsibility: data governance and data literacy' + startOffset: 1299 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1299 + endOffset: 1432 +- name: 'Data-driven vs. 
data-informed: definitions and practical balance' + startOffset: 1432 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1432 + endOffset: 1689 +- name: Analytical skills & data storytelling before modeling + startOffset: 1689 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1689 + endOffset: 1881 +- name: 'Machine learning development pipeline: feature conditioning to modeling' + startOffset: 1881 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1881 + endOffset: 2094 +- name: Feature scaling, selection, and engineered features for business insight + startOffset: 2094 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2094 + endOffset: 2446 +- name: Addressing collinearity with PCA and pursuing parsimony + startOffset: 2446 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2446 + endOffset: 2753 +- name: 'Model selection: accuracy, variance, and generalizability' + startOffset: 2753 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2753 + endOffset: 2850 +- name: 'Monitoring models in production: data drift, concept drift, and maintenance' + startOffset: 2850 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2850 + endOffset: 2968 +- name: 'Essential business skills: explainability, persuasion, and influence' + startOffset: 2968 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2968 + endOffset: 3042 +- name: 'Relationship-building: informal check-ins, lunch & beer networking' + startOffset: 3042 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3042 + endOffset: 3349 +- name: 'Remote rapport: virtual lunches, video calls, and building camaraderie' + startOffset: 3349 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3349 + endOffset: 3516 +- name: 'Integrated ML & AI community: structure, ethos, and free resources' + startOffset: 3516 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3516 + endOffset: 3816 +- name: Joining the Slack community and accessing resources + startOffset: 3816 + url: 
https://www.youtube.com/watch?v=pImYf9ML95Q&t=3816 + endOffset: 3955 +- name: Episode wrap-up and final takeaways + startOffset: 3955 + url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3955 + endOffset: 3917 + transcript: - header: Episode Introduction & Guest Thom Ives - line: This week, we'll talk about business acumen. We have a special guest today, @@ -722,7 +827,7 @@ transcript: sec: 2968 time: '49:28' who: Alexey -- header: 'Relationship-building: informal check‑ins, lunch & beer networking' +- header: 'Relationship-building: informal check-ins, lunch & beer networking' - line: Then there’s educating why data science is important, which is something that you, as a data professional, should also be able to do. For instance saying, “Okay, you really need to be careful about this value. You really need to pay attention @@ -1030,120 +1135,6 @@ transcript: sec: 3992 time: '1:06:32' who: Thom -description: 'Master feature engineering, model monitoring & data governance: ML tactics - to prevent drift, boost performance, and sharpen data storytelling.' -intro: How do you move from models that look good on paper to reliable machine learning - in production—while keeping data clean and stakeholders aligned? In this episode - Thom Ives, founder of Integrated Machine Learning & AI and a veteran data scientist, - walks through practical approaches to feature engineering, model monitoring, data - governance, and data storytelling. Thom draws on a career spanning industry roles - and mentoring to contrast concept-focused learning versus specialist detail work, - and to explain why business acumen and role clarity matter for data teams.

- You’ll hear concrete guidance on ETL reliability, closing data collection gaps, - and shared responsibility for data governance and literacy. Thom breaks down the - ML pipeline—from feature conditioning, scaling, selection, and engineered features - to addressing collinearity with PCA and pursuing model parsimony. He also covers - model selection trade-offs, spotting data drift and concept drift in production, - and the maintenance needed for long-term generalizability. Finally, he emphasizes - analytical storytelling and persuasion skills for influencing decisions, plus community - resources and mentoring through Integrated ML & AI. Tune in for actionable tactics - to improve data quality, monitoring, and stakeholder communication in real-world - data science. -dateadded: '2021-10-30' -duration: PT01H05M17S -quotableClips: -- name: Episode Introduction & Guest Thom Ives - startOffset: 75 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=75 - endOffset: 110 -- name: Concept-focused learning vs. 
detail specialization - startOffset: 110 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=110 - endOffset: 201 -- name: 'Career journey: naval nuclear program, grad school, early AI' - startOffset: 201 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=201 - endOffset: 311 -- name: 'Industry roles: HP, ON Semiconductor, SaaS AI work' - startOffset: 311 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=311 - endOffset: 532 -- name: 'Mentoring & community building: integrated mentoring origins' - startOffset: 532 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=532 - endOffset: 552 -- name: Why business acumen matters for data professionals - startOffset: 552 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=552 - endOffset: 651 -- name: 'Role clarity: data scientist versus domain expert' - startOffset: 651 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=651 - endOffset: 819 -- name: Rapid delivery & customer-centric feedback (MVP / tracer bullet) - startOffset: 819 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=819 - endOffset: 1172 -- name: ETL reliability, data collection gaps, and advocating for clean data - startOffset: 1172 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1172 - endOffset: 1299 -- name: 'Shared responsibility: data governance and data literacy' - startOffset: 1299 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1299 - endOffset: 1432 -- name: 'Data-driven vs. 
data-informed: definitions and practical balance' - startOffset: 1432 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1432 - endOffset: 1689 -- name: Analytical skills & data storytelling before modeling - startOffset: 1689 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1689 - endOffset: 1881 -- name: 'Machine learning development pipeline: feature conditioning to modeling' - startOffset: 1881 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=1881 - endOffset: 2094 -- name: Feature scaling, selection, and engineered features for business insight - startOffset: 2094 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2094 - endOffset: 2446 -- name: Addressing collinearity with PCA and pursuing parsimony - startOffset: 2446 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2446 - endOffset: 2753 -- name: 'Model selection: accuracy, variance, and generalizability' - startOffset: 2753 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2753 - endOffset: 2850 -- name: 'Monitoring models in production: data drift, concept drift, and maintenance' - startOffset: 2850 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2850 - endOffset: 2968 -- name: 'Essential business skills: explainability, persuasion, and influence' - startOffset: 2968 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=2968 - endOffset: 3042 -- name: 'Relationship-building: informal check‑ins, lunch & beer networking' - startOffset: 3042 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3042 - endOffset: 3349 -- name: 'Remote rapport: virtual lunches, video calls, and building camaraderie' - startOffset: 3349 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3349 - endOffset: 3516 -- name: 'Integrated ML & AI community: structure, ethos, and free resources' - startOffset: 3516 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3516 - endOffset: 3816 -- name: Joining the Slack community and accessing resources - startOffset: 3816 - url: 
https://www.youtube.com/watch?v=pImYf9ML95Q&t=3816 - endOffset: 3955 -- name: Episode wrap-up and final takeaways - startOffset: 3955 - url: https://www.youtube.com/watch?v=pImYf9ML95Q&t=3955 - endOffset: 3917 --- diff --git a/_podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md b/_podcast/finops-for-data-engineers.md similarity index 95% rename from _podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md rename to _podcast/finops-for-data-engineers.md index 8125f1d0..11b1871d 100644 --- a/_podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.md +++ b/_podcast/finops-for-data-engineers.md @@ -1,20 +1,126 @@ --- +title: "FinOps for Data Engineers: Optimize Cloud Costs, BigQuery & Modern Data Stack" +short: "From Supply Chain Management to Digital Warehousing and FinOps" +season: 20 episode: 6 guests: - eddyzulkifly +image: images/podcast/finops-for-data-engineers.jpg ids: anchor: datatalksclub/episodes/From-Supply-Chain-Management-to-Digital-Warehousing-and-FinOps---Eddy-Zulkifly-e313t7b youtube: 7ePp6wuxM5s -image: images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Supply-Chain-Management-to-Digital-Warehousing-and-FinOps---Eddy-Zulkifly-e313t7b apple: https://podcasts.apple.com/us/podcast/from-supply-chain-management-to-digital-warehousing/id1541710331?i=1000702233986 spotify: https://open.spotify.com/episode/33YZpX7zE6YcBGbQK9Iclp youtube: https://www.youtube.com/watch?v=7ePp6wuxM5s -season: 20 -short: From Supply Chain Management to Digital Warehousing and FinOps -title: 'FinOps for Data Engineers: Optimize Cloud Costs with dbt, BigQuery & Modern - Data Stack' +description: "Master FinOps for data engineers: optimize cloud costs with BigQuery best practices, query tuning and governance to cut spend and boost performance." 
+topics: +- data engineering +- finops +- modern data stack +- tools +- career growth +intro: "How can data engineers bring FinOps practices into their day-to-day work to control cloud spend across BigQuery and the modern data stack? In this episode, Eddy Zulkifly — Staff Data Engineer at Kinaxis with a decade of experience building data platforms on Google Cloud, Azure, and AWS — breaks down practical ways to make cost optimization part of platform design and operations.

We explore core topics including cloud cost optimization for data teams, BigQuery cost controls and query efficiency, cost-aware architecture in the modern data stack, multi-cloud considerations, and monitoring and governance for predictable spend. Eddy draws on experience from Home Depot e-commerce and supply chain analytics, mentoring and teaching roles, and his work on open-source data projects to translate FinOps principles into engineering choices.

If you’re a data engineer or platform owner responsible for budgets and performance, you’ll get actionable guidance on reducing unnecessary cloud costs, improving visibility into usage, and designing pipelines that balance performance with price. Listen to learn practical steps to align data engineering practices with FinOps goals and make cloud spend more predictable." +dateadded: 2025-04-30 +duration: PT00H59M54S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=0 + endOffset: 95 +- name: 'Guest Introduction: Eddy Zulkifly, Staff Data Engineer at Kinaxis' + startOffset: 95 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=95 + endOffset: 134 +- name: 'Career Origins: Industrial Engineering, Supply Chain & Excel Macros' + startOffset: 134 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=134 + endOffset: 380 +- name: 'Career Pivot: From Business Analyst to Data Engineering' + startOffset: 380 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=380 + endOffset: 468 +- name: Analyst Skills as a Foundation for Data Engineering + startOffset: 468 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=468 + endOffset: 486 +- name: 'Docker & Terraform: Learning Curve for Data Practitioners' + startOffset: 486 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=486 + endOffset: 498 +- name: 'Tools & Low-Code Beginnings: Excel, Alteryx, Tableau' + startOffset: 498 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=498 + endOffset: 673 +- name: 'Retail & Warehousing Experience: Forecasting, Preload Optimization' + startOffset: 673 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=673 + endOffset: 1317 +- name: 'Digital Data Warehousing: Data as Inventory and Pipelines' + startOffset: 1317 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1317 + endOffset: 1356 +- name: 'Modern Data Stack: ELT, dbt, BigQuery and Orchestration' + startOffset: 1356 + url: 
https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1356 + endOffset: 1474 +- name: 'Operational Differences: Change Velocity, Monitoring, and Tests' + startOffset: 1474 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1474 + endOffset: 1670 +- name: 'Metric Trees & Data Specs: Translating Business Requirements for FinOps' + startOffset: 1670 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1670 + endOffset: 1796 +- name: 'Building a Digital Warehouse: Stack Choices and Open-Source Tools' + startOffset: 1796 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1796 + endOffset: 1900 +- name: 'FinOps Overview: Cloud Cost Optimization for SaaS Platforms' + startOffset: 1900 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1900 + endOffset: 2055 +- name: Vendor Negotiations & Reservation Instances for Cost Savings + startOffset: 2055 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2055 + endOffset: 2171 +- name: 'Cloud Cost Modeling: VM Sizing, Storage Tiers and Multi-Cloud Comparison' + startOffset: 2171 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2171 + endOffset: 2343 +- name: 'Demand Forecasting Analogy: Inventory Planning Applied to Cloud Capacity' + startOffset: 2343 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2343 + endOffset: 2418 +- name: FinOps Foundation, Cost Tagging & Accountability Best Practices + startOffset: 2418 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2418 + endOffset: 2681 +- name: OUCS & Standardizing Cloud Cost Reporting Across AWS/GCP/Azure + startOffset: 2681 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2681 + endOffset: 2777 +- name: 'FinOps Processes: Parallels with DevOps, DataOps and CI/CD' + startOffset: 2777 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2777 + endOffset: 2881 +- name: 'Staff Data Engineer Responsibilities: Technical & Strategic FinOps Work' + startOffset: 2881 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2881 + endOffset: 3025 +- name: 'Continuous Learning: 
Georgia Tech Master’s, dbt, Python and Applied Analytics' + startOffset: 3025 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3025 + endOffset: 3365 +- name: 'Career Advice: Certifications, Mentorship, Community and Time Management' + startOffset: 3365 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3365 + endOffset: 3572 +- name: Closing Remarks & Key Takeaways + startOffset: 3572 + url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3572 + endOffset: 3594 transcript: - header: Podcast Introduction - line: Let’s get started. This week, we’ll discuss Digital Data Warehousing and FinOps. @@ -971,124 +1077,15 @@ transcript: sec: 3594 time: '59:54' who: Eddy -description: Master FinOps for data engineers—optimize BigQuery costs with dbt, cloud - cost modeling, tagging and forecasting to cut spend and boost pipeline efficiency. -intro: How can data teams optimize cloud costs for analytics without slowing down - delivery? In this episode, Eddy Zulkifly, Staff Data Engineer at Kinaxis, walks through - practical FinOps strategies for data engineers working with the modern data stack. - Eddy brings a decade of experience across Google Cloud, Azure, and AWS, plus prior - roles at Home Depot and ongoing graduate studies at Georgia Tech, and explains how - his background in supply chain and analytics shapes cost-aware engineering.

- We cover building a digital data warehouse using ELT, dbt, BigQuery and orchestration; - operational differences like change velocity, monitoring, and tests; and translating - business needs into metric trees and data specs for FinOps. Eddy breaks down cloud - cost modeling—VM sizing, storage tiers, reservation instances, and multi-cloud comparisons—alongside - cost-tagging, OUCS and standardized reporting across AWS/GCP/Azure. He also shares - vendor negotiation tactics, demand-forecasting analogies for capacity planning, - and the strategic responsibilities of senior data engineers.

Listen to - learn actionable approaches to cloud cost optimization, practical dbt and BigQuery - patterns, and how to embed FinOps practices into your data platform and team workflows. -dateadded: '2025-04-30' -duration: PT00H59M54S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=0 - endOffset: 95 -- name: 'Guest Introduction: Eddy Zulkifly, Staff Data Engineer at Kinaxis' - startOffset: 95 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=95 - endOffset: 134 -- name: 'Career Origins: Industrial Engineering, Supply Chain & Excel Macros' - startOffset: 134 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=134 - endOffset: 380 -- name: 'Career Pivot: From Business Analyst to Data Engineering' - startOffset: 380 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=380 - endOffset: 468 -- name: Analyst Skills as a Foundation for Data Engineering - startOffset: 468 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=468 - endOffset: 486 -- name: 'Docker & Terraform: Learning Curve for Data Practitioners' - startOffset: 486 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=486 - endOffset: 498 -- name: 'Tools & Low-Code Beginnings: Excel, Alteryx, Tableau' - startOffset: 498 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=498 - endOffset: 673 -- name: 'Retail & Warehousing Experience: Forecasting, Preload Optimization' - startOffset: 673 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=673 - endOffset: 1317 -- name: 'Digital Data Warehousing: Data as Inventory and Pipelines' - startOffset: 1317 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1317 - endOffset: 1356 -- name: 'Modern Data Stack: ELT, dbt, BigQuery and Orchestration' - startOffset: 1356 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1356 - endOffset: 1474 -- name: 'Operational Differences: Change Velocity, Monitoring, and Tests' - startOffset: 1474 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1474 - 
endOffset: 1670 -- name: 'Metric Trees & Data Specs: Translating Business Requirements for FinOps' - startOffset: 1670 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1670 - endOffset: 1796 -- name: 'Building a Digital Warehouse: Stack Choices and Open-Source Tools' - startOffset: 1796 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1796 - endOffset: 1900 -- name: 'FinOps Overview: Cloud Cost Optimization for SaaS Platforms' - startOffset: 1900 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=1900 - endOffset: 2055 -- name: Vendor Negotiations & Reservation Instances for Cost Savings - startOffset: 2055 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2055 - endOffset: 2171 -- name: 'Cloud Cost Modeling: VM Sizing, Storage Tiers and Multi-Cloud Comparison' - startOffset: 2171 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2171 - endOffset: 2343 -- name: 'Demand Forecasting Analogy: Inventory Planning Applied to Cloud Capacity' - startOffset: 2343 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2343 - endOffset: 2418 -- name: FinOps Foundation, Cost Tagging & Accountability Best Practices - startOffset: 2418 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2418 - endOffset: 2681 -- name: OUCS & Standardizing Cloud Cost Reporting Across AWS/GCP/Azure - startOffset: 2681 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2681 - endOffset: 2777 -- name: 'FinOps Processes: Parallels with DevOps, DataOps and CI/CD' - startOffset: 2777 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2777 - endOffset: 2881 -- name: 'Staff Data Engineer Responsibilities: Technical & Strategic FinOps Work' - startOffset: 2881 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=2881 - endOffset: 3025 -- name: 'Continuous Learning: Georgia Tech Master’s, dbt, Python and Applied Analytics' - startOffset: 3025 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3025 - endOffset: 3365 -- name: 'Career Advice: Certifications, Mentorship, Community and Time 
Management' - startOffset: 3365 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3365 - endOffset: 3572 -- name: Closing Remarks & Key Takeaways - startOffset: 3572 - url: https://www.youtube.com/watch?v=7ePp6wuxM5s&t=3572 - endOffset: 3594 +context: 'Context: Eddy’s journey from industrial engineering and analyst tools to + staff data engineer frames conversations about modern data stacks, digital warehouses, + and FinOps as practical responses to real business problems. Core: The episode’s + unifying idea is that building impactful data systems requires translating domain + and analyst expertise into operational, scalable, and cost-conscious engineering—combining + the right tools (ELT, dbt, cloud platforms), disciplined practices (testing, monitoring, + CI/CD), and FinOps accountability—to deliver trusted metrics, align technical work + with business value, and enable continuous learning and adaptation.' --- - Links: * [Twitter](https://x.com/eddarief){:target="_blank"} diff --git a/_podcast/s09e04-freelancing-and-consulting-with-data-engineering.md b/_podcast/freelance-data-engineering-pricing-and-clients.md similarity index 97% rename from _podcast/s09e04-freelancing-and-consulting-with-data-engineering.md rename to _podcast/freelance-data-engineering-pricing-and-clients.md index 5d12b232..b1246386 100644 --- a/_podcast/s09e04-freelancing-and-consulting-with-data-engineering.md +++ b/_podcast/freelance-data-engineering-pricing-and-clients.md @@ -1,20 +1,150 @@ --- +title: "Freelance Data Engineering Playbook: Pricing, Client Acquisition & Tools" +short: "Freelancing and Consulting with Data Engineering" +season: 9 episode: 4 guests: - adrianbrudaru -date: 2025-11-07 +image: images/podcast/freelance-data-engineering-pricing-and-clients.jpg ids: anchor: Freelancing-and-Consulting-with-Data-Engineering---Adrian-Brudaru-e1jtkkg youtube: 9DTTrN-khCk -image: images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg links: anchor: 
https://anchor.fm/datatalksclub/episodes/Freelancing-and-Consulting-with-Data-Engineering---Adrian-Brudaru-e1jtkkg apple: https://podcasts.apple.com/us/podcast/freelancing-and-consulting-with-data-engineering/id1541710331?i=1000566841525 spotify: https://open.spotify.com/episode/5M9HFWt8xmqf5HyGu40RmJ?si=C95h0CMkRiazs_ft8Z_gRg youtube: https://www.youtube.com/watch?v=9DTTrN-khCk -season: 9 -short: Freelancing and Consulting with Data Engineering -title: 'Freelance Data Engineering Playbook: Pricing, Client Acquisition & Tools' + +description: "Master freelance data engineering: pricing, client acquisition & tools to negotiate rates, scope projects, build reusable portfolios and win repeat clients." +intro: "How do you price freelance data engineering work, win steady clients, and pick the right tools for messy production problems? In this episode, Adrian Brudaru — an economist-turned-business analyst who moved to Berlin, left corporate/startup cycles to freelance for five years, and now co-founds a data company releasing open source tooling — walks through a practical playbook for freelance data engineers.

We cover pricing models (hourly rates, negotiation, occupancy and income variability), client acquisition (networking, repeat business, recruiters vs. direct contracts, Upwork pros and cons), and scoping techniques (spikes, scope documents, managing expectations). Adrian also digs into technical topics: legacy cleanup, Airflow work, and a data loading tool for volatile schemas and automatic unpacking. Along the way, he explains building a reusable portfolio, transitioning from freelancing to product or investing, working remotely vs. on-site, and how to create opportunities in local markets like Berlin.

Listen to learn concrete approaches to freelance data engineering pricing, client acquisition strategies, scoping projects, and practical tools to handle unstable schemas — so you can evaluate projects, set rates, and grow a sustainable freelance practice." +topics: +- data engineering +- freelance +- career growth +- tools +dateadded: 2022-06-18 +date: 2025-11-07 + +duration: PT01H01M16S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=0 + endOffset: 100 +- name: 'Guest Overview: Adrian’s Move to Freelancing' + startOffset: 100 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=100 + endOffset: 132 +- name: 'Career Journey: Economics & Marketing to Data Engineering' + startOffset: 132 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=132 + endOffset: 214 +- name: 'Becoming a Freelancer: Leaving Corporate Life & First Contract' + startOffset: 214 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=214 + endOffset: 333 +- name: 'Hiring Process Differences: Freelance vs Corporate Engagements' + startOffset: 333 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=333 + endOffset: 426 +- name: Income Variability & Occupancy Rate Explained + startOffset: 426 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=426 + endOffset: 567 +- name: 'Early Challenges: Impostor Syndrome and Team Fit' + startOffset: 567 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=567 + endOffset: 696 +- name: 'First Projects: Legacy Cleanup, Airflow, Data Science & ICO Work' + startOffset: 696 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=696 + endOffset: 908 +- name: 'Shift to Product: Founding a Company and Building a Prototype' + startOffset: 908 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=908 + endOffset: 1092 +- name: 'Pricing Models: Hourly Rates, Negotiation, and Market Ranges' + startOffset: 1092 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1092 + endOffset: 1277 +- name: 'Skill 
Growth: Learning on the Job and Generalist Advantage' + startOffset: 1277 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1277 + endOffset: 1399 +- name: 'Intermediaries: Recruitment Agencies vs Direct Client Work' + startOffset: 1399 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1399 + endOffset: 1665 +- name: 'Client Acquisition: Networking Strategies and Repeat Business' + startOffset: 1665 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1665 + endOffset: 1903 +- name: 'Scoping Work: Spikes, Scope Documents and Managing Expectations' + startOffset: 1903 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1903 + endOffset: 2101 +- name: 'Networking Tactics: In-Person Meetings and Relationship Building' + startOffset: 2101 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2101 + endOffset: 2385 +- name: 'Freelance Platforms: Upwork Pros, Cons, and Time Valuation' + startOffset: 2385 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2385 + endOffset: 2457 +- name: 'Work Location: Choosing Remote or On-Site Projects' + startOffset: 2457 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2457 + endOffset: 2492 +- name: 'Data Loading Tool: Handling Volatile Schemas & Automatic Unpacking' + startOffset: 2492 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2492 + endOffset: 2668 +- name: 'Transition Paths: From Freelancing to Product or Investing' + startOffset: 2668 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2668 + endOffset: 2777 +- name: 'Reusable Portfolio: Building Demonstrable, Reusable Assets' + startOffset: 2777 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2777 + endOffset: 2937 +- name: 'Personality Fit: Freelancing as an Introvert' + startOffset: 2937 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2937 + endOffset: 2973 +- name: 'Work Flexibility: Feasibility of Working Three Months a Year' + startOffset: 2973 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2973 + endOffset: 3087 +- name: 'Project 
Selection: Following Interest vs Strategic Skill Building' + startOffset: 3087 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3087 + endOffset: 3142 +- name: 'Assessing Fit: Traits That Predict Freelance Success' + startOffset: 3142 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3142 + endOffset: 3283 +- name: 'Local Market: Berlin Opportunities and How to Create Them' + startOffset: 3283 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3283 + endOffset: 3330 +- name: 'Client Expectations: Proactivity, Ownership, and Outcomes' + startOffset: 3330 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3330 + endOffset: 3443 +- name: 'Multiple Clients: Balancing Focus, Risk, and Side Gigs' + startOffset: 3443 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3443 + endOffset: 3549 +- name: 'Freelance Cooperative: Slack Group for Collaboration & Referrals' + startOffset: 3549 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3549 + endOffset: 3700 +- name: 'Contact & Resources: LinkedIn, GitHub, Slack Invite and Next Steps' + startOffset: 3700 + url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3700 + endOffset: 3676 + transcript: - header: 'Guest Overview: Adrian’s Move to Freelancing' - line: This week we'll talk about freelancing in data engineering. We have a special @@ -803,7 +933,7 @@ transcript: sec: 2389 time: '39:49' who: Adrian -- header: 'Work Location: Choosing Remote or On‑Site Projects' +- header: 'Work Location: Choosing Remote or On-Site Projects' - line: When you work with your clients, do you normally choose if you want to work remotely or on-site, or is it up to the client? sec: 2457 @@ -1274,143 +1404,6 @@ transcript: sec: 3776 time: '1:02:56' who: Adrian -intro: 'How do you price freelance data engineering work, win steady clients, and - pick the right tools for messy production problems? 
In this episode, Adrian Brudaru - — an economist-turned-business analyst who moved to Berlin, left corporate/startup - cycles to freelance for five years, and now co-founds a data company releasing open - source tooling — walks through a practical playbook for freelance data engineers. -

We cover pricing models (hourly rates, negotiation, occupancy and income - variability), client acquisition (networking, repeat business, recruiters vs. direct - contracts, Upwork pros and cons), and scoping techniques (spikes, scope documents, - managing expectations). Adrian also digs into technical topics: legacy cleanup, - Airflow work, and a data loading tool for volatile schemas and automatic unpacking. - Along the way, he explains building a reusable portfolio, transitioning from freelancing - to product or investing, working remotely vs. on-site, and how to create opportunities - in local markets like Berlin.

Listen to learn concrete approaches to freelance - data engineering pricing, client acquisition strategies, scoping projects, and practical - tools to handle unstable schemas — so you can evaluate projects, set rates, and - grow a sustainable freelance practice.' -description: 'Master freelance data engineering: pricing, client acquisition & tools - to negotiate rates, scope projects, build reusable portfolios and win repeat clients.' -dateadded: '2022-06-18' -duration: PT01H01M16S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=0 - endOffset: 100 -- name: 'Guest Overview: Adrian’s Move to Freelancing' - startOffset: 100 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=100 - endOffset: 132 -- name: 'Career Journey: Economics & Marketing to Data Engineering' - startOffset: 132 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=132 - endOffset: 214 -- name: 'Becoming a Freelancer: Leaving Corporate Life & First Contract' - startOffset: 214 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=214 - endOffset: 333 -- name: 'Hiring Process Differences: Freelance vs Corporate Engagements' - startOffset: 333 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=333 - endOffset: 426 -- name: Income Variability & Occupancy Rate Explained - startOffset: 426 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=426 - endOffset: 567 -- name: 'Early Challenges: Impostor Syndrome and Team Fit' - startOffset: 567 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=567 - endOffset: 696 -- name: 'First Projects: Legacy Cleanup, Airflow, Data Science & ICO Work' - startOffset: 696 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=696 - endOffset: 908 -- name: 'Shift to Product: Founding a Company and Building a Prototype' - startOffset: 908 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=908 - endOffset: 1092 -- name: 'Pricing Models: Hourly Rates, Negotiation, and Market Ranges' - startOffset: 1092 - 
url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1092 - endOffset: 1277 -- name: 'Skill Growth: Learning on the Job and Generalist Advantage' - startOffset: 1277 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1277 - endOffset: 1399 -- name: 'Intermediaries: Recruitment Agencies vs Direct Client Work' - startOffset: 1399 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1399 - endOffset: 1665 -- name: 'Client Acquisition: Networking Strategies and Repeat Business' - startOffset: 1665 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1665 - endOffset: 1903 -- name: 'Scoping Work: Spikes, Scope Documents and Managing Expectations' - startOffset: 1903 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=1903 - endOffset: 2101 -- name: 'Networking Tactics: In-Person Meetings and Relationship Building' - startOffset: 2101 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2101 - endOffset: 2385 -- name: 'Freelance Platforms: Upwork Pros, Cons, and Time Valuation' - startOffset: 2385 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2385 - endOffset: 2457 -- name: 'Work Location: Choosing Remote or On‑Site Projects' - startOffset: 2457 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2457 - endOffset: 2492 -- name: 'Data Loading Tool: Handling Volatile Schemas & Automatic Unpacking' - startOffset: 2492 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2492 - endOffset: 2668 -- name: 'Transition Paths: From Freelancing to Product or Investing' - startOffset: 2668 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2668 - endOffset: 2777 -- name: 'Reusable Portfolio: Building Demonstrable, Reusable Assets' - startOffset: 2777 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2777 - endOffset: 2937 -- name: 'Personality Fit: Freelancing as an Introvert' - startOffset: 2937 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=2937 - endOffset: 2973 -- name: 'Work Flexibility: Feasibility of Working Three Months a Year' - startOffset: 2973 - url: 
https://www.youtube.com/watch?v=9DTTrN-khCk&t=2973 - endOffset: 3087 -- name: 'Project Selection: Following Interest vs Strategic Skill Building' - startOffset: 3087 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3087 - endOffset: 3142 -- name: 'Assessing Fit: Traits That Predict Freelance Success' - startOffset: 3142 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3142 - endOffset: 3283 -- name: 'Local Market: Berlin Opportunities and How to Create Them' - startOffset: 3283 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3283 - endOffset: 3330 -- name: 'Client Expectations: Proactivity, Ownership, and Outcomes' - startOffset: 3330 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3330 - endOffset: 3443 -- name: 'Multiple Clients: Balancing Focus, Risk, and Side Gigs' - startOffset: 3443 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3443 - endOffset: 3549 -- name: 'Freelance Cooperative: Slack Group for Collaboration & Referrals' - startOffset: 3549 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3549 - endOffset: 3700 -- name: 'Contact & Resources: LinkedIn, GitHub, Slack Invite and Next Steps' - startOffset: 3700 - url: https://www.youtube.com/watch?v=9DTTrN-khCk&t=3700 - endOffset: 3676 --- Links: diff --git a/_podcast/s04e08-freelancing.md b/_podcast/freelancing-in-machine-learning.md similarity index 96% rename from _podcast/s04e08-freelancing.md rename to _podcast/freelancing-in-machine-learning.md index 361aba68..8a592776 100644 --- a/_podcast/s04e08-freelancing.md +++ b/_podcast/freelancing-in-machine-learning.md @@ -1,20 +1,11 @@ --- -title: Freelancing in Machine Learning -short: Freelancing in Machine Learning -guests: -- mikiobraun -image: images/podcast/s04e08-freelancing.jpg -topics: -- freelance -- consulting -- machine learning -- career growth -- entrepreneurship -- remote work -- business strategy -date: 2025-11-07 +title: "Freelancing in Machine Learning: Pricing, Client Acquisition & Proposals" +short: "Freelancing 
in Machine Learning" season: 4 episode: 8 +guests: +- mikiobraun +image: images/podcast/freelancing-in-machine-learning.jpg ids: youtube: HfF791e0HR8 anchor: Freelancing-in-Machine-Learning---Mikio-Braun-e166n7r @@ -23,6 +14,140 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Freelancing-in-Machine-Learning---Mikio-Braun-e166n7r spotify: https://open.spotify.com/episode/2oE13mUEa9k4AO5qogYdqv apple: https://podcasts.apple.com/us/podcast/freelancing-in-machine-learning-mikio-braun/id1541710331?i=1000532612872 + +description: "Learn freelancing in machine learning: pricing, client acquisition, and proposals to win ML consulting gigs, scale sustainably, and secure steady income" +intro: "How do you move from academic research or in-house ML engineering to a sustainable freelance career in machine learning — getting clients, pricing your work, and delivering production systems? In this episode, Mikio Braun, who transitioned from TU Berlin into ML roles at Zalando and GetYourGuide and now consults on machine learning production, infrastructure, and teams, walks through that path step by step.

We cover the practical parts of freelancing in machine learning: launching first clients, sourcing leads through network and referrals, and demand generation with LinkedIn, talks, and podcasts; pre-sales tactics like free intro calls, problem discovery, and clear proposals; pricing models and rate-setting strategies; financial planning, capacity management, and avoiding burnout; plus specialization, productizing consulting, and scaling options (agency, product, or return to employment). The episode also addresses administrative essentials for freelancers in Germany (registration, VAT, payments), accounting choices, professional liability, and how to compete in a global remote market. Listen for concrete advice on client-finding, scope discipline, and deliverables so you can evaluate whether freelancing in machine learning is the right next step and how to start with a safety net." +topics: +- freelance +- consulting +- machine learning +- career growth +- entrepreneurship +- strategy +dateadded: 2021-08-20 +date: 2025-11-07 + +duration: PT01H01M48S + +quotableClips: +- name: Episode Introduction & Topic Overview (Freelancing in Machine Learning) + startOffset: 0 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=0 + endOffset: 119 +- name: Guest Background — Academic Research to Industry Roles (TU Berlin → Zalando + → GetYourGuide) + startOffset: 119 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=119 + endOffset: 373 +- name: Consulting Scope — Advising on ML Production, Infrastructure, and Teams + startOffset: 373 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=373 + endOffset: 473 +- name: Freelance Launch — First Clients and Early Momentum + startOffset: 473 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=473 + endOffset: 527 +- name: Client Lead Sources — Network, Referrals, and Direct Outreach + startOffset: 527 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=527 + endOffset: 608 +- name: Personal Branding & Demand Generation (LinkedIn, 
Talks, Podcasts) + startOffset: 608 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=608 + endOffset: 928 +- name: Networking Tactics — Coffee Chats, Lunchclub, and Meetups + startOffset: 928 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=928 + endOffset: 1149 +- name: Intro Calls & Pre-sales — Free Meetings, Qualification, and Trust Building + startOffset: 1149 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1149 + endOffset: 1297 +- name: Problem Discovery — Diagnosing Needs vs. Prescribed Solutions + startOffset: 1297 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1297 + endOffset: 1338 +- name: Proposal Essentials — Written Summaries, Scope Alignment, and Signoff + startOffset: 1338 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1338 + endOffset: 1432 +- name: Pricing Models — Hourly, Fixed-Price, and Value-Based Tradeoffs + startOffset: 1432 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1432 + endOffset: 1777 +- name: Rate Setting Strategies — Negotiation, Risk, and Client Concerns + startOffset: 1777 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1777 + endOffset: 1912 +- name: Financial Planning — Vacation, Risk Buffer, and Expected Income + startOffset: 1912 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1912 + endOffset: 2038 +- name: Workload Management — Capacity Planning, Calendars, and Burnout Prevention + startOffset: 2038 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2038 + endOffset: 2171 +- name: Specialization Strategy — Niches, Productizing Consulting, and Predictability + startOffset: 2171 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2171 + endOffset: 2326 +- name: Client Workflow — Managing Multiple Clients and Daily Rhythms + startOffset: 2326 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2326 + endOffset: 2424 +- name: Income Comparison — Freelance Earnings vs. 
Full-Time Salary + startOffset: 2424 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2424 + endOffset: 2479 +- name: Freelance Tradeoffs — Freedom, Overhead, and Side Projects + startOffset: 2479 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2479 + endOffset: 2660 +- name: Capacity Decisions — Accepting, Delaying, or Declining New Projects + startOffset: 2660 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2660 + endOffset: 2715 +- name: Scaling Paths — Building an Agency, Launching a Product, or Rejoining Employment + startOffset: 2715 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2715 + endOffset: 2907 +- name: Deliverables & Outcomes — Mentoring, Workshops, Prototypes, and Team Results + startOffset: 2907 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2907 + endOffset: 3017 +- name: Skill Growth as a Freelancer — Stretch Assignments and Learning Safely + startOffset: 3017 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3017 + endOffset: 3165 +- name: Client-Finding Lessons — Clarity in Writing and Scope Discipline + startOffset: 3165 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3165 + endOffset: 3210 +- name: Administrative Setup in Germany — Freelance Registration, VAT, and Payments + startOffset: 3210 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3210 + endOffset: 3261 +- name: Accounting Choices — DIY Taxes vs. 
Hiring a Tax Advisor + startOffset: 3261 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3261 + endOffset: 3422 +- name: Professional Liability — Insurance, GDPR, and Contractual Safeguards + startOffset: 3422 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3422 + endOffset: 3539 +- name: Global Market Dynamics — Remote Work, Competition, and Differentiation + startOffset: 3539 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3539 + endOffset: 3662 +- name: Starter Advice — Trying Freelancing with a Safety Net + startOffset: 3662 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3662 + endOffset: 3792 +- name: Closing Remarks & Contact Information (LinkedIn, Twitter @mikiobraun) + startOffset: 3792 + url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3792 + endOffset: 3708 + transcript: - line: This week, we'll talk about freelancing and consulting. And we have a special guest today — Mikio. Actually, Mikio was my teacher six years ago. I was studying @@ -352,7 +477,7 @@ transcript: sec: 1142 time: '19:02' who: Mikio -- header: Intro Calls & Pre‑sales — Free Meetings, Qualification, and Trust Building +- header: Intro Calls & Pre-sales — Free Meetings, Qualification, and Trust Building - line: The other thing you said, "Now you get a client. The interesting part is what happens next". First, you have this intro call. You figure out if you can help the client. Right? How does it look like? Let's say you get a LinkedIn message @@ -441,7 +566,7 @@ transcript: sec: 1420 time: '23:40' who: Mikio -- header: Pricing Models — Hourly, Fixed‑Price, and Value‑Based Tradeoffs +- header: Pricing Models — Hourly, Fixed-Price, and Value-Based Tradeoffs - line: We already have a question here. How to decide on this rate? The question is about the daily rate. We had some prior chats about this, you also mentioned something about a trade-off between pay per day or per hour or per project. 
Maybe @@ -748,7 +873,7 @@ transcript: sec: 2423 time: '40:23' who: Alexey -- header: Income Comparison — Freelance Earnings vs. Full‑Time Salary +- header: Income Comparison — Freelance Earnings vs. Full-Time Salary - line: Yeah. I do normal 10-to-6 days. But not all of that time is billed. The actual client hours make maybe up to half of it. And then there's other stuff like, like this [podcast], or working on talks, or just learning something. @@ -1004,7 +1129,7 @@ transcript: sec: 3156 time: '52:36' who: Mikio -- header: Client‑Finding Lessons — Clarity in Writing and Scope Discipline +- header: Client-Finding Lessons — Clarity in Writing and Scope Discipline - line: We have quite a few questions. This one is very interesting. What is your most relevant learning when finding clients? What did you learn from this process of finding clients? @@ -1270,144 +1395,6 @@ transcript: sec: 3827 time: '1:03:47' who: Alexey -intro: 'How do you move from academic research or in‑house ML engineering to a sustainable - freelance career in machine learning — getting clients, pricing your work, and delivering - production systems? In this episode, Mikio Braun, who transitioned from TU Berlin - into ML roles at Zalando and GetYourGuide and now consults on machine learning production, - infrastructure, and teams, walks through that path step by step.

We cover - the practical parts of freelancing in machine learning: launching first clients, - sourcing leads through network and referrals, and demand generation with LinkedIn, - talks, and podcasts; pre‑sales tactics like free intro calls, problem discovery, - and clear proposals; pricing models and rate‑setting strategies; financial planning, - capacity management, and avoiding burnout; plus specialization, productizing consulting, - and scaling options (agency, product, or return to employment). The episode also - addresses administrative essentials for freelancers in Germany (registration, VAT, - payments), accounting choices, professional liability, and how to compete in a global - remote market. Listen for concrete advice on client‑finding, scope discipline, and - deliverables so you can evaluate whether freelancing in machine learning is the - right next step and how to start with a safety net.' -description: 'Learn freelancing in machine learning: pricing, client acquisition, - and proposals to win ML consulting gigs, scale sustainably, and secure steady income' -dateadded: '2021-08-20' -duration: PT01H01M48S -quotableClips: -- name: Episode Introduction & Topic Overview (Freelancing in Machine Learning) - startOffset: 0 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=0 - endOffset: 119 -- name: Guest Background — Academic Research to Industry Roles (TU Berlin → Zalando - → GetYourGuide) - startOffset: 119 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=119 - endOffset: 373 -- name: Consulting Scope — Advising on ML Production, Infrastructure, and Teams - startOffset: 373 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=373 - endOffset: 473 -- name: Freelance Launch — First Clients and Early Momentum - startOffset: 473 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=473 - endOffset: 527 -- name: Client Lead Sources — Network, Referrals, and Direct Outreach - startOffset: 527 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=527 - 
endOffset: 608 -- name: Personal Branding & Demand Generation (LinkedIn, Talks, Podcasts) - startOffset: 608 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=608 - endOffset: 928 -- name: Networking Tactics — Coffee Chats, Lunchclub, and Meetups - startOffset: 928 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=928 - endOffset: 1149 -- name: Intro Calls & Pre‑sales — Free Meetings, Qualification, and Trust Building - startOffset: 1149 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1149 - endOffset: 1297 -- name: Problem Discovery — Diagnosing Needs vs. Prescribed Solutions - startOffset: 1297 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1297 - endOffset: 1338 -- name: Proposal Essentials — Written Summaries, Scope Alignment, and Signoff - startOffset: 1338 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1338 - endOffset: 1432 -- name: Pricing Models — Hourly, Fixed‑Price, and Value‑Based Tradeoffs - startOffset: 1432 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1432 - endOffset: 1777 -- name: Rate Setting Strategies — Negotiation, Risk, and Client Concerns - startOffset: 1777 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1777 - endOffset: 1912 -- name: Financial Planning — Vacation, Risk Buffer, and Expected Income - startOffset: 1912 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=1912 - endOffset: 2038 -- name: Workload Management — Capacity Planning, Calendars, and Burnout Prevention - startOffset: 2038 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2038 - endOffset: 2171 -- name: Specialization Strategy — Niches, Productizing Consulting, and Predictability - startOffset: 2171 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2171 - endOffset: 2326 -- name: Client Workflow — Managing Multiple Clients and Daily Rhythms - startOffset: 2326 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2326 - endOffset: 2424 -- name: Income Comparison — Freelance Earnings vs. 
Full‑Time Salary - startOffset: 2424 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2424 - endOffset: 2479 -- name: Freelance Tradeoffs — Freedom, Overhead, and Side Projects - startOffset: 2479 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2479 - endOffset: 2660 -- name: Capacity Decisions — Accepting, Delaying, or Declining New Projects - startOffset: 2660 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2660 - endOffset: 2715 -- name: Scaling Paths — Building an Agency, Launching a Product, or Rejoining Employment - startOffset: 2715 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2715 - endOffset: 2907 -- name: Deliverables & Outcomes — Mentoring, Workshops, Prototypes, and Team Results - startOffset: 2907 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=2907 - endOffset: 3017 -- name: Skill Growth as a Freelancer — Stretch Assignments and Learning Safely - startOffset: 3017 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3017 - endOffset: 3165 -- name: Client‑Finding Lessons — Clarity in Writing and Scope Discipline - startOffset: 3165 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3165 - endOffset: 3210 -- name: Administrative Setup in Germany — Freelance Registration, VAT, and Payments - startOffset: 3210 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3210 - endOffset: 3261 -- name: Accounting Choices — DIY Taxes vs. 
Hiring a Tax Advisor - startOffset: 3261 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3261 - endOffset: 3422 -- name: Professional Liability — Insurance, GDPR, and Contractual Safeguards - startOffset: 3422 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3422 - endOffset: 3539 -- name: Global Market Dynamics — Remote Work, Competition, and Differentiation - startOffset: 3539 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3539 - endOffset: 3662 -- name: Starter Advice — Trying Freelancing with a Safety Net - startOffset: 3662 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3662 - endOffset: 3792 -- name: Closing Remarks & Contact Information (LinkedIn, Twitter @mikiobraun) - startOffset: 3792 - url: https://www.youtube.com/watch?v=HfF791e0HR8&t=3792 - endOffset: 3708 --- Books: diff --git a/_podcast/s12e09-staff-ai-engineer.md b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md similarity index 97% rename from _podcast/s12e09-staff-ai-engineer.md rename to _podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md index 2fc82655..10f1af46 100644 --- a/_podcast/s12e09-staff-ai-engineer.md +++ b/_podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.md @@ -1,19 +1,139 @@ --- +title: "Transitioning from Academia to Industry as a Staff AI Engineer: Interview Prep, MLOps & Onboarding" +short: "Transitioning from Academia to Industry as a Staff AI Engineer" +season: 12 episode: 9 guests: - tatianagabruseva +image: images/podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.jpg ids: anchor: Staff-AI-Engineer---Tatiana-Gabruseva-e1v3on7 youtube: _xr1_xb736E -image: images/podcast/s12e09-staff-ai-engineer.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Staff-AI-Engineer---Tatiana-Gabruseva-e1v3on7 apple: https://podcasts.apple.com/us/podcast/staff-ai-engineer-tatiana-gabruseva/id1541710331?i=1000600246792 spotify: 
https://open.spotify.com/episode/4o52jMRR2cctCD8LuFFLdD?si=tBO_9KkiSWySHu7jaM-McQ youtube: https://www.youtube.com/watch?v=_xr1_xb736E -season: 12 -short: Staff AI Engineer -title: 'Staff AI Engineer: From Academia to Industry — Interview Prep, MLOps & Onboarding' + +description: "Discover Staff AI Engineer interview prep, MLOps & onboarding tactics to transition from academia—coding strategies, system design, mentorship for impact" +intro: "How do you transition from academia into a Staff AI Engineer role while nailing interview prep, MLOps, and onboarding? In this episode, Tatiana Gabruseva — a computer vision/deep learning engineer, Kaggle Competitions Master, and Senior ML Engineer at Cork University Hospital — walks through her shift from physics and healthcare research into industry engineering leadership.

We cover practical, concrete topics listeners can use: the onboarding shock she experienced at LinkedIn and how to prioritize learning; ramping up production stacks (Scala, Spark, Kubernetes); the Staff AI Engineer remit of opinion-setting, cross-functional influence, and strategy; and staff engineer archetypes (deep specialist, cross-team advisor, hands-on mentor). Tatiana explains how to translate academic leadership, grants, and research into industry roadmaps, how mentorship accelerates onboarding, and how to convince employers with applied projects and collaborations.

For candidates she shares interview prep tactics — LeetCode coding plans, ML and system design prep, mock interviews, networking and referrals, and reframing rejections — plus real-world involvement in MLOps, ETL pipelines, and heavy code review. Tune in to learn actionable steps for moving from academia to a staff engineering role and succeeding in interviews, onboarding, and production ML." +topics: +- machine learning +- career transition +- MLOps +- staff AI engineer +- career growth +dateadded: 2023-02-18 + +duration: PT00H59M23S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=0 + endOffset: 71 +- name: Episode kickoff and guest reintroduction + startOffset: 71 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=71 + endOffset: 113 +- name: 'Guest background: physics → healthcare → machine learning' + startOffset: 113 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=113 + endOffset: 204 +- name: Onboarding shock at LinkedIn and industry mindset shift + startOffset: 204 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=204 + endOffset: 343 +- name: Ramping up technical stack as a tech lead (Scala, Spark, Kubernetes) + startOffset: 343 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=343 + endOffset: 450 +- name: 'Staff AI Engineer role: opinion, strategy, and cross-functional influence' + startOffset: 450 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=450 + endOffset: 664 +- name: 'Staff engineer archetypes: deep specialist, cross-team advisor, hands-on + mentor' + startOffset: 664 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=664 + endOffset: 881 +- name: Transferring academic skills to industry leadership and roadmapping + startOffset: 881 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=881 + endOffset: 1007 +- name: 'Onboarding priorities: common mistakes and faster learning' + startOffset: 1007 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1007 + 
endOffset: 1065 +- name: Mentorship importance for onboarding and career growth + startOffset: 1065 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1065 + endOffset: 1148 +- name: 'Skipping mid-level roles: landing a staff position from academia' + startOffset: 1148 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1148 + endOffset: 1286 +- name: Translating research leadership and grants experience to industry impact + startOffset: 1286 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1286 + endOffset: 1530 +- name: 'Convincing employers: framing applied projects and industry collaborations' + startOffset: 1530 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1530 + endOffset: 1705 +- name: 'Interview journey: early failures, coding gaps, and commitment to prep' + startOffset: 1705 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1705 + endOffset: 1781 +- name: Referrals and networking influence on hiring outcomes + startOffset: 1781 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1781 + endOffset: 1928 +- name: Reframing rejections as learning opportunities + startOffset: 1928 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1928 + endOffset: 2080 +- name: 'Coding interview strategy: LeetCode plan, timeline, and persistence' + startOffset: 2080 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2080 + endOffset: 2384 +- name: 'ML design interviews: physics-style decomposition, blogs, and mock practice' + startOffset: 2384 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2384 + endOffset: 2616 +- name: 'System design prep: Grokking, mock interviews, and quick study tactics' + startOffset: 2616 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2616 + endOffset: 2923 +- name: Mock interviews and building a mentor network + startOffset: 2923 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2923 + endOffset: 3070 +- name: Staff involvement in MLOps, ETL, pipelines, and data team collaboration + startOffset: 3070 + url: 
https://www.youtube.com/watch?v=_xr1_xb736E&t=3070 + endOffset: 3139 +- name: Managing heavy code review load and context switching across projects + startOffset: 3139 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3139 + endOffset: 3253 +- name: Advice for academics aiming for staff roles in industry + startOffset: 3253 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3253 + endOffset: 3460 +- name: 'Excitement of AI work: generative models, R&D freedom, and measurable impact' + startOffset: 3460 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3460 + endOffset: 3585 +- name: 'Recommended books: communication, staff engineering, and leadership' + startOffset: 3585 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3585 + endOffset: 3634 +- name: Episode closing and final thanks + startOffset: 3634 + url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3634 + endOffset: 3563 + transcript: - header: Podcast Introduction - header: Episode kickoff and guest reintroduction @@ -1083,133 +1203,6 @@ transcript: sec: 3634 time: '1:00:34' who: Alexey -description: Discover Staff AI Engineer interview prep, MLOps & onboarding tactics - to transition from academia—coding strategies, system design, mentorship for impact. -intro: 'How do you transition from academia into a Staff AI Engineer role while nailing - interview prep, MLOps, and onboarding? In this episode, Tatiana Gabruseva — a computer - vision/deep learning engineer, Kaggle Competitions Master, and Senior ML Engineer - at Cork University Hospital — walks through her shift from physics and healthcare - research into industry engineering leadership.

We cover practical, concrete - topics listeners can use: the onboarding shock she experienced at LinkedIn and how - to prioritize learning; ramping up production stacks (Scala, Spark, Kubernetes); - the Staff AI Engineer remit of opinion-setting, cross-functional influence, and - strategy; and staff engineer archetypes (deep specialist, cross-team advisor, hands-on - mentor). Tatiana explains how to translate academic leadership, grants, and research - into industry roadmaps, how mentorship accelerates onboarding, and how to convince - employers with applied projects and collaborations.

For candidates she - shares interview prep tactics — LeetCode coding plans, ML and system design prep, - mock interviews, networking and referrals, and reframing rejections — plus real-world - involvement in MLOps, ETL pipelines, and heavy code review. Tune in to learn actionable - steps for moving from academia to a staff engineering role and succeeding in interviews, - onboarding, and production ML.' -dateadded: '2023-02-18' -duration: PT00H59M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=0 - endOffset: 71 -- name: Episode kickoff and guest reintroduction - startOffset: 71 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=71 - endOffset: 113 -- name: 'Guest background: physics → healthcare → machine learning' - startOffset: 113 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=113 - endOffset: 204 -- name: Onboarding shock at LinkedIn and industry mindset shift - startOffset: 204 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=204 - endOffset: 343 -- name: Ramping up technical stack as a tech lead (Scala, Spark, Kubernetes) - startOffset: 343 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=343 - endOffset: 450 -- name: 'Staff AI Engineer role: opinion, strategy, and cross-functional influence' - startOffset: 450 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=450 - endOffset: 664 -- name: 'Staff engineer archetypes: deep specialist, cross-team advisor, hands-on - mentor' - startOffset: 664 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=664 - endOffset: 881 -- name: Transferring academic skills to industry leadership and roadmapping - startOffset: 881 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=881 - endOffset: 1007 -- name: 'Onboarding priorities: common mistakes and faster learning' - startOffset: 1007 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1007 - endOffset: 1065 -- name: Mentorship importance for onboarding and career growth - startOffset: 
1065 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1065 - endOffset: 1148 -- name: 'Skipping mid-level roles: landing a staff position from academia' - startOffset: 1148 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1148 - endOffset: 1286 -- name: Translating research leadership and grants experience to industry impact - startOffset: 1286 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1286 - endOffset: 1530 -- name: 'Convincing employers: framing applied projects and industry collaborations' - startOffset: 1530 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1530 - endOffset: 1705 -- name: 'Interview journey: early failures, coding gaps, and commitment to prep' - startOffset: 1705 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1705 - endOffset: 1781 -- name: Referrals and networking influence on hiring outcomes - startOffset: 1781 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1781 - endOffset: 1928 -- name: Reframing rejections as learning opportunities - startOffset: 1928 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=1928 - endOffset: 2080 -- name: 'Coding interview strategy: LeetCode plan, timeline, and persistence' - startOffset: 2080 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2080 - endOffset: 2384 -- name: 'ML design interviews: physics-style decomposition, blogs, and mock practice' - startOffset: 2384 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2384 - endOffset: 2616 -- name: 'System design prep: Grokking, mock interviews, and quick study tactics' - startOffset: 2616 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2616 - endOffset: 2923 -- name: Mock interviews and building a mentor network - startOffset: 2923 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=2923 - endOffset: 3070 -- name: Staff involvement in MLOps, ETL, pipelines, and data team collaboration - startOffset: 3070 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3070 - endOffset: 3139 -- name: Managing heavy code 
review load and context switching across projects - startOffset: 3139 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3139 - endOffset: 3253 -- name: Advice for academics aiming for staff roles in industry - startOffset: 3253 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3253 - endOffset: 3460 -- name: 'Excitement of AI work: generative models, R&D freedom, and measurable impact' - startOffset: 3460 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3460 - endOffset: 3585 -- name: 'Recommended books: communication, staff engineering, and leadership' - startOffset: 3585 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3585 - endOffset: 3634 -- name: Episode closing and final thanks - startOffset: 3634 - url: https://www.youtube.com/watch?v=_xr1_xb736E&t=3634 - endOffset: 3563 --- Links: diff --git a/_podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md b/_podcast/from-academic-research-to-data-engineering-freelancing.md similarity index 93% rename from _podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md rename to _podcast/from-academic-research-to-data-engineering-freelancing.md index d95104a2..0fe670d8 100644 --- a/_podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.md +++ b/_podcast/from-academic-research-to-data-engineering-freelancing.md @@ -1,20 +1,120 @@ --- +title: "From Academic Research to Lean Data Consulting: MVP Strategy, Problem-First Thinking & Freelance Practice Building" +short: "From Simulation Algorithms to Production-Grade Data Systems" +season: 21 episode: 1 guests: - orellgarten +image: images/podcast/from-academic-research-to-data-engineering-freelancing.jpg ids: anchor: datatalksclub/episodes/From-Simulations-to-Freelance-Data-Engineering-Orells-Journey-Out-of-Academia-and-Into-Consulting---Orell-Garten-e369a6b youtube: pkcpH5N-GP8 -image: images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg links: anchor: 
https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Simulations-to-Freelance-Data-Engineering-Orells-Journey-Out-of-Academia-and-Into-Consulting---Orell-Garten-e369a6b apple: https://podcasts.apple.com/us/podcast/from-simulations-to-freelance-data-engineering-orells/id1541710331?i=1000720245457 spotify: https://open.spotify.com/episode/5HCSIO0mO8Pr5Yv9puZ72R youtube: https://www.youtube.com/watch?v=pkcpH5N-GP8 -season: 21 -short: From Simulation Algorithms to Production-Grade Data Systems -title: 'Synthetic Medical Imaging Data for AI: Startup Data Engineering, MVPs & Freelance - Transition' +description: "Learn lean data, MVP strategy, and problem-first thinking to build a freelance consulting practice—turn research into actionable services and win clients fast." +topics: +- data engineering +- academia +- AI +- MLOps +- computer vision +- freelance +- career transition +intro: "How do you turn academic research and simulation expertise into a lean data consulting practice without getting bogged down in perfect solutions? In this episode we talk with Orell Garten, an electrical engineering graduate who focused on simulation algorithms, left a PhD during COVID, and learned through a government-funded startup program how to translate scientific research into real products.

Orell breaks down problem-first thinking, MVP strategy for data and simulation projects, and the practical steps involved in freelance practice building after academia. We explore how to apply rigorous simulation methods to client problems, prioritize minimal viable products over perfection, and navigate the transition from lab-based research to lean data consulting.

Listeners will come away with a clearer framework for deciding what to build first, how to validate assumptions with lightweight experiments, and how to position technical skills for consulting engagements. This episode is for researchers and engineers considering freelance work, consultants refining their MVP approach, and anyone interested in applying simulation methods and problem-first thinking to deliver practical data-driven solutions." +dateadded: 2025-08-05 +duration: PT01H03M31S +quotableClips: +- name: Episode Introduction & Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=0 + endOffset: 139 +- name: 'Career Background: Electrical Engineering and Simulation Algorithms' + startOffset: 139 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=139 + endOffset: 196 +- name: Transition Out of Academia During COVID + startOffset: 196 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=196 + endOffset: 282 +- name: 'Simulation Research: RF and Wave Propagation Modeling' + startOffset: 282 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=282 + endOffset: 544 +- name: 'Startup Pivot: Synthetic Medical Imaging Data for AI' + startOffset: 544 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=544 + endOffset: 582 +- name: 'Go-to-Market Lesson: Problem-First vs Technology-First' + startOffset: 582 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=582 + endOffset: 800 +- name: 'Early Data Engineering Practice: Minimal Viable Data Work' + startOffset: 800 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=800 + endOffset: 861 +- name: Simulation-HPC Integration and Secure Data Management + startOffset: 861 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=861 + endOffset: 965 +- name: 'Iteration Differences: Academia vs. 
Startup Timelines' + startOffset: 965 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=965 + endOffset: 1075 +- name: Scientific Method in Product Discovery and Hypothesis Testing + startOffset: 1075 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1075 + endOffset: 1174 +- name: 'Freelance Launch: From CTO Role to Consulting via LinkedIn' + startOffset: 1174 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1174 + endOffset: 1379 +- name: 'Prototype Delivery: IoT Data Engineering Proof of Concept' + startOffset: 1379 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1379 + endOffset: 1533 +- name: 'Freelance Risks: Runway, Cashflow, and Operating Expenses' + startOffset: 1533 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1533 + endOffset: 1850 +- name: 'Client Acquisition: Networking, Recruiters, and Referrals' + startOffset: 1850 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1850 + endOffset: 2062 +- name: 'Specialization: Industrial Data Integration and Custom ETL' + startOffset: 2062 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2062 + endOffset: 2340 +- name: 'MVP Workflow: Manual Extraction, CSVs, and Local Analysis' + startOffset: 2340 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2340 + endOffset: 2607 +- name: 'Preventing Overengineering: Weekly Feedback and Iteration' + startOffset: 2607 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2607 + endOffset: 2999 +- name: 'Continuous Learning: Practical Experiments and DuckDB' + startOffset: 2999 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2999 + endOffset: 3222 +- name: 'LLMs for Data Cleaning: Domain Knowledge Limitations' + startOffset: 3222 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3222 + endOffset: 3509 +- name: 'Tech Stack & Systems Thinking: Python, C++, DBT, Docker' + startOffset: 3509 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3509 + endOffset: 3653 +- name: 'Manual Data Exploration: Handling Edge Cases Before Automation' + 
startOffset: 3653 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3653 + endOffset: 3811 +- name: Closing Remarks and Freelancing Advice + startOffset: 3811 + url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3811 + endOffset: 3811 transcript: - header: Episode Introduction & Overview - line: This week, we'll talk about many different things. We will discuss our guest’s @@ -783,117 +883,18 @@ transcript: sec: 3811 time: '1:03:31' who: Alexey -description: 'Learn synthetic medical imaging & data engineering: build MVPs, integrate - simulation-HPC, optimize ETL, and shift to freelance with client-acquisition tactics.' -intro: 'How do you turn simulation research into usable synthetic medical imaging - data for AI, build a minimal viable data pipeline, and pivot into freelance consulting? - In this episode, Orell Garten — an electrical engineer trained in simulation algorithms - who left a PhD during COVID and explored productization through a government-funded - startup program — walks through that journey. We cover his simulation work in RF - and wave propagation, the startup pivot to synthetic medical imaging data for AI, - and the go-to-market lesson of problem-first versus technology-first.

Listen - for practical data engineering guidance: minimal viable data work, simulation–HPC - integration, secure data management, and an MVP workflow built on manual extraction, - CSVs, and local analysis. Orell also discusses scientific-method product discovery, - preventing overengineering with weekly feedback, and tool choices (Python, C++, - DBT, Docker, DuckDB). He explains launching a freelance practice via LinkedIn, prototype - delivery for IoT data engineering, client acquisition, and managing runway and cashflow. - If you’re building synthetic data pipelines, medical imaging datasets, or transitioning - to freelance data engineering, this episode delivers concrete tactics, risks to - plan for, and hands-on techniques you can apply immediately.' -dateadded: '2025-08-05' -duration: PT01H03M31S -quotableClips: -- name: Episode Introduction & Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=0 - endOffset: 139 -- name: 'Career Background: Electrical Engineering and Simulation Algorithms' - startOffset: 139 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=139 - endOffset: 196 -- name: Transition Out of Academia During COVID - startOffset: 196 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=196 - endOffset: 282 -- name: 'Simulation Research: RF and Wave Propagation Modeling' - startOffset: 282 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=282 - endOffset: 544 -- name: 'Startup Pivot: Synthetic Medical Imaging Data for AI' - startOffset: 544 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=544 - endOffset: 582 -- name: 'Go-to-Market Lesson: Problem-First vs Technology-First' - startOffset: 582 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=582 - endOffset: 800 -- name: 'Early Data Engineering Practice: Minimal Viable Data Work' - startOffset: 800 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=800 - endOffset: 861 -- name: Simulation-HPC Integration and Secure Data Management - startOffset: 861 - url: 
https://www.youtube.com/watch?v=pkcpH5N-GP8&t=861 - endOffset: 965 -- name: 'Iteration Differences: Academia vs. Startup Timelines' - startOffset: 965 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=965 - endOffset: 1075 -- name: Scientific Method in Product Discovery and Hypothesis Testing - startOffset: 1075 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1075 - endOffset: 1174 -- name: 'Freelance Launch: From CTO Role to Consulting via LinkedIn' - startOffset: 1174 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1174 - endOffset: 1379 -- name: 'Prototype Delivery: IoT Data Engineering Proof of Concept' - startOffset: 1379 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1379 - endOffset: 1533 -- name: 'Freelance Risks: Runway, Cashflow, and Operating Expenses' - startOffset: 1533 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1533 - endOffset: 1850 -- name: 'Client Acquisition: Networking, Recruiters, and Referrals' - startOffset: 1850 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=1850 - endOffset: 2062 -- name: 'Specialization: Industrial Data Integration and Custom ETL' - startOffset: 2062 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2062 - endOffset: 2340 -- name: 'MVP Workflow: Manual Extraction, CSVs, and Local Analysis' - startOffset: 2340 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2340 - endOffset: 2607 -- name: 'Preventing Overengineering: Weekly Feedback and Iteration' - startOffset: 2607 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2607 - endOffset: 2999 -- name: 'Continuous Learning: Practical Experiments and DuckDB' - startOffset: 2999 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=2999 - endOffset: 3222 -- name: 'LLMs for Data Cleaning: Domain Knowledge Limitations' - startOffset: 3222 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3222 - endOffset: 3509 -- name: 'Tech Stack & Systems Thinking: Python, C++, DBT, Docker' - startOffset: 3509 - url: 
https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3509 - endOffset: 3653 -- name: 'Manual Data Exploration: Handling Edge Cases Before Automation' - startOffset: 3653 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3653 - endOffset: 3811 -- name: Closing Remarks and Freelancing Advice - startOffset: 3811 - url: https://www.youtube.com/watch?v=pkcpH5N-GP8&t=3811 - endOffset: 3811 ---- +context: 'Context: An electrical-engineering researcher turned founder and freelancer + describes moving from simulation-driven academia into startups and consulting, recounting + a pivot to synthetic medical imaging, building IoT/data prototypes, client acquisition, + and practical tooling and workflows while balancing technical depth, cashflow risks, + and continuous learning. + Core: The episode’s unifying idea is a scientific, problem-first approach to data + engineering and product development—validate hypotheses quickly with minimal viable + (often manual) solutions, iterate fast using domain specialization and systems thinking, + and pragmatically balance technical rigor with business constraints to turn research + into real, sustainable products and freelance work.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/ogarten/){:target="_blank"} diff --git a/_podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md similarity index 94% rename from _podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md rename to _podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md index 3d6a4ceb..57fd1c71 100644 --- a/_podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.md +++ b/_podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.md @@ -1,20 +1,127 @@ --- +title: "From Biology to ML: Build a Data Science Portfolio with Open-Source, Computer Vision & Transformers" +short: "Career advice, learning, and featuring women in ML and AI" +season: 19 episode: 7 guests: - isabellabicalho +image: images/podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.jpg ids: - anchor: atalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura + anchor: datatalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura youtube: GifY8Zn-pnU -image: images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Career-advice--learning--and-featuring-women-in-ML-and-AI---Isabella-Bicalho-e2s3ura apple: https://podcasts.apple.com/us/podcast/career-advice-learning-and-featuring-women-in-ml-and/id1541710331?i=1000680294201 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=rx69Xf98QZqGqgpEQgzX2w youtube: https://www.youtube.com/watch?v=GifY8Zn-pnU -season: 19 -short: Career advice, learning, and featuring women in ML and AI -title: 'From Biology to ML: 
Build a Data Science Portfolio with Open-Source, Computer - Vision & Transformers' +description: "Build a data science portfolio with open-source computer vision & transformers—gain hands-on projects, GitHub proof, and interview-ready ML skills." +topics: +- machine learning +- computer vision +- open-source +- bioinformatics +- career transition +intro: "How do you move from a biology background into machine learning and build a data science portfolio that actually gets noticed? In this episode, Isabella Bicalho — a machine learning engineer and data scientist with three years of hands-on AI development and roots in computational research — walks through practical approaches for showcasing skills with open-source, computer vision, and transformer projects.

We cover how to translate domain knowledge from biology into ML problem framing, the role of open-source contributions in a data science portfolio, and project ideas that demonstrate computer vision and transformer expertise. Isabella also discusses how to document work, choose reproducible experiments, and highlight impact for hiring managers or collaborators. She runs a newsletter dedicated to showcasing women’s accomplishments in data science, bringing an equity-minded perspective to building visible work.

If you’re building a data science portfolio, shifting careers into ML, or want concrete ways to leverage open-source and modern architectures like transformers and computer vision models, this conversation offers practical guidance, realistic project priorities, and tips for making your work discoverable to recruiters and the community." +dateadded: 2024-12-17 +duration: PT01H03M42S +quotableClips: +- name: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella + Bicalho)' + startOffset: 0 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=0 + endOffset: 421 +- name: 'Career Overview: Transition from Biology to Machine Learning' + startOffset: 421 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=421 + endOffset: 509 +- name: Statistics as Gateway to Machine Learning; Progression to Transformers + startOffset: 509 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=509 + endOffset: 554 +- name: 'Education: University of Maranhão and University of Marseille' + startOffset: 554 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=554 + endOffset: 674 +- name: 'INRIA Internship: Biomarkers and Immunotherapy Prediction' + startOffset: 674 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=674 + endOffset: 865 +- name: INRIA's Role in AI Research and France's AI Ecosystem + startOffset: 865 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=865 + endOffset: 955 +- name: 'Freelance Work: Recommendation System & Knowledge Graph Automation' + startOffset: 955 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=955 + endOffset: 1132 +- name: 'Career Pivot: Choosing Engineering Over a PhD' + startOffset: 1132 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1132 + endOffset: 1342 +- name: 'First Freelance Client: CV Visibility and Networking' + startOffset: 1342 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1342 + endOffset: 1419 +- name: Leveraging Open-Source & AI for Good to Gain Experience + startOffset: 1419 + url: 
https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1419 + endOffset: 1590 +- name: 'Hugging Face Community Course: Computer Vision Contributions & Review' + startOffset: 1590 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1590 + endOffset: 1917 +- name: 'Teaching & Communication: Simplifying ML Jargon for Learners' + startOffset: 1917 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1917 + endOffset: 2081 +- name: 'Finding Open-Source Opportunities: Communities, Docs, and Local Chapters' + startOffset: 2081 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2081 + endOffset: 2246 +- name: 'Types of Open-Source Projects: Code, Data, and Applied Solutions' + startOffset: 2246 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2246 + endOffset: 2412 +- name: 'Green Space Segmentation: Sentinel-2, CNNs vs Transformers, Practicality' + startOffset: 2412 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2412 + endOffset: 2544 +- name: Project Work as Job-Ready Experience and Portfolio Building + startOffset: 2544 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2544 + endOffset: 2608 +- name: 'Soft Skills from Collaboration: Communication and Prioritization' + startOffset: 2608 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2608 + endOffset: 2734 +- name: 'Informational Networking: Reaching Out for Role Insights' + startOffset: 2734 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2734 + endOffset: 2995 +- name: 'Onboarding New Contributors: Low Entry Barriers and Mentorship' + startOffset: 2995 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2995 + endOffset: 3094 +- name: 'AI Assistants in Learning: Benefits and Limitations of ChatGPT' + startOffset: 3094 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3094 + endOffset: 3239 +- name: 'Data Like Substack: Spotlighting Women in Data and ML' + startOffset: 3239 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3239 + endOffset: 3447 +- name: 'Featured Interviews: Bioinformatics, Fake 
News Detection, AI Ethics' + startOffset: 3447 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3447 + endOffset: 3736 +- name: 'Connecting with Isabella: LinkedIn and Substack Contact Info' + startOffset: 3736 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3736 + endOffset: 3822 +- name: Episode Wrap-Up and Closing Remarks + startOffset: 3822 + url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3822 + endOffset: 3822 transcript: - header: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella Bicalho)' @@ -965,126 +1072,16 @@ transcript: sec: 3822 time: '1:03:42' who: Alexey -description: Build a data science portfolio with open-source computer vision projects, - gain real job-ready experience, networking tactics and freelance tips. -intro: 'How do you pivot from biology into machine learning and build a job-ready - data science portfolio using open-source, computer vision and transformers? In this - episode Isabella Bicalho — a Machine Learning Engineer and Data Scientist with three - years of hands-on AI development and prior computational research — walks through - her path from Biology (University of Maranhão, University of Marseille) to ML, including - an INRIA internship on biomarkers and immunotherapy prediction.

We cover - practical steps for portfolio building: using open-source contributions and community - courses (Hugging Face) to get experience, real project examples like green space - segmentation with Sentinel-2 and the trade-offs between CNNs and transformers, and - applied freelance work such as recommendation systems and knowledge graph automation. - Isabella also explains how statistics became her gateway to transformers, how to - find low-barrier open-source projects (docs, data, applied code), and how collaboration - builds soft skills recruiters value.

Listen to learn concrete strategies - for creating a data science portfolio, where to find computer vision and transformer - projects, how to leverage community and mentorship, and how to communicate your - work to land roles in machine learning.' -dateadded: '2024-12-17' -duration: PT01H03M42S -quotableClips: -- name: 'Episode Introduction: Continuous Learning in Data Science (guest Isabella - Bicalho)' - startOffset: 0 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=0 - endOffset: 421 -- name: 'Career Overview: Transition from Biology to Machine Learning' - startOffset: 421 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=421 - endOffset: 509 -- name: Statistics as Gateway to Machine Learning; Progression to Transformers - startOffset: 509 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=509 - endOffset: 554 -- name: 'Education: University of Maranhão and University of Marseille' - startOffset: 554 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=554 - endOffset: 674 -- name: 'INRIA Internship: Biomarkers and Immunotherapy Prediction' - startOffset: 674 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=674 - endOffset: 865 -- name: INRIA's Role in AI Research and France's AI Ecosystem - startOffset: 865 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=865 - endOffset: 955 -- name: 'Freelance Work: Recommendation System & Knowledge Graph Automation' - startOffset: 955 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=955 - endOffset: 1132 -- name: 'Career Pivot: Choosing Engineering Over a PhD' - startOffset: 1132 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1132 - endOffset: 1342 -- name: 'First Freelance Client: CV Visibility and Networking' - startOffset: 1342 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1342 - endOffset: 1419 -- name: Leveraging Open-Source & AI for Good to Gain Experience - startOffset: 1419 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1419 - endOffset: 1590 -- name: 'Hugging Face Community 
Course: Computer Vision Contributions & Review' - startOffset: 1590 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1590 - endOffset: 1917 -- name: 'Teaching & Communication: Simplifying ML Jargon for Learners' - startOffset: 1917 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=1917 - endOffset: 2081 -- name: 'Finding Open-Source Opportunities: Communities, Docs, and Local Chapters' - startOffset: 2081 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2081 - endOffset: 2246 -- name: 'Types of Open-Source Projects: Code, Data, and Applied Solutions' - startOffset: 2246 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2246 - endOffset: 2412 -- name: 'Green Space Segmentation: Sentinel-2, CNNs vs Transformers, Practicality' - startOffset: 2412 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2412 - endOffset: 2544 -- name: Project Work as Job-Ready Experience and Portfolio Building - startOffset: 2544 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2544 - endOffset: 2608 -- name: 'Soft Skills from Collaboration: Communication and Prioritization' - startOffset: 2608 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2608 - endOffset: 2734 -- name: 'Informational Networking: Reaching Out for Role Insights' - startOffset: 2734 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2734 - endOffset: 2995 -- name: 'Onboarding New Contributors: Low Entry Barriers and Mentorship' - startOffset: 2995 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=2995 - endOffset: 3094 -- name: 'AI Assistants in Learning: Benefits and Limitations of ChatGPT' - startOffset: 3094 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3094 - endOffset: 3239 -- name: 'Data Like Substack: Spotlighting Women in Data and ML' - startOffset: 3239 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3239 - endOffset: 3447 -- name: 'Featured Interviews: Bioinformatics, Fake News Detection, AI Ethics' - startOffset: 3447 - url: 
https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3447 - endOffset: 3736 -- name: 'Connecting with Isabella: LinkedIn and Substack Contact Info' - startOffset: 3736 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3736 - endOffset: 3822 -- name: Episode Wrap-Up and Closing Remarks - startOffset: 3822 - url: https://www.youtube.com/watch?v=GifY8Zn-pnU&t=3822 - endOffset: 3822 ---- +context: 'Context: Isabella Bicalho’s episode traces a career arc from biology to + machine learning—through internships (INRIA), freelancing, open-source contributions, + teaching, and community engagement—illustrating practical projects, networking, + and pedagogical work as the vehicles for growth. + Core: The unifying idea is that continuous, community-centered, project-based learning—combining + hands-on applied work, open-source contribution, mentorship, clear communication, + and judicious use of AI tools—serves as the most effective pathway to build job-ready + skills, bridge disciplines, and create real-world impact in data science and ML.' 
+--- Links: * [Github](https://github.com/bellabf){:target="_blank"} diff --git a/_podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md similarity index 94% rename from _podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md rename to _podcast/from-computer-vision-research-to-autonomous-driving-ai.md index 2443fd3d..6d2ceb03 100644 --- a/_podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.md +++ b/_podcast/from-computer-vision-research-to-autonomous-driving-ai.md @@ -1,19 +1,145 @@ --- +title: "Applying Computer Vision Research to Building Production-Ready AI Systems for Real-World Deployment" +short: "Lessons from Applied AI: Tesla, Waymo, and Beyond" +season: 22 episode: 2 guests: - aishwaryajadhav +image: images/podcast/from-computer-vision-research-to-autonomous-driving-ai.jpg ids: anchor: datatalksclub/episodes/Lessons-from-Applied-AI-Tesla--Waymo--and-Beyond---Aishwarya-Jadhav-e39befu youtube: vK_SxyqIfwk -image: images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Lessons-from-Applied-AI-Tesla--Waymo--and-Beyond---Aishwarya-Jadhav-e39befu apple: https://podcasts.apple.com/us/podcast/lessons-from-applied-ai-tesla-waymo-and-beyond/id1541710331?i=1000731200298 spotify: https://open.spotify.com/episode/0h9eX7m6H2TPqOjUwb3Jw6?si=I4rKrHXpQTmS7cJBMJbUMA youtube: https://www.youtube.com/watch?v=vK_SxyqIfwk -season: 22 -short: 'Lessons from Applied AI: Tesla, Waymo, and Beyond' -title: 'Autonomous Driving AI: LiDAR vs Camera, On-Vehicle Inference & Model Compression' +description: "Master computer vision to build production-ready AI systems - learn deployment, scaling, validation and monitoring to launch reliable real-world models." 
+topics: +- computer vision +- academia +- autonomous driving +- MLOps +- LLMs +- production +- career growth +- career transition +intro: "How do you take computer vision research out of the lab and turn it into production-ready AI that actually works in the real world? In this episode Aishwarya Jadhav, a Machine Learning Engineer with over four years of industry experience and a Master’s from Carnegie Mellon University, walks through the challenges of applying computer vision research to production systems. Her background spans multimodal LLMs, generative AI, and computer vision, with research experience in multimodal deep learning and text information extraction and projects including assistive technologies for the visually impaired.

We cover the bridge between applied research and engineering: translating prototypes into robust, deployable models, integrating multimodal pipelines, balancing model accuracy with latency and scalability, and practical considerations for production-ready AI and real-world deployment. Listeners will gain concrete perspectives on how research informs product choices, what to prioritize when deploying computer vision systems, and how multimodal approaches and generative models fit into end-to-end solutions. This episode is useful for ML engineers, researchers, and product teams focused on building reliable, deployable computer vision and multimodal AI systems." +dateadded: 2025-10-21 +duration: PT00H59M01S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=0 + endOffset: 93 +- name: 'Guest Bio & Career Overview: Finance to Self-Driving AI' + startOffset: 93 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=93 + endOffset: 171 +- name: 'Morgan Stanley: Big Data Engineering & Transition to ML' + startOffset: 171 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=171 + endOffset: 235 +- name: 'Carnegie Mellon: Research Focus & Computer Vision Projects' + startOffset: 235 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=235 + endOffset: 339 +- name: 'AI Guide Dog: Mobile Navigation for the Visually Impaired' + startOffset: 339 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=339 + endOffset: 554 +- name: 'AI Guide Dog: Beta Testing, Iterative Development, Hardware Constraints' + startOffset: 554 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=554 + endOffset: 682 +- name: 'Sensor Tradeoffs: LiDAR, Radar, and Cost Considerations' + startOffset: 682 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=682 + endOffset: 718 +- name: 'LiDAR vs Cameras: Principles and Automotive Use Cases' + startOffset: 718 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=718 + endOffset: 885 +- name: 
'Tesla''s Camera-First Perception: 360° Vision without LiDAR' + startOffset: 885 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=885 + endOffset: 966 +- name: 'Autopilot Use Cases: Driver Assistance vs Full Autonomy' + startOffset: 966 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=966 + endOffset: 1181 +- name: 'Waymo Ride-Hailing: App, Service Model, and Driverless Rides' + startOffset: 1181 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1181 + endOffset: 1197 +- name: 'Gesture Recognition for Traffic Control: Police & Construction Signals' + startOffset: 1197 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1197 + endOffset: 1337 +- name: 'On-Vehicle Inference: Performance Constraints and Optimization' + startOffset: 1337 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1337 + endOffset: 1408 +- name: 'Model Compression Techniques: Quantization and Speedups' + startOffset: 1408 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1408 + endOffset: 1445 +- name: 'Malaria Mapping: AI for Social Good Using Satellite & Topographic Data' + startOffset: 1445 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1445 + endOffset: 1623 +- name: 'Malaria Project Impact: Field Feedback and Resource Optimization' + startOffset: 1623 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1623 + endOffset: 1785 +- name: 'Validation Pipeline: Simulation, Closed Tracks, and On-Road Testing' + startOffset: 1785 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1785 + endOffset: 1862 +- name: 'Sensor Data Management: Collection, Privacy, and Scale' + startOffset: 1862 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1862 + endOffset: 1929 +- name: 'Labeling Strategy: Human Annotation and Automated Labeling' + startOffset: 1929 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1929 + endOffset: 1963 +- name: 'Model Release Cadence: Safety Checks and Staged Deployments' + startOffset: 1963 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1963 + 
endOffset: 2172 +- name: 'Cross-Domain Transfer: Perception Techniques for Robotics & Drones' + startOffset: 2172 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2172 + endOffset: 2238 +- name: 'Real-World Complexity: Edge Cases, Geography, and System Coordination' + startOffset: 2238 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2238 + endOffset: 2624 +- name: 'Reinforcement Learning vs Perception: Roles and Practical Constraints' + startOffset: 2624 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2624 + endOffset: 3088 +- name: 'Testing Sensitive Cases: Evaluation Stages and Inherited Tests' + startOffset: 3088 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3088 + endOffset: 3173 +- name: 'Multimodal LLMs in Autonomous Driving: Research and Practical Challenges' + startOffset: 3173 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3173 + endOffset: 3325 +- name: 'Career Pathways: Skills, Projects, and Entry Routes into Self-Driving AI' + startOffset: 3325 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3325 + endOffset: 3384 +- name: 'Practical Projects & Tools: Vision Apps, LLMs, and Coding Agents' + startOffset: 3384 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3384 + endOffset: 3515 +- name: Closing Remarks and Final Advice + startOffset: 3515 + url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3515 + endOffset: 3541 transcript: - header: Podcast Introduction - line: Hey everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -1065,141 +1191,22 @@ transcript: sec: 3541 time: '59:01' who: Alexey -description: Discover LiDAR vs camera tradeoffs and model compression for on-vehicle - inference in autonomous driving - learn quantization, edge speedups, testing tips. -intro: How should self-driving systems balance LiDAR, cameras and edge compute to - deliver safe, real-time perception? 
In this episode, Aishwarya Jadhav — a machine - learning engineer with a Master's from Carnegie Mellon and four years deploying - multimodal LLMs, generative AI and computer vision — walks through the practical - tradeoffs in autonomous driving AI. Drawing on her assistive-tech work (AI Guide - Dog) and research background, she explains LiDAR vs camera principles, radar and - cost constraints, and Tesla’s camera-first approach for 360° vision.

We - cover on-vehicle inference limits, model compression techniques like quantization - and speedups for edge inference, plus validation pipelines from simulation to closed - tracks and on-road testing. You’ll also hear about sensor data management, labeling - strategies, multimodal LLM challenges in autonomy, gesture recognition for traffic - control, and cross-domain transfer to robotics and drones. The conversation closes - with real-world complexity, testing sensitive cases, and actionable career pathways - and projects.

If you want concrete guidance on sensor fusion, model compression, - and deployment-ready perception systems — plus practical testing and data strategies - for self-driving AI — this episode delivers grounded, technical insight. -dateadded: '2025-10-21' -duration: PT00H59M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=0 - endOffset: 93 -- name: 'Guest Bio & Career Overview: Finance to Self-Driving AI' - startOffset: 93 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=93 - endOffset: 171 -- name: 'Morgan Stanley: Big Data Engineering & Transition to ML' - startOffset: 171 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=171 - endOffset: 235 -- name: 'Carnegie Mellon: Research Focus & Computer Vision Projects' - startOffset: 235 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=235 - endOffset: 339 -- name: 'AI Guide Dog: Mobile Navigation for the Visually Impaired' - startOffset: 339 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=339 - endOffset: 554 -- name: 'AI Guide Dog: Beta Testing, Iterative Development, Hardware Constraints' - startOffset: 554 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=554 - endOffset: 682 -- name: 'Sensor Tradeoffs: LiDAR, Radar, and Cost Considerations' - startOffset: 682 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=682 - endOffset: 718 -- name: 'LiDAR vs Cameras: Principles and Automotive Use Cases' - startOffset: 718 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=718 - endOffset: 885 -- name: 'Tesla''s Camera-First Perception: 360° Vision without LiDAR' - startOffset: 885 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=885 - endOffset: 966 -- name: 'Autopilot Use Cases: Driver Assistance vs Full Autonomy' - startOffset: 966 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=966 - endOffset: 1181 -- name: 'Waymo Ride-Hailing: App, Service Model, and Driverless Rides' - startOffset: 1181 - url: 
https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1181 - endOffset: 1197 -- name: 'Gesture Recognition for Traffic Control: Police & Construction Signals' - startOffset: 1197 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1197 - endOffset: 1337 -- name: 'On-Vehicle Inference: Performance Constraints and Optimization' - startOffset: 1337 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1337 - endOffset: 1408 -- name: 'Model Compression Techniques: Quantization and Speedups' - startOffset: 1408 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1408 - endOffset: 1445 -- name: 'Malaria Mapping: AI for Social Good Using Satellite & Topographic Data' - startOffset: 1445 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1445 - endOffset: 1623 -- name: 'Malaria Project Impact: Field Feedback and Resource Optimization' - startOffset: 1623 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1623 - endOffset: 1785 -- name: 'Validation Pipeline: Simulation, Closed Tracks, and On-Road Testing' - startOffset: 1785 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1785 - endOffset: 1862 -- name: 'Sensor Data Management: Collection, Privacy, and Scale' - startOffset: 1862 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1862 - endOffset: 1929 -- name: 'Labeling Strategy: Human Annotation and Automated Labeling' - startOffset: 1929 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1929 - endOffset: 1963 -- name: 'Model Release Cadence: Safety Checks and Staged Deployments' - startOffset: 1963 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=1963 - endOffset: 2172 -- name: 'Cross-Domain Transfer: Perception Techniques for Robotics & Drones' - startOffset: 2172 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2172 - endOffset: 2238 -- name: 'Real-World Complexity: Edge Cases, Geography, and System Coordination' - startOffset: 2238 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2238 - endOffset: 2624 -- name: 'Reinforcement Learning vs Perception: 
Roles and Practical Constraints' - startOffset: 2624 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=2624 - endOffset: 3088 -- name: 'Testing Sensitive Cases: Evaluation Stages and Inherited Tests' - startOffset: 3088 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3088 - endOffset: 3173 -- name: 'Multimodal LLMs in Autonomous Driving: Research and Practical Challenges' - startOffset: 3173 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3173 - endOffset: 3325 -- name: 'Career Pathways: Skills, Projects, and Entry Routes into Self-Driving AI' - startOffset: 3325 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3325 - endOffset: 3384 -- name: 'Practical Projects & Tools: Vision Apps, LLMs, and Coding Agents' - startOffset: 3384 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3384 - endOffset: 3515 -- name: Closing Remarks and Final Advice - startOffset: 3515 - url: https://www.youtube.com/watch?v=vK_SxyqIfwk&t=3515 - endOffset: 3541 ---- +context: 'Context — This episode moves from the guest’s finance-to-self-driving AI + career and research in computer vision to concrete projects (AI Guide Dog, malaria + mapping), deep dives on sensor and model tradeoffs (LiDAR vs cameras, on-vehicle + inference, model compression), operational realities (data collection, labeling, + validation pipelines, staged releases, edge cases), system-level questions (reinforcement + learning vs perception, multimodal LLMs), and practical career/project advice. 
+ Core — Building trustworthy, real-world AI is an engineering-driven cycle that tightly + couples pragmatic sensor and model choices, efficient on-device inference, rigorous + data and validation pipelines, staged safe deployment, and ethical/social purpose: + the episode’s unifying idea is that successful AI systems aren’t just about better + algorithms but about integrating perception, hardware constraints, data practices, + testing, and human-centered impact into a continuous, safety-first development process + that scales across domains from autonomous vehicles to assistive tech and public-health + applications.' +--- Links: * [Linkedin](https://www.linkedin.com/in/aishwaryajadhav8/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md b/_podcast/from-data-freelancer-to-startup-open-source-products.md similarity index 94% rename from _podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md rename to _podcast/from-data-freelancer-to-startup-open-source-products.md index f2ba3879..eabef276 100644 --- a/_podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.md +++ b/_podcast/from-data-freelancer-to-startup-open-source-products.md @@ -1,12 +1,21 @@ --- +title: "From Data Freelancer to Startup: Open-Source Products and Bottom-Up Adoption" +short: "The Entrepreneurship Journey: From Freelancing to Starting a Company" +season: 17 episode: 1 guests: - adrianbrudaru -date: 2025-11-07 +image: images/podcast/from-data-freelancer-to-startup-open-source-products.jpg ids: - anchor: atatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k + anchor: datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k youtube: vOpEQiCsaLw -image: images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg +links: + 
anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k + apple: https://podcasts.apple.com/us/podcast/the-entrepreneurship-journey-from-freelancing-to/id1541710331?i=1000638715212 + spotify: https://open.spotify.com/episode/7wBmJHSXPHoW0mEIbNDgqr?si=z7klLtveT1ioGi6bg8hR7Q + youtube: https://www.youtube.com/watch?v=vOpEQiCsaLw +description: "Discover how to build an open-source data product for Python devs: bootstrap, ship DLT transforms, and drive bottom-up adoption to find PMF." +intro: "How do you move from freelancing to building an open-source data company that wins via bottom-up adoption? In this episode Adrian Brudaru — an economics graduate who pivoted to business analysis in Berlin, then spent years freelancing before co-founding a data startup — walks through that transition and the practical tradeoffs he encountered.

We cover lessons from freelancing and agency work, why Adrian chose to build a product rather than grow an agency, and the recurring pain of stakeholder alignment versus technical setup. Adrian explains DLT — a declarative JSON→relational transformation for data pipelines — and why the product targets Python users as a developer-focused library. Hear how workshops, documentation, and live support doubled as product validation, how scrappy bootstrapping and consulting revenue funded early payroll, and what signals indicate product–market fit for open-source tooling.

If you’re building open-source data tools, developer tooling, or plotting a bottom-up go-to-market, this episode offers concrete tactics on iteration, docs-as-product, ecosystem partnerships, and positioning against platforms like Airbyte/Fivetran — helping you prioritize engineering, adoption, and sustainable monetization." topics: - entrepreneurship - freelance @@ -15,18 +24,133 @@ topics: - leadership - career growth - consulting -links: - anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Entrepreneurship-Journey-From-Freelancing-to-Starting-a-Company---Adrian-Brudaru-e2cut0k - apple: https://podcasts.apple.com/us/podcast/the-entrepreneurship-journey-from-freelancing-to/id1541710331?i=1000638715212 - spotify: https://open.spotify.com/episode/7wBmJHSXPHoW0mEIbNDgqr?si=z7klLtveT1ioGi6bg8hR7Q - youtube: https://www.youtube.com/watch?v=vOpEQiCsaLw -season: 17 -short: 'The Entrepreneurship Journey: From Freelancing to Starting a Company' -title: 'Launch an Open-Source Data Company: Declarative JSON to Relational DLT for - Python Devs' +dateadded: 2023-12-18 +date: 2025-11-07 +duration: PT00H59M43S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=0 + endOffset: 113 +- name: 'Episode Overview: Building an Open-Source Data Company' + startOffset: 113 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=113 + endOffset: 199 +- name: 'Career Origins: 2012 Berlin Startups and Corporate Exit' + startOffset: 199 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=199 + endOffset: 243 +- name: 'Freelancing Experience: Autonomy, Savings, Diverse Projects' + startOffset: 243 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=243 + endOffset: 320 +- name: From Hourly Billing to Project-Based Work and Subcontracting + startOffset: 320 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=320 + endOffset: 438 +- name: 'Freelancing Lifestyle: Flexibility and Long-Term Boredom' + 
startOffset: 438 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=438 + endOffset: 526 +- name: 'Subcontracting Growth: Agency-like Management Tradeoffs' + startOffset: 526 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=526 + endOffset: 651 +- name: 'Agency Challenges: Responsibility, Incentives, and Misalignment' + startOffset: 651 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=651 + endOffset: 751 +- name: Choosing Product Building Over Growing an Agency + startOffset: 751 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=751 + endOffset: 822 +- name: 'Recurring Pain: Stakeholder Alignment vs Technical Setup' + startOffset: 822 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=822 + endOffset: 976 +- name: 'Target Users: Empowering Python Users with Dev Tooling' + startOffset: 976 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=976 + endOffset: 1071 +- name: 'Anti-patterns: Dumping JSON into Data Warehouses' + startOffset: 1071 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1071 + endOffset: 1178 +- name: 'DLT Concept: Declarative JSON→Relational Transformation' + startOffset: 1178 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1178 + endOffset: 1410 +- name: 'Product Iteration: Engine, Abstractions, and User Feedback' + startOffset: 1410 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1410 + endOffset: 1523 +- name: 'Team Formation: Meeting Co-founders Through Projects' + startOffset: 1523 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1523 + endOffset: 1659 +- name: 'Founding as Investment: Time, Risk, and Opportunity Cost' + startOffset: 1659 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1659 + endOffset: 1868 +- name: 'Bootstrapping Strategy: Savings, Consulting Revenue, and Payroll' + startOffset: 1868 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1868 + endOffset: 2060 +- name: 'Scrappy Operations: Office Squatting and Cost Management' + startOffset: 2060 + url: 
https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2060 + endOffset: 2160 +- name: 'Workshop Validation: Teaching as a Product Feedback Loop' + startOffset: 2160 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2160 + endOffset: 2248 +- name: 'Workshop Design: Checkpoints, Live Support, and CodeSpaces' + startOffset: 2248 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2248 + endOffset: 2450 +- name: 'Product Identity: DLT as a Developer-Focused Library' + startOffset: 2450 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2450 + endOffset: 2483 +- name: 'Documentation Investment: When Docs Become Productive Assets' + startOffset: 2483 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2483 + endOffset: 2640 +- name: 'Product–Market Fit Signals: Core Adoption and Removal Test' + startOffset: 2640 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2640 + endOffset: 2876 +- name: 'Current Focus: Leading Go-to-Market and Bottom-Up Strategy' + startOffset: 2876 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2876 + endOffset: 3053 +- name: 'Ecosystem Partnerships: DocDB Integration and Joint Demos' + startOffset: 3053 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3053 + endOffset: 3310 +- name: 'Roadmap: Paid Complement to the Open-Source Library' + startOffset: 3310 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3310 + endOffset: 3430 +- name: 'Source Generation Experiments: OpenAPI Generators for Pipelines' + startOffset: 3430 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3430 + endOffset: 3491 +- name: 'Positioning vs Platforms: Library-First vs Airbyte/Fivetran' + startOffset: 3491 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3491 + endOffset: 3641 +- name: 'Recommended Reading: "From Survival to Thrival" on PMF' + startOffset: 3641 + url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3641 + endOffset: 3656 +- name: Episode Wrap-Up and Next Steps + startOffset: 3656 + url: 
https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3656 + endOffset: 3583 transcript: - header: Podcast Introduction -- header: 'Episode Overview: Building an Open‑Source Data Company' +- header: 'Episode Overview: Building an Open-Source Data Company' - line: This week, we'll talk about building an open source data company, and not just building but… It's not the first time we have our guest, Adrian, on this podcast. Before, we spoke about being a data freelancer, so we'll talk about building @@ -145,7 +269,7 @@ transcript: sec: 361 time: '6:01' who: Alexey -- header: 'Freelancing Lifestyle: Flexibility and Long‑Term Boredom' +- header: 'Freelancing Lifestyle: Flexibility and Long-Term Boredom' - line: Exactly. The customer typically already wants to know how much they're roughly going to pay for what they get. They don't actually care about your hourly rate, generally. They care about the final outcome and what it costs them. @@ -175,7 +299,7 @@ transcript: sec: 521 time: '8:41' who: Alexey -- header: 'Subcontracting Growth: Agency‑like Management Tradeoffs' +- header: 'Subcontracting Growth: Agency-like Management Tradeoffs' - line: Go fishing on a Wednesday, for example. You can decide every day what you're doing more or less. Of course, you need to be civilized and do it within the boundaries of other people working with you. But you do have a lot of autonomy and freedom. @@ -323,7 +447,7 @@ transcript: sec: 976 time: '16:16' who: Adrian -- header: 'Anti‑patterns: Dumping JSON into Data Warehouses' +- header: 'Anti-patterns: Dumping JSON into Data Warehouses' - line: From what I heard from you… I know a bit about the product (the tool) you’re working on. What I heard is –when you have a data warehouse, you don't just have it for the sake of having it, you need to put some data in it. 
Then you have a @@ -464,7 +588,7 @@ transcript: sec: 1509 time: '25:09' who: Alexey -- header: 'Team Formation: Meeting Co‑founders Through Projects' +- header: 'Team Formation: Meeting Co-founders Through Projects' - line: So it's a classic story – we met at work. On my last project, the guy that hired me had been working for this company for six years. He had previously founded some companies. And basically, I ended up working with him to build this Growth @@ -764,7 +888,7 @@ transcript: sec: 2448 time: '40:48' who: Alexey -- header: 'Product Identity: DLT as a Developer‑Focused Library' +- header: 'Product Identity: DLT as a Developer-Focused Library' - line: Yes. It's pretty simple. It's called Data Load Tool (DLT for short). I often like to tell people, “Don't think of it as a data loading tool, think of it as a pipeline building tool.” And the reason for this is because it's a developer @@ -923,7 +1047,7 @@ transcript: sec: 2871 time: '47:51' who: Alexey -- header: 'Current Focus: Leading Go‑to‑Market and Bottom‑Up Strategy' +- header: 'Current Focus: Leading Go-to-Market and Bottom-Up Strategy' - line: Exactly. Basically, what this means is that you need to figure out what needs to happen next – figure out some kind of way to do it – and then try to get help to do more of it in a better way, if that pays off, kind of. So I'm doing a lot @@ -1046,7 +1170,7 @@ transcript: sec: 3302 time: '55:02' who: Alexey -- header: 'Roadmap: Paid Complement to the Open‑Source Library' +- header: 'Roadmap: Paid Complement to the Open-Source Library' - line: Yes, it's very hard to raise money for just research, right? Basically, we have got a product market fit with our library. Now we're working towards a paid solution. That paid solution would be something complimentary. 
It wouldn't limit @@ -1105,7 +1229,7 @@ transcript: sec: 3485 time: '58:05' who: Alexey -- header: 'Positioning vs Platforms: Library‑First vs Airbyte/Fivetran' +- header: 'Positioning vs Platforms: Library-First vs Airbyte/Fivetran' - line: We don't really want to go… Airbyte is a platform. We’ll never be a platform in that way. Even if we do offer some kind of orchestration, that is not our selling point. We don't want to be another Fivetran. Airbyte, currently, is kind of trying @@ -1151,7 +1275,7 @@ transcript: sec: 3643 time: '1:00:43' who: Adrian -- header: Episode Wrap‑Up and Next Steps +- header: Episode Wrap-Up and Next Steps - line: Okay. That's all we have time for today. We are a bit… We took three more minutes than we should have. Thanks a lot for joining us today and sharing your experience. I'm really curious. I think the last time we had an interview was @@ -1170,148 +1294,11 @@ transcript: sec: 3696 time: '1:01:36' who: Alexey -intro: 'How do you build an open-source data company that helps Python developers - turn messy JSON into reliable relational tables? In this episode, Adrian Brudaru - — an economics-trained, Berlin-based founder who moved from startups to freelancing - and now co‑founded a data tooling company — walks through the journey of launching - developer-focused open‑source software for data engineering.

We cover why - dumping JSON into data warehouses is an anti‑pattern and introduce the core DLT - concept: a declarative JSON→relational transformation engine aimed at Python devs. - Adrian explains product iteration (engine, abstractions, user feedback), running - workshops as a validation loop, treating documentation as a product asset, and practical - bootstrapping strategies (savings, consulting revenue, scrappy operations). He also - discusses team formation via projects, go‑to‑market tactics with a bottom‑up, library‑first - approach, ecosystem partnerships (DocDB integration and joint demos), roadmap plans - for a paid complement to the open‑source library, and experiments with source generation - like OpenAPI generators for pipelines.

Listen if you want concrete technical - and GTM guidance on building an open‑source data company, implementing declarative - JSON→relational workflows for Python, and how to validate and scale developer tooling - without prematurely becoming a platform.' -description: Discover building open-source JSON-to-Relational data pipelines in Python, - practical DLT patterns, anti-pattern fixes, bootstrap tips to speed adoption. -dateadded: '2023-12-18' -duration: PT00H59M43S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=0 - endOffset: 113 -- name: 'Episode Overview: Building an Open‑Source Data Company' - startOffset: 113 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=113 - endOffset: 199 -- name: 'Career Origins: 2012 Berlin Startups and Corporate Exit' - startOffset: 199 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=199 - endOffset: 243 -- name: 'Freelancing Experience: Autonomy, Savings, Diverse Projects' - startOffset: 243 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=243 - endOffset: 320 -- name: From Hourly Billing to Project-Based Work and Subcontracting - startOffset: 320 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=320 - endOffset: 438 -- name: 'Freelancing Lifestyle: Flexibility and Long‑Term Boredom' - startOffset: 438 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=438 - endOffset: 526 -- name: 'Subcontracting Growth: Agency‑like Management Tradeoffs' - startOffset: 526 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=526 - endOffset: 651 -- name: 'Agency Challenges: Responsibility, Incentives, and Misalignment' - startOffset: 651 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=651 - endOffset: 751 -- name: Choosing Product Building Over Growing an Agency - startOffset: 751 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=751 - endOffset: 822 -- name: 'Recurring Pain: Stakeholder Alignment vs Technical Setup' - startOffset: 822 - url: 
https://www.youtube.com/watch?v=vOpEQiCsaLw&t=822 - endOffset: 976 -- name: 'Target Users: Empowering Python Users with Dev Tooling' - startOffset: 976 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=976 - endOffset: 1071 -- name: 'Anti‑patterns: Dumping JSON into Data Warehouses' - startOffset: 1071 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1071 - endOffset: 1178 -- name: 'DLT Concept: Declarative JSON→Relational Transformation' - startOffset: 1178 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1178 - endOffset: 1410 -- name: 'Product Iteration: Engine, Abstractions, and User Feedback' - startOffset: 1410 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1410 - endOffset: 1523 -- name: 'Team Formation: Meeting Co‑founders Through Projects' - startOffset: 1523 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1523 - endOffset: 1659 -- name: 'Founding as Investment: Time, Risk, and Opportunity Cost' - startOffset: 1659 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1659 - endOffset: 1868 -- name: 'Bootstrapping Strategy: Savings, Consulting Revenue, and Payroll' - startOffset: 1868 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=1868 - endOffset: 2060 -- name: 'Scrappy Operations: Office Squatting and Cost Management' - startOffset: 2060 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2060 - endOffset: 2160 -- name: 'Workshop Validation: Teaching as a Product Feedback Loop' - startOffset: 2160 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2160 - endOffset: 2248 -- name: 'Workshop Design: Checkpoints, Live Support, and CodeSpaces' - startOffset: 2248 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2248 - endOffset: 2450 -- name: 'Product Identity: DLT as a Developer‑Focused Library' - startOffset: 2450 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2450 - endOffset: 2483 -- name: 'Documentation Investment: When Docs Become Productive Assets' - startOffset: 2483 - url: 
https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2483 - endOffset: 2640 -- name: 'Product–Market Fit Signals: Core Adoption and Removal Test' - startOffset: 2640 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2640 - endOffset: 2876 -- name: 'Current Focus: Leading Go‑to‑Market and Bottom‑Up Strategy' - startOffset: 2876 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=2876 - endOffset: 3053 -- name: 'Ecosystem Partnerships: DocDB Integration and Joint Demos' - startOffset: 3053 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3053 - endOffset: 3310 -- name: 'Roadmap: Paid Complement to the Open‑Source Library' - startOffset: 3310 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3310 - endOffset: 3430 -- name: 'Source Generation Experiments: OpenAPI Generators for Pipelines' - startOffset: 3430 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3430 - endOffset: 3491 -- name: 'Positioning vs Platforms: Library‑First vs Airbyte/Fivetran' - startOffset: 3491 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3491 - endOffset: 3641 -- name: 'Recommended Reading: "From Survival to Thrival" on PMF' - startOffset: 3641 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3641 - endOffset: 3656 -- name: Episode Wrap‑Up and Next Steps - startOffset: 3656 - url: https://www.youtube.com/watch?v=vOpEQiCsaLw&t=3656 - endOffset: 3583 +context: Turning hands-on consulting and hard-won data engineering experience into + a library-first, open-source company that solves a concrete pain—declarative JSON→relational + transformations for Python users—by validating through workshops and docs, iterating + with real user feedback, and scaling via bottom-up adoption, ecosystem integrations, + and paid complementary offerings rather than agency growth or platform lock-in. 
--- Links: diff --git a/_podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md similarity index 94% rename from _podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md rename to _podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md index 17381446..7947e092 100644 --- a/_podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.md +++ b/_podcast/from-devops-to-data-engineering-automation-open-source-volunteering.md @@ -1,19 +1,147 @@ --- +title: "From DevOps to Data Engineering: Automation, Open Source & Volunteering for Career Transitions" +short: "Career choices, transitions and promotions in and out of tech" +season: 19 episode: 8 guests: - agitajaunzeme +image: images/podcast/from-devops-to-data-engineering-automation-open-source-volunteering.jpg ids: - anchor: atalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv + anchor: datatalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv youtube: QKWu5-6_6TE -image: images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Career-choices--transitions-and-promotions-in-and-out-of-tech---Agita-Jaunzeme-e2t05nv apple: https://podcasts.apple.com/us/podcast/career-choices-transitions-and-promotions-in-and-out/id1541710331?i=1000683499310 spotify: https://open.spotify.com/episode/0UW7fLgm9fqMG64GQwvgIN?si=ZixbzDcZT2mNkVrJjZVbeA youtube: https://www.youtube.com/watch?v=QKWu5-6_6TE -season: 19 -short: Career choices, transitions and promotions in and out of tech -title: 'DevOps to Data Engineering: Automation, Open Source & Volunteering' +description: "Discover DevOps to Data Engineering strategies: open source contributions and volunteering to 
build pipelines, projects and a hireable portfolio." +topics: +- open-source +- data engineering +- software engineering +- practices +- career transition +- DevOps +intro: "How do you pivot from DevOps to data engineering without starting over? In this episode Agita Jaunzeme — a DevOps/DataOps engineer, manager, community builder and NGO founder — breaks down practical strategies for career transitions that center on automation, open source participation, and volunteering.

Agita draws on experience across corporate, startup, open source and non-governmental sectors and shares how automation and DevOps practices translate to data engineering and DataOps. We discuss using open source projects to build credibility, volunteering and community work to gain hands-on experience and networks, and concrete approaches to getting promoted or making purposeful career pivots. Agita also talks about designing work that aligns with passion and purpose, including founding an NGO to support expats and locals in Porto.

Listeners will come away with actionable ideas for bridging skill gaps, leveraging automation and open source contributions, and using volunteering as a pathway into data engineering roles. This episode is for DevOps professionals, aspiring data engineers, and career changers seeking pragmatic advice on transitions, promotions, and aligning work with meaningful impact." +dateadded: 2025-01-12 +duration: PT01H01M46S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=0 + endOffset: 156 +- name: Guest Welcome & Interview Agenda + startOffset: 156 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=156 + endOffset: 182 +- name: 'Career Beginnings: Trade School, Web Design, First Programming' + startOffset: 182 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=182 + endOffset: 255 +- name: 'Education & Bootcamp: Computer Science, Accenture, C++' + startOffset: 255 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=255 + endOffset: 322 +- name: 'Transition to DevOps: Configuration Management & Early Automation' + startOffset: 322 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=322 + endOffset: 377 +- name: 'Burnout and Self-Discovery: Travel and Volunteerism' + startOffset: 377 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=377 + endOffset: 456 +- name: 'Erasmus+ Volunteering: Programs, Exchanges, and Training' + startOffset: 456 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=456 + endOffset: 560 +- name: Community Management at VMware & Versatile Data Kit (Open Source) + startOffset: 560 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=560 + endOffset: 717 +- name: 'Testing Passions: Flow, Energy, and Career Fit' + startOffset: 717 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=717 + endOffset: 869 +- name: 'Automation Case Study: Scripting Repetitive Tasks and Rapid Promotion' + startOffset: 869 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=869 + endOffset: 1156 +- 
name: 'Problem-Solving as a Core Skill: Transferable Technical Competencies' + startOffset: 1156 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1156 + endOffset: 1263 +- name: 'Applying Corporate Processes to NGOs: Documentation & Agile Practices' + startOffset: 1263 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1263 + endOffset: 1435 +- name: 'Volunteer Management vs. Employment: Motivation and Process Design' + startOffset: 1435 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1435 + endOffset: 1507 +- name: 'Spotting Opportunities: Transitioning into Volunteer Roles' + startOffset: 1507 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1507 + endOffset: 1610 +- name: Personality Types & Team Composition (MBTI relevance) + startOffset: 1610 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1610 + endOffset: 1793 +- name: 'Personality Traits for Data Engineering: Precision, Persistence, Detail' + startOffset: 1793 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1793 + endOffset: 2092 +- name: 'Data Scientist vs. 
Data Engineer: Interests and Role Differences' + startOffset: 2092 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2092 + endOffset: 2185 +- name: 'Returning to Corporate via Open Source: Community + Technical Work' + startOffset: 2185 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2185 + endOffset: 2285 +- name: 'Community Manager Role: Content, DevRel Overlap, and Events' + startOffset: 2285 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2285 + endOffset: 2423 +- name: 'Product Focus: Simplifying Vision and Goal-Setting' + startOffset: 2423 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2423 + endOffset: 2584 +- name: 'Career Coaching Course: "Align Your Career With Who You Are"' + startOffset: 2584 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2584 + endOffset: 2704 +- name: 'Founding an NGO: Legal Setup, Governance, and Launch' + startOffset: 2704 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2704 + endOffset: 2812 +- name: 'Inclusion in Porto: Connecting Expats and Locals' + startOffset: 2812 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2812 + endOffset: 2952 +- name: 'Meetup Activities: MBTI, Improv, and Community Events' + startOffset: 2952 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2952 + endOffset: 3131 +- name: 'Off-Grid Living Experiment: Land, Dome, and Lifestyle Shift' + startOffset: 3131 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3131 + endOffset: 3207 +- name: 'Off-Grid Utilities: Solar Power, Rainwater, and Well Plans' + startOffset: 3207 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3207 + endOffset: 3352 +- name: 'Cost Comparison: Off-Grid Life vs. 
Porto Renting' + startOffset: 3352 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3352 + endOffset: 3407 +- name: 'Unemployment Party: Mastermind Brainstorming Technique' + startOffset: 3407 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3407 + endOffset: 3657 +- name: Episode Closing & Final Remarks + startOffset: 3657 + url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3657 + endOffset: 3706 transcript: - header: Podcast Introduction - line: We have a special guest today, Agita. She has done a lot in her career, including @@ -685,7 +813,7 @@ transcript: sec: 2555 time: '42:35' who: Alexey -- header: 'Career Coaching Course: "Align Your Career With Who You Are"' +- header: 'Career Coaching Course: "Align Your Career With Who You Are"' - line: Starting an NGO is not easy, at least not in Portugal. I’m not sure how it works in other countries, but you need at least nine people. It didn’t really start in March. I had already been doing a career coaching course for about two @@ -1034,145 +1162,21 @@ transcript: sec: 3706 time: '1:01:46' who: Agita -description: 'Learn DevOps-to-Data-Engineering career tactics: automation, open source - & volunteering to build skills, earn rapid promotions, and lead projects.' -intro: How do you move from DevOps into data engineering while using automation, open - source contributions, and volunteering to shape your career? In this episode, Agita - Jaunzeme — a DevOps/DataOps engineer, community manager, educator and NGO founder - focused on inclusion in Porto — walks through that exact path.

We trace - her journey from trade school and early programming to configuration management - and rapid promotion through scripting repetitive tasks, then into burnout, Erasmus+ - volunteering, and community work at VMware. Key topics include automation case studies, - building and contributing to open source (Versatile Data Kit), applying corporate - processes and agile documentation to NGOs, volunteer management versus employment, - spotting volunteer-to-career opportunities, and the practical differences between - data scientists and data engineers. We also cover community management, career coaching, - founding an NGO, meetup activities, and even an off-grid living experiment.

- Listen for actionable guidance on automation best practices, how open source community - work can reopen corporate doors, designing volunteer processes, and aligning technical - career moves with personal values — practical takeaways for anyone navigating a - career pivot into data engineering or community-driven tech work. -dateadded: '2025-01-12' -duration: PT01H01M46S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=0 - endOffset: 156 -- name: Guest Welcome & Interview Agenda - startOffset: 156 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=156 - endOffset: 182 -- name: 'Career Beginnings: Trade School, Web Design, First Programming' - startOffset: 182 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=182 - endOffset: 255 -- name: 'Education & Bootcamp: Computer Science, Accenture, C++' - startOffset: 255 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=255 - endOffset: 322 -- name: 'Transition to DevOps: Configuration Management & Early Automation' - startOffset: 322 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=322 - endOffset: 377 -- name: 'Burnout and Self-Discovery: Travel and Volunteerism' - startOffset: 377 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=377 - endOffset: 456 -- name: 'Erasmus+ Volunteering: Programs, Exchanges, and Training' - startOffset: 456 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=456 - endOffset: 560 -- name: Community Management at VMware & Versatile Data Kit (Open Source) - startOffset: 560 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=560 - endOffset: 717 -- name: 'Testing Passions: Flow, Energy, and Career Fit' - startOffset: 717 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=717 - endOffset: 869 -- name: 'Automation Case Study: Scripting Repetitive Tasks and Rapid Promotion' - startOffset: 869 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=869 - endOffset: 1156 -- name: 'Problem-Solving as a Core Skill: 
Transferable Technical Competencies' - startOffset: 1156 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1156 - endOffset: 1263 -- name: 'Applying Corporate Processes to NGOs: Documentation & Agile Practices' - startOffset: 1263 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1263 - endOffset: 1435 -- name: 'Volunteer Management vs. Employment: Motivation and Process Design' - startOffset: 1435 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1435 - endOffset: 1507 -- name: 'Spotting Opportunities: Transitioning into Volunteer Roles' - startOffset: 1507 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1507 - endOffset: 1610 -- name: Personality Types & Team Composition (MBTI relevance) - startOffset: 1610 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1610 - endOffset: 1793 -- name: 'Personality Traits for Data Engineering: Precision, Persistence, Detail' - startOffset: 1793 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=1793 - endOffset: 2092 -- name: 'Data Scientist vs. 
Data Engineer: Interests and Role Differences' - startOffset: 2092 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2092 - endOffset: 2185 -- name: 'Returning to Corporate via Open Source: Community + Technical Work' - startOffset: 2185 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2185 - endOffset: 2285 -- name: 'Community Manager Role: Content, DevRel Overlap, and Events' - startOffset: 2285 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2285 - endOffset: 2423 -- name: 'Product Focus: Simplifying Vision and Goal-Setting' - startOffset: 2423 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2423 - endOffset: 2584 -- name: 'Career Coaching Course: "Align Your Career With Who You Are"' - startOffset: 2584 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2584 - endOffset: 2704 -- name: 'Founding an NGO: Legal Setup, Governance, and Launch' - startOffset: 2704 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2704 - endOffset: 2812 -- name: 'Inclusion in Porto: Connecting Expats and Locals' - startOffset: 2812 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2812 - endOffset: 2952 -- name: 'Meetup Activities: MBTI, Improv, and Community Events' - startOffset: 2952 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=2952 - endOffset: 3131 -- name: 'Off-Grid Living Experiment: Land, Dome, and Lifestyle Shift' - startOffset: 3131 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3131 - endOffset: 3207 -- name: 'Off-Grid Utilities: Solar Power, Rainwater, and Well Plans' - startOffset: 3207 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3207 - endOffset: 3352 -- name: 'Cost Comparison: Off-Grid Life vs. 
Porto Renting' - startOffset: 3352 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3352 - endOffset: 3407 -- name: 'Unemployment Party: Mastermind Brainstorming Technique' - startOffset: 3407 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3407 - endOffset: 3657 -- name: Episode Closing & Final Remarks - startOffset: 3657 - url: https://www.youtube.com/watch?v=QKWu5-6_6TE&t=3657 - endOffset: 3706 ---- +context: 'Context: The episode traces a journey from hands-on technical beginnings + (trade school, web design, C++, DevOps, automation) through burnout and volunteering, + into community management, open source, NGO founding, career coaching, and a lifestyle + experiment (off-grid living), while exploring personality, team fit, product focus, + and practical processes applied across corporate and nonprofit settings. + + Core: This episode centers on intentionally aligning technical skills, systems-thinking + problem solving, and personal values—leveraging automation, community, and experimentation—to + design a sustainable, impact-driven career and life that bridges corporate, volunteer, + and personal worlds. + Key themes: transferable problem-solving and automation; values-driven career design; + community & open source as leverage; translating corporate processes to NGOs; personality + and team-fit for role choice; experimentation in lifestyle and governance.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/agita/){:target="_blank"} diff --git a/_podcast/s21e07-lessons-from-two-decades-of-ai.md b/_podcast/from-game-ai-to-modern-ai-agents.md similarity index 93% rename from _podcast/s21e07-lessons-from-two-decades-of-ai.md rename to _podcast/from-game-ai-to-modern-ai-agents.md index 24017ec1..f3b2fe61 100644 --- a/_podcast/s21e07-lessons-from-two-decades-of-ai.md +++ b/_podcast/from-game-ai-to-modern-ai-agents.md @@ -1,20 +1,146 @@ --- +title: "From Game AI to LLM Agents: 20-Year Evolution of Multi-Agent Systems, Evolutionary Algorithms & Modern AI Tooling" +short: "Lessons from Two Decades of AI" +season: 21 episode: 7 guests: - micheallanham +image: images/podcast/from-game-ai-to-modern-ai-agents.jpg ids: anchor: datatalksclub/episodes/Lessons-from-Two-Decades-of-AI---Micheal-Lanham-e38oarc youtube: DSxqUlumM3A -image: images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/Lessons-from-Two-Decades-of-AI---Micheal-Lanham-e38oarc apple: https://podcasts.apple.com/us/podcast/lessons-from-two-decades-of-ai-micheal-lanham/id1541710331?i=1000728604349 spotify: https://open.spotify.com/episode/7uhe5ZysRi07S6mb14nnox youtube: https://www.youtube.com/watch?v=DSxqUlumM3A -season: 21 -short: Lessons from Two Decades of AI -title: 'Build Multi-Agent AI Assistants: Game AI Roots, Evolutionary Algorithms & - Practical LLM Tooling' +description: "Discover 20 years of Game AI, Evolutionary Algorithms, and LLM agents—practical AI tooling, architecture tips, and faster deployment for real projects." +topics: +- LLMs +- AI +- machine learning +- MLOps +- software engineering +intro: "How did techniques born in game AI become the foundation for today's LLM-driven agents, and what practical lessons does that 20-year evolution offer to engineers and researchers? 
In this episode, AI engineer and best-selling author Micheal Lanham walks through the lineage from game AI and multi-agent systems to modern LLM agents, evolutionary algorithms, and contemporary AI tooling.

Micheal brings hands-on experience across games, graphics, GIS, enterprise software, and machine learning, and is the author of Evolutionary Deep Learning, Hands-On Reinforcement Learning for Games, and AI Agents in Action. He discusses how deep reinforcement learning, evolutionary methods, and generative AI intersect to build intelligent systems, and how industry practices from oil and gas to fintech shaped tooling and architectures for multi-agent systems.

Listeners will come away with a clearer view of the technical continuity between game AI and current agent design, practical considerations when applying evolutionary algorithms and reinforcement learning, and what modern AI tooling enables for deploying LLM agents. This episode is useful for AI practitioners, game developers, and anyone interested in the evolution of multi-agent systems, evolutionary algorithms, and agent-based AI." +dateadded: 2025-10-01 +duration: PT01H48S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=0 + endOffset: 67 +- name: 'Career Snapshot: Two Decades from Game AI to AI Agents' + startOffset: 67 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=67 + endOffset: 156 +- name: 'Early Research: Games for Cognitive Testing & Neural Networks' + startOffset: 156 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=156 + endOffset: 195 +- name: 'Industry Experience: Consulting, Product Development, Leadership' + startOffset: 195 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=195 + endOffset: 259 +- name: Evolutionary Algorithms in Industry Optimization + startOffset: 259 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=259 + endOffset: 328 +- name: 'Current Focus: Multi-Agent AI Support Assistants' + startOffset: 328 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=328 + endOffset: 345 +- name: 'Publishing Breakthrough: Reverse-Engineering Pokémon Go & AR' + startOffset: 345 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=345 + endOffset: 456 +- name: Sound Design & Waveform Analysis Applied to Games + startOffset: 456 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=456 + endOffset: 481 +- name: Reinforcement Learning Roots and Alberta Research + startOffset: 481 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=481 + endOffset: 549 +- name: 'Evolutionary Deep Learning: Hyperparameter Search & Architecture Tuning' + startOffset: 549 + url: 
https://www.youtube.com/watch?v=DSxqUlumM3A&t=549 + endOffset: 600 +- name: 'Move to NLP: Early LLM Work and Rise of AI Agents' + startOffset: 600 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=600 + endOffset: 849 +- name: Evolutionary Algorithms for Prompt Engineering + startOffset: 849 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=849 + endOffset: 1099 +- name: 'AI Agents Book: Editions, Teaching, and Vibe Coding for Games' + startOffset: 1099 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1099 + endOffset: 1257 +- name: 'Agent Workflow Design: Minimalism and Task Decomposition' + startOffset: 1257 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1257 + endOffset: 1428 +- name: 'Flow vs Orchestration: Sequential Pipelines and Manager Agents' + startOffset: 1428 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1428 + endOffset: 1585 +- name: 'Collaboration Patterns: Parallel Agent Interaction & Use Cases' + startOffset: 1585 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1585 + endOffset: 1891 +- name: 'Agent Tooling: OpenAI Agent SDK and MCP Integration' + startOffset: 1891 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1891 + endOffset: 2005 +- name: 'Sequential Thinking Servers: Internal Reasoning & Scratchpads' + startOffset: 2005 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2005 + endOffset: 2142 +- name: 'Coding Agents in Game Development: Practical Examples' + startOffset: 2142 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2142 + endOffset: 2218 +- name: 'End-to-End Code Generation: GPT-5 Pro Case Studies' + startOffset: 2218 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2218 + endOffset: 2337 +- name: 'Generative AI in Games: Procedural Content and Infinite Playability' + startOffset: 2337 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2337 + endOffset: 2502 +- name: 'Technical Challenges: Implementing Space Invaders with Agents' + startOffset: 2502 + url: 
https://www.youtube.com/watch?v=DSxqUlumM3A&t=2502 + endOffset: 2740 +- name: 'Local Model Trend: Running LLMs on Private GPUs' + startOffset: 2740 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2740 + endOffset: 2774 +- name: Open-Source Large Models and Low-Latency Providers + startOffset: 2774 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2774 + endOffset: 2920 +- name: 'Model Specialization: Smaller Task-Focused LLMs Emerging' + startOffset: 2920 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2920 + endOffset: 3316 +- name: 'Career Advice: Transitioning to AI Engineering & LLM Skills' + startOffset: 3316 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3316 + endOffset: 3459 +- name: 'Evaluation & Monitoring: Feedback Pipelines and Tools (Arize Phoenix)' + startOffset: 3459 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3459 + endOffset: 3530 +- name: 'Publishing Details: Second Edition and Availability' + startOffset: 3530 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3530 + endOffset: 3623 +- name: Closing Remarks and Links + startOffset: 3623 + url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3623 + endOffset: 3648 transcript: - header: Podcast Introduction - line: Hi everyone, welcome to our event. This event is brought to you by DataTalks.Club, @@ -909,145 +1035,19 @@ transcript: sec: 3648 time: '1:00:48' who: Michael -description: Discover multi-agent AI, evolutionary algorithms and LLM tooling—learn - agent workflows, prompt engineering, game AI examples, code patterns & career tips. -intro: How do you design practical multi-agent AI assistants that scale from game - AI experiments to real-world LLM tooling? In this episode, Micheal Lanham — best‑selling - author and AI engineer with two decades of work across games, graphics, GIS and - machine learning — traces the path from game AI and reinforcement learning to evolutionary - algorithms and modern agent architectures. 
We explore his research on games for - cognitive testing, evolutionary deep learning for hyperparameter and architecture - search, and how those methods inform prompt engineering and multi-agent workflows. -

Key topics include minimalist agent workflow design and task decomposition, - flow versus orchestration, parallel collaboration patterns, agent tooling such as - the OpenAI Agent SDK and MCP integration, sequential “thinking” servers and scratchpads, - plus practical code examples from game development and GPT-5 Pro case studies. The - conversation also covers generative AI in games, local and open‑source LLM trends, - model specialization, and evaluation/monitoring pipelines. Whether you’re building - AI assistants, experimenting with evolutionary algorithms, or integrating LLM tooling - into products, this episode offers concrete techniques, tooling insights, and career - guidance for AI engineers. -dateadded: '2025-10-01' -duration: PT01H48S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=0 - endOffset: 67 -- name: 'Career Snapshot: Two Decades from Game AI to AI Agents' - startOffset: 67 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=67 - endOffset: 156 -- name: 'Early Research: Games for Cognitive Testing & Neural Networks' - startOffset: 156 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=156 - endOffset: 195 -- name: 'Industry Experience: Consulting, Product Development, Leadership' - startOffset: 195 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=195 - endOffset: 259 -- name: Evolutionary Algorithms in Industry Optimization - startOffset: 259 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=259 - endOffset: 328 -- name: 'Current Focus: Multi-Agent AI Support Assistants' - startOffset: 328 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=328 - endOffset: 345 -- name: 'Publishing Breakthrough: Reverse-Engineering Pokémon Go & AR' - startOffset: 345 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=345 - endOffset: 456 -- name: Sound Design & Waveform Analysis Applied to Games - startOffset: 456 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=456 - endOffset: 
481 -- name: Reinforcement Learning Roots and Alberta Research - startOffset: 481 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=481 - endOffset: 549 -- name: 'Evolutionary Deep Learning: Hyperparameter Search & Architecture Tuning' - startOffset: 549 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=549 - endOffset: 600 -- name: 'Move to NLP: Early LLM Work and Rise of AI Agents' - startOffset: 600 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=600 - endOffset: 849 -- name: Evolutionary Algorithms for Prompt Engineering - startOffset: 849 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=849 - endOffset: 1099 -- name: 'AI Agents Book: Editions, Teaching, and Vibe Coding for Games' - startOffset: 1099 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1099 - endOffset: 1257 -- name: 'Agent Workflow Design: Minimalism and Task Decomposition' - startOffset: 1257 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1257 - endOffset: 1428 -- name: 'Flow vs Orchestration: Sequential Pipelines and Manager Agents' - startOffset: 1428 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1428 - endOffset: 1585 -- name: 'Collaboration Patterns: Parallel Agent Interaction & Use Cases' - startOffset: 1585 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1585 - endOffset: 1891 -- name: 'Agent Tooling: OpenAI Agent SDK and MCP Integration' - startOffset: 1891 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=1891 - endOffset: 2005 -- name: 'Sequential Thinking Servers: Internal Reasoning & Scratchpads' - startOffset: 2005 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2005 - endOffset: 2142 -- name: 'Coding Agents in Game Development: Practical Examples' - startOffset: 2142 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2142 - endOffset: 2218 -- name: 'End-to-End Code Generation: GPT-5 Pro Case Studies' - startOffset: 2218 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2218 - endOffset: 2337 -- name: 'Generative AI in Games: 
Procedural Content and Infinite Playability' - startOffset: 2337 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2337 - endOffset: 2502 -- name: 'Technical Challenges: Implementing Space Invaders with Agents' - startOffset: 2502 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2502 - endOffset: 2740 -- name: 'Local Model Trend: Running LLMs on Private GPUs' - startOffset: 2740 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2740 - endOffset: 2774 -- name: Open-Source Large Models and Low-Latency Providers - startOffset: 2774 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2774 - endOffset: 2920 -- name: 'Model Specialization: Smaller Task-Focused LLMs Emerging' - startOffset: 2920 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=2920 - endOffset: 3316 -- name: 'Career Advice: Transitioning to AI Engineering & LLM Skills' - startOffset: 3316 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3316 - endOffset: 3459 -- name: 'Evaluation & Monitoring: Feedback Pipelines and Tools (Arize Phoenix)' - startOffset: 3459 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3459 - endOffset: 3530 -- name: 'Publishing Details: Second Edition and Availability' - startOffset: 3530 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3530 - endOffset: 3623 -- name: Closing Remarks and Links - startOffset: 3623 - url: https://www.youtube.com/watch?v=DSxqUlumM3A&t=3623 - endOffset: 3648 ---- +context: 'Context: The episode follows a two-decade arc from game-AI research and + evolutionary/RL methods through industry product leadership to present work on LLM-driven + multi-agent assistants—covering technical deep dives (prompt engineering, orchestration + vs flow, sequential thinking servers, code generation, procedural content), tooling + and deployment challenges (local models, model specialization, monitoring), and + career/publishing lessons. 
+ Core narrative: The unifying idea is that practical, production-ready AI agents + are built by applying game-AI engineering principles—minimal, modular task decomposition; + evolutionary and learning-based search; and clear orchestration patterns—to modern + LLMs and multi-agent systems, balancing creative capabilities with efficiency, tooling, + and real-world deployability.' +--- Links: * [Linkedin](https://www.linkedin.com/in/micheal-lanham-189693123/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s15e08-from-data-manager-to-data-architect.md b/_podcast/from-iot-data-engineering-to-leading-data-architect.md similarity index 96% rename from _podcast/s15e08-from-data-manager-to-data-architect.md rename to _podcast/from-iot-data-engineering-to-leading-data-architect.md index 72c1c6d3..a06f1083 100644 --- a/_podcast/s15e08-from-data-manager-to-data-architect.md +++ b/_podcast/from-iot-data-engineering-to-leading-data-architect.md @@ -1,19 +1,123 @@ --- +title: "From Hands-On IoT Data Engineering to Leading Data Architecture: Pipelines, Cloud Adaptation & Analytics Modeling" +short: "From Data Manager to Data Architect" +season: 15 episode: 8 guests: - loicmagnien +image: images/podcast/from-iot-data-engineering-to-leading-data-architect.jpg ids: - anchor: atatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 + anchor: datatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 youtube: qWG--iYO2uc -image: images/podcast/s15e08-from-data-manager-to-data-architect.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Data-Manager-to-Data-Architect---Loc-Magnien-e29rk73 apple: https://podcasts.apple.com/us/podcast/from-data-manager-to-data-architect-lo%C3%AFc-magnien/id1541710331?i=1000629678056 spotify: https://open.spotify.com/episode/7twXPni1q2RJQU2jjbCGty?si=KNCEy-0ZRrWDVchFsDCHjQ youtube: https://www.youtube.com/watch?v=qWG--iYO2uc -season: 15 -short: From Data 
Manager to Data Architect -title: Build & Scale End-to-End IoT Data Pipelines, Lakehouse & Core Data Modeling +description: "A real-world journey from hands-on IoT data engineering to leading data architecture: covering pipelines, cloud adaptation, analytics modeling, lakehouse design, and the senior responsibilities of a data lead." +intro: "What does it take to evolve from hands-on IoT data engineering to leading data architecture — building scalable pipelines, adapting to cloud platforms, and designing analytics models that serve entire organizations? In this episode, Loïc Magnien, Lead Data at Mylight150 with a decade spanning database management, data engineering, product ownership and architecture, shares his real-world journey from managing sensor data to architecting enterprise-scale data systems.

We explore the progression from tactical IoT work — sensor data aggregation, structural health monitoring, ETL automation for logger ingestion — to strategic architecture responsibilities including cloud fundamentals (Python, Azure), lakehouse design with bronze-silver-gold layering, and core data modeling that aligns cross-functional teams. Loïc breaks down practical patterns for building reusable ingestion and transformation templates, designing dimensions and facts that serve multiple consumers, maintaining data quality expectations across layers, and balancing hands-on engineering with stakeholder engagement. The conversation covers hiring considerations for data teams, scaling responsibilities from individual contributor to lead, leveraging tools like DBT and LLMs for technology scouting, and making pragmatic tradeoffs between reusable components and project-specific solutions.

Listen to discover actionable guidance on architecture outcomes, agile delivery through proofs of concept, and building core models that drive business alignment — plus insights on the senior leadership skills needed to succeed as a data architect in IoT and analytics environments." +topics: +- data engineering +- career transition +- MLOps +dateadded: 2023-10-01 + +duration: PT01H27S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=0 + endOffset: 105 +- name: 'Career overview: From data manager to data lead' + startOffset: 105 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=105 + endOffset: 204 +- name: 'Early role: Sensor data aggregation & structural health monitoring' + startOffset: 204 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=204 + endOffset: 344 +- name: 'Data management vs analyst: responsibilities and data discovery' + startOffset: 344 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=344 + endOffset: 441 +- name: 'Automation to data engineering: ETL, scripting, and process automation' + startOffset: 441 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=441 + endOffset: 561 +- name: 'End-to-end IoT pipelines: loggers, ingestion, and reporting' + startOffset: 561 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=561 + endOffset: 687 +- name: 'Domain expertise: civil engineering aiding data diagnosis' + startOffset: 687 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=687 + endOffset: 891 +- name: 'Adapting to cloud & IoT: learning Python, Azure, and cloud fundamentals' + startOffset: 891 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=891 + endOffset: 1261 +- name: 'Hiring mindset: evaluating experience, scale, and cloud adaptability' + startOffset: 1261 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1261 + endOffset: 1367 +- name: 'Data architect role: seniority, end-to-end ownership, and modeling' + startOffset: 1367 + url: 
https://www.youtube.com/watch?v=qWG--iYO2uc&t=1367 + endOffset: 1640 +- name: 'Architecture outcome: team alignment and optimized data processes' + startOffset: 1640 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1640 + endOffset: 1796 +- name: 'Lakehouse layering: bronze, silver, gold and data quality expectations' + startOffset: 1796 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1796 + endOffset: 1978 +- name: 'Analytics modeling: dimensions, facts, metrics, and stakeholder discovery' + startOffset: 1978 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1978 + endOffset: 2160 +- name: 'Core model strategy: supporting multiple consumers and departments' + startOffset: 2160 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2160 + endOffset: 2230 +- name: 'Role balance: hands-on engineering vs stakeholder engagement over time' + startOffset: 2230 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2230 + endOffset: 2551 +- name: 'Empowerment & prioritization: scaling teams and aligning with business goals' + startOffset: 2551 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2551 + endOffset: 2653 +- name: 'Staying technical: one-on-ones, demos, and hands-on proofs of concept' + startOffset: 2653 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2653 + endOffset: 3045 +- name: 'Technology scouting: DBT, LLMs, newsletters and community curation' + startOffset: 3045 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3045 + endOffset: 3208 +- name: 'Agile delivery: draft specs, proof of concept pipelines, and iteration' + startOffset: 3208 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3208 + endOffset: 3432 +- name: 'Reusable templates: ingestion, transformation, and datamart patterns' + startOffset: 3432 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3432 + endOffset: 3574 +- name: 'Design tradeoffs: reusable components vs project-specific solutions' + startOffset: 3574 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3574 
+ endOffset: 3651 +- name: 'Follow-up: guest contact and LinkedIn connection' + startOffset: 3651 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3651 + endOffset: 3691 +- name: Episode recap & closing + startOffset: 3691 + url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3691 + endOffset: 3732 + transcript: - header: Podcast Introduction - header: 'Career overview: From data manager to data lead' @@ -937,118 +1041,6 @@ transcript: sec: 3732 time: '1:02:12' who: Loïc -description: Master end-to-end IoT data pipelines, lakehouse & data modeling, learn - ETL, ingestion patterns and core model strategies to scale analytics and speed delivery. -intro: How do you build and scale end-to-end IoT data pipelines and a lakehouse that - supports reliable core data modeling across teams? In this episode, Loïc Magnien, - Lead Data at Mylight150 with a decade in database management, data engineering, - product ownership and architecture, walks through practical patterns for IoT pipelines, - lakehouse design and analytics modeling. We cover sensor data aggregation and structural - health monitoring, ETL and automation for ingestion from loggers, cloud fundamentals - (Python, Azure), and the move from data management to data architect responsibilities. - Loïc explains lakehouse layering (bronze, silver, gold) and data quality expectations, - how to design dimensions, facts and metrics to serve multiple consumers, and strategies - for reusable ingestion, transformation and datamart templates. He also discusses - hiring and team scale, balancing hands-on engineering with stakeholder engagement, - using DBT and LLMs for technology scouting, and pragmatic tradeoffs between reusable - components and project-specific solutions. Listen to learn actionable guidance on - architecture outcomes, agile delivery with proofs of concept, and building core - models that align teams and business goals. 
-dateadded: '2023-10-01' -duration: PT01H27S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=0 - endOffset: 105 -- name: 'Career overview: From data manager to data lead' - startOffset: 105 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=105 - endOffset: 204 -- name: 'Early role: Sensor data aggregation & structural health monitoring' - startOffset: 204 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=204 - endOffset: 344 -- name: 'Data management vs analyst: responsibilities and data discovery' - startOffset: 344 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=344 - endOffset: 441 -- name: 'Automation to data engineering: ETL, scripting, and process automation' - startOffset: 441 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=441 - endOffset: 561 -- name: 'End-to-end IoT pipelines: loggers, ingestion, and reporting' - startOffset: 561 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=561 - endOffset: 687 -- name: 'Domain expertise: civil engineering aiding data diagnosis' - startOffset: 687 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=687 - endOffset: 891 -- name: 'Adapting to cloud & IoT: learning Python, Azure, and cloud fundamentals' - startOffset: 891 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=891 - endOffset: 1261 -- name: 'Hiring mindset: evaluating experience, scale, and cloud adaptability' - startOffset: 1261 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1261 - endOffset: 1367 -- name: 'Data architect role: seniority, end-to-end ownership, and modeling' - startOffset: 1367 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1367 - endOffset: 1640 -- name: 'Architecture outcome: team alignment and optimized data processes' - startOffset: 1640 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1640 - endOffset: 1796 -- name: 'Lakehouse layering: bronze, silver, gold and data quality expectations' - startOffset: 1796 - url: 
https://www.youtube.com/watch?v=qWG--iYO2uc&t=1796 - endOffset: 1978 -- name: 'Analytics modeling: dimensions, facts, metrics, and stakeholder discovery' - startOffset: 1978 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=1978 - endOffset: 2160 -- name: 'Core model strategy: supporting multiple consumers and departments' - startOffset: 2160 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2160 - endOffset: 2230 -- name: 'Role balance: hands-on engineering vs stakeholder engagement over time' - startOffset: 2230 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2230 - endOffset: 2551 -- name: 'Empowerment & prioritization: scaling teams and aligning with business goals' - startOffset: 2551 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2551 - endOffset: 2653 -- name: 'Staying technical: one-on-ones, demos, and hands-on proofs of concept' - startOffset: 2653 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=2653 - endOffset: 3045 -- name: 'Technology scouting: DBT, LLMs, newsletters and community curation' - startOffset: 3045 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3045 - endOffset: 3208 -- name: 'Agile delivery: draft specs, proof of concept pipelines, and iteration' - startOffset: 3208 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3208 - endOffset: 3432 -- name: 'Reusable templates: ingestion, transformation, and datamart patterns' - startOffset: 3432 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3432 - endOffset: 3574 -- name: 'Design tradeoffs: reusable components vs project-specific solutions' - startOffset: 3574 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3574 - endOffset: 3651 -- name: 'Follow-up: guest contact and LinkedIn connection' - startOffset: 3651 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3651 - endOffset: 3691 -- name: Episode recap & closing - startOffset: 3691 - url: https://www.youtube.com/watch?v=qWG--iYO2uc&t=3691 - endOffset: 3627 --- Links: diff --git 
a/_podcast/s19e05-large-hadron-collider-and-mentorship.md b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md similarity index 94% rename from _podcast/s19e05-large-hadron-collider-and-mentorship.md rename to _podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md index 5062a31f..7f396bab 100644 --- a/_podcast/s19e05-large-hadron-collider-and-mentorship.md +++ b/_podcast/from-large-hadron-collider-to-data-science-research-software-engineering.md @@ -1,20 +1,139 @@ --- +title: "From Collider Physics to Data Science: Research Software Engineering, Interview Prep & Mentorship" +short: "Large Hadron Collider and Mentorship" +season: 19 episode: 5 guests: - anastasiakaravdina +image: images/podcast/from-large-hadron-collider-to-data-science-research-software-engineering.jpg ids: - anchor: atalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth + anchor: datatalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth youtube: kV0ZDy2UtJA -image: images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Large-Hadron-Collider-and-Mentorship--Anastasia-Karavdina-e2rc2bj/a-abl5fth apple: https://podcasts.apple.com/us/podcast/large-hadron-collider-and-mentorship-anastasia-karavdina/id1541710331?i=1000677930293 spotify: https://open.spotify.com/episode/6AZ26Q8O4VBkC9YtUNzhab?si=75154323e14d4dca youtube: https://www.youtube.com/watch?v=kV0ZDy2UtJA -season: 19 -short: Large Hadron Collider and Mentorship -title: 'From Collider Physics to Data Science: Research Software Engineering, Interview - Prep & Mentorship' +description: "Discover how collider physics skills power data science careers and research software engineering; gain interview prep tactics, mentorship tips, and career growth." 
+topics: +- machine learning +- data science +- MLOps +- software engineering +- career transition +- academia +intro: "How do you move from collider physics to industry data science while keeping rigorous research software engineering practices, succeeding in interviews, and giving or getting effective mentorship? In this episode Anastasia Karavdina — a particle physicist turned data scientist who worked on Large Hadron Collider experiments and later built AI solutions at Blue Yonder and Kaufland e-commerce — walks through that journey.

We start with collider basics (particle acceleration, detector imaging, event volumes, and roles in large collaborations) to show the data scale and statistical thinking that map to industry. Anastasia explains dual hardware-and-analysis roles, how multivariate analysis translates to machine learning, and concrete research software engineering practices like version control and CI/CD. She also covers interview prep (technical fit, behavioral stories, cultural fit in Germany) and evolving hiring expectations, plus supply chain AI use cases. Finally, she discusses mentoring — how she started, structuring mentorship, paid vs. free options, and platforms like MentorCruise.

Listen to learn practical steps for translating physics expertise into data science careers, applying RSE workflows, preparing interview narratives, and finding mentorship to accelerate your next move." +dateadded: 2024-12-17 +duration: PT01H01M22S +quotableClips: +- name: Episode Opening & Guest Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=0 + endOffset: 106 +- name: Guest Background Snapshot + startOffset: 106 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=106 + endOffset: 242 +- name: 'Origins: From Novokuznetsk, Siberia' + startOffset: 242 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=242 + endOffset: 318 +- name: Relocation & Life in Hamburg + startOffset: 318 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=318 + endOffset: 362 +- name: 'Collider Purpose: Exploring Fundamental Particles' + startOffset: 362 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=362 + endOffset: 450 +- name: Particle Acceleration & Detector Imaging + startOffset: 450 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=450 + endOffset: 575 +- name: Collider Magnets & Data Capture Scale + startOffset: 575 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=575 + endOffset: 675 +- name: Event Volume & Statistical Analysis in Particle Physics + startOffset: 675 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=675 + endOffset: 760 +- name: Roles & Specializations in Large Research Collaborations + startOffset: 760 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=760 + endOffset: 993 +- name: 'Dual Roles: Hardware Development and Data Analysis' + startOffset: 993 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=993 + endOffset: 1066 +- name: Scientific Goals & Safety Myths (Higgs, Dark Matter, Black Holes) + startOffset: 1066 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1066 + endOffset: 1235 +- name: Translating Research Skills into Industry Data Science + startOffset: 1235 + url: 
https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1235 + endOffset: 1420 +- name: 'Research Software Engineering: Version Control & CI/CD Practices' + startOffset: 1420 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1420 + endOffset: 1471 +- name: 'Jargon Translation: Multivariate Analysis to Machine Learning' + startOffset: 1471 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1471 + endOffset: 1590 +- name: 'Interview Challenges: Position Fit & Evolving Hiring Expectations' + startOffset: 1590 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1590 + endOffset: 1680 +- name: 'Blue Yonder & Supply Chain AI: From Physics to Enterprise ML' + startOffset: 1680 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1680 + endOffset: 1765 +- name: 'Career Shift: From Industry Roles to Mentoring Focus' + startOffset: 1765 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1765 + endOffset: 2043 +- name: Behavioral Interview Strategies & Cultural Fit in Germany + startOffset: 2043 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2043 + endOffset: 2273 +- name: Preparing Stories & Practicing Leadership Principles + startOffset: 2273 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2273 + endOffset: 2486 +- name: 'Becoming a Mentor: Motivation, Rewards & Burnout' + startOffset: 2486 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2486 + endOffset: 2650 +- name: 'Mentoring Origins: Accidental Start & Finding the Right Fit' + startOffset: 2650 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2650 + endOffset: 2711 +- name: 'Mentoring Defined: Process, Goals & Time Commitment' + startOffset: 2711 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2711 + endOffset: 2965 +- name: 'Approaching Mentors: Clear Goals & Manageable Requests' + startOffset: 2965 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2965 + endOffset: 3052 +- name: 'Paid Mentorship vs. 
Free Communities: Pros & Cons' + startOffset: 3052 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3052 + endOffset: 3344 +- name: 'Mentorship as Career Leverage: Promotions & Leadership Experience' + startOffset: 3344 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3344 + endOffset: 3582 +- name: Finding Mentoring Communities & Platforms (MentorCruise, others) + startOffset: 3582 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3582 + endOffset: 3652 +- name: Episode Wrap-up & Key Takeaways + startOffset: 3652 + url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3652 + endOffset: 3682 transcript: - header: Episode Opening & Guest Introduction - line: This week, we’ll talk about your career. Specifically, your transition from @@ -896,139 +1015,16 @@ transcript: sec: 3682 time: '1:01:22' who: Alexey -description: Discover research software engineering, data science, and mentorship - strategies from a collider physicist - interview prep, CI/CD practices, and career-shift - tips. -intro: 'How do you pivot from collider physics to industry data science while mastering - research software engineering, interview prep, and mentorship? In this episode, Anastasia - Karavdina — a particle physicist turned data scientist with experience at Large - Hadron Collider experiments, Blue Yonder, and Kaufland e‑commerce — walks through - that transition and the concrete skills that made it possible.

We unpack - collider physics basics (particle acceleration, detector imaging, event volumes, - statistical analysis), roles in large research collaborations, and how hardware - development and data analysis intersect. Anastasia explains how research software - engineering practices — version control, CI/CD, and reproducible workflows — translate - into enterprise machine learning and supply chain AI. She also covers interview - challenges (position fit, evolving hiring expectations, behavioral interviews and - cultural fit in Germany), how to prepare leadership stories, and practical tactics - for moving into ML engineer and data science roles. Finally, she discusses mentoring: - motivation, boundaries, paid vs free options, and platforms like MentorCruise.

- Listen to gain actionable guidance on translating high‑energy physics expertise - into data science, improving technical interview performance, and building effective - mentorship relationships.' -dateadded: '2024-12-17' -duration: PT01H01M22S -quotableClips: -- name: Episode Opening & Guest Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=0 - endOffset: 106 -- name: Guest Background Snapshot - startOffset: 106 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=106 - endOffset: 242 -- name: 'Origins: From Novokuznetsk, Siberia' - startOffset: 242 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=242 - endOffset: 318 -- name: Relocation & Life in Hamburg - startOffset: 318 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=318 - endOffset: 362 -- name: 'Collider Purpose: Exploring Fundamental Particles' - startOffset: 362 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=362 - endOffset: 450 -- name: Particle Acceleration & Detector Imaging - startOffset: 450 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=450 - endOffset: 575 -- name: Collider Magnets & Data Capture Scale - startOffset: 575 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=575 - endOffset: 675 -- name: Event Volume & Statistical Analysis in Particle Physics - startOffset: 675 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=675 - endOffset: 760 -- name: Roles & Specializations in Large Research Collaborations - startOffset: 760 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=760 - endOffset: 993 -- name: 'Dual Roles: Hardware Development and Data Analysis' - startOffset: 993 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=993 - endOffset: 1066 -- name: Scientific Goals & Safety Myths (Higgs, Dark Matter, Black Holes) - startOffset: 1066 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1066 - endOffset: 1235 -- name: Translating Research Skills into Industry Data Science - startOffset: 1235 - url: 
https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1235 - endOffset: 1420 -- name: 'Research Software Engineering: Version Control & CI/CD Practices' - startOffset: 1420 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1420 - endOffset: 1471 -- name: 'Jargon Translation: Multivariate Analysis to Machine Learning' - startOffset: 1471 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1471 - endOffset: 1590 -- name: 'Interview Challenges: Position Fit & Evolving Hiring Expectations' - startOffset: 1590 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1590 - endOffset: 1680 -- name: 'Blue Yonder & Supply Chain AI: From Physics to Enterprise ML' - startOffset: 1680 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1680 - endOffset: 1765 -- name: 'Career Shift: From Industry Roles to Mentoring Focus' - startOffset: 1765 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=1765 - endOffset: 2043 -- name: Behavioral Interview Strategies & Cultural Fit in Germany - startOffset: 2043 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2043 - endOffset: 2273 -- name: Preparing Stories & Practicing Leadership Principles - startOffset: 2273 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2273 - endOffset: 2486 -- name: 'Becoming a Mentor: Motivation, Rewards & Burnout' - startOffset: 2486 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2486 - endOffset: 2650 -- name: 'Mentoring Origins: Accidental Start & Finding the Right Fit' - startOffset: 2650 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2650 - endOffset: 2711 -- name: 'Mentoring Defined: Process, Goals & Time Commitment' - startOffset: 2711 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2711 - endOffset: 2965 -- name: 'Approaching Mentors: Clear Goals & Manageable Requests' - startOffset: 2965 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=2965 - endOffset: 3052 -- name: 'Paid Mentorship vs. 
Free Communities: Pros & Cons' - startOffset: 3052 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3052 - endOffset: 3344 -- name: 'Mentorship as Career Leverage: Promotions & Leadership Experience' - startOffset: 3344 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3344 - endOffset: 3582 -- name: Finding Mentoring Communities & Platforms (MentorCruise, others) - startOffset: 3582 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3582 - endOffset: 3652 -- name: Episode Wrap-up & Key Takeaways - startOffset: 3652 - url: https://www.youtube.com/watch?v=kV0ZDy2UtJA&t=3652 - endOffset: 3682 ---- +context: 'Context: A physicist’s journey from building and analyzing massive collider + experiments to applying those technical, collaborative, and software-engineering + skills in industry—culminating in a deliberate turn toward mentoring others through + career transitions and leadership challenges. + Core theme: Experimental physics training—rooted in tackling large-scale data, complex + systems, rigorous software and teamwork practices—is a powerful, transferable foundation, + and mentorship is the essential bridge that translates that expertise into effective + industry roles, career progression, and leadership.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/dr-anastasia-karavdina/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s11e07-from-digital-marketing-to-analytics-engineering.md b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md similarity index 95% rename from _podcast/s11e07-from-digital-marketing-to-analytics-engineering.md rename to _podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md index 7fd8798c..edc5a5d2 100644 --- a/_podcast/s11e07-from-digital-marketing-to-analytics-engineering.md +++ b/_podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.md @@ -1,19 +1,145 @@ --- +title: "Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook" +short: "From Digital Marketing to Analytics Engineering" +season: 11 episode: 7 guests: - nikolamaksimovic +image: images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg ids: anchor: From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s youtube: GawJ7mG5ElQ -image: images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s apple: https://podcasts.apple.com/us/podcast/from-digital-marketing-to-analytics-engineering-nikola/id1541710331?i=1000586740912 spotify: https://open.spotify.com/episode/5VwS6ijaToirTzR7Xd5Phw?si=OsOVLOzBSt2sIgvbRS3krg youtube: https://www.youtube.com/watch?v=GawJ7mG5ElQ -season: 11 -short: From Digital Marketing to Analytics Engineering -title: 'Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook' + +description: "Discover DBT, SQL & data modeling tactics for pivoting into analytics engineering: learn migration, tooling, A/B testing, and a career playbook to get hired." 
+intro: "How do you transition from digital marketing into analytics engineering—and master DBT, SQL, and data modeling in the process? In this episode, Nikola Maksimovic shares his complete career transformation journey, from startup marketing roles in London and Berlin to growth marketing at Ecosia, and ultimately his pandemic-driven pivot into BI and analytics engineering. Nikola reveals the step-by-step learning path that worked for him—SQL fundamentals, hands-on BI projects, strategic conversations with internal data teams—plus the essential technical skills that got him hired: advanced SQL, data pipeline understanding, and Python foundations.

You'll get an inside look at real analytics engineering work: spearheading a company-wide DBT migration, navigating data modeling decisions (wide vs narrow tables, incremental strategies), and working with modern data stacks including Snowplow, DBT, Looker/LookML, Redshift, Airflow, Airbyte, and Redash. We also explore A/B testing frameworks, product analytics implementation, and the nuanced differences between analytics engineer and data analyst roles. Nikola shares his proven transition playbook (Excel → SQL → dashboards → meaningful projects), networking tactics that opened doors, mentorship approaches, and the communities and resources that accelerated his learning.

Whether you're in marketing, operations, or any non-technical role considering a move into data, this episode provides a concrete roadmap with actionable steps, realistic timelines, and insider insights to help you successfully pivot into analytics engineering." +topics: +- data science +- analytics engineering +- career transition +- tools +dateadded: 2022-11-19 + +duration: PT00H54M34S + +quotableClips: +- name: 'Episode Overview: Switching from Marketing to Analytics Engineering' + startOffset: 0 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=0 + endOffset: 32 +- name: 'Early Career & Startup Experience: London, Berlin, Movinga' + startOffset: 32 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=32 + endOffset: 64 +- name: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' + startOffset: 64 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=64 + endOffset: 173 +- name: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' + startOffset: 173 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=173 + endOffset: 438 +- name: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' + startOffset: 438 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=438 + endOffset: 525 +- name: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' + startOffset: 525 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=525 + endOffset: 593 +- name: 'Internal Pathway: Conversations with BI Team and Required Skills' + startOffset: 593 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=593 + endOffset: 662 +- name: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' + startOffset: 662 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=662 + endOffset: 770 +- name: 'Transition Phase: Balancing Marketing Work and BI Projects' + startOffset: 770 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=770 + endOffset: 854 +- name: 'Current Responsibilities: Analytics Engineering, Product Support & A/B 
Testing' + startOffset: 854 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=854 + endOffset: 1114 +- name: 'Data Modeling in Practice: DBT Migration and Transformation Layers' + startOffset: 1114 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1114 + endOffset: 1234 +- name: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, + Redash' + startOffset: 1234 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1234 + endOffset: 1328 +- name: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' + startOffset: 1328 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1328 + endOffset: 1392 +- name: 'Looker & LookML Experience: Reporting and Dashboard Building' + startOffset: 1392 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1392 + endOffset: 1491 +- name: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' + startOffset: 1491 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1491 + endOffset: 1506 +- name: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational + Fit' + startOffset: 1506 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1506 + endOffset: 1720 +- name: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' + startOffset: 1720 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1720 + endOffset: 1828 +- name: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' + startOffset: 1828 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1828 + endOffset: 2026 +- name: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' + startOffset: 2026 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2026 + endOffset: 2130 +- name: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and Udemy + SQL' + startOffset: 2130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2130 + endOffset: 2307 +- name: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' + 
startOffset: 2307 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2307 + endOffset: 2376 +- name: 'Domain Knowledge Advantage: Marketing Funnel, User Journey & Empathy' + startOffset: 2376 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2376 + endOffset: 2510 +- name: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' + startOffset: 2510 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2510 + endOffset: 2709 +- name: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' + startOffset: 2709 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2709 + endOffset: 3023 +- name: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' + startOffset: 3023 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3023 + endOffset: 3130 +- name: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally + Optimistic)' + startOffset: 3130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3130 + endOffset: 3226 +- name: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' + startOffset: 3226 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3226 + endOffset: 3274 + transcript: - header: 'Episode Overview: Switching from Marketing to Analytics Engineering' - line: This week, we'll talk about switching careers from marketing to analytics @@ -440,7 +566,7 @@ transcript: sec: 1487 time: '24:47' who: Nikola -- header: 'Infrastructure Choices: Self‑Hosted Tooling vs DBT Cloud' +- header: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' - line: Do you host all these things yourself? For example, when it comes to DBT, do you use their cloud? sec: 1491 @@ -630,7 +756,7 @@ transcript: sec: 2127 time: '35:27' who: Alexey -- header: 'Nontraditional Background: Classics to Data — Just‑In‑Time Learning and +- header: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and Udemy SQL' - line: No, I studied classics, which are Latin and ancient Greek. 
[laughs] sec: 2130 @@ -940,7 +1066,7 @@ transcript: sec: 3221 time: '53:41' who: Nikola -- header: 'Contact & Wrap‑Up: Finding Nikola on LinkedIn and Episode Close' +- header: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' - line: Profoundly Optimistic is also a good name. [both laugh] If somebody has questions for you, how can they find you? Is it LinkedIn or are there some other ways to contact you? @@ -963,139 +1089,6 @@ transcript: sec: 3274 time: '54:34' who: Nikola -description: 'Discover DBT, SQL & data modeling tactics for pivoting into analytics - engineering: learn migration, tooling, A/B testing, and a career playbook to get - hired.' -intro: 'How do you move from marketing into analytics engineering—and learn DBT, SQL, - and data modeling along the way? In this episode, Nikola Maksimovic walks through - that exact career pivot, from early startup roles in London and Berlin to growth - marketing at Ecosia and a pandemic‑era shift toward BI and analytics engineering. - Nikola outlines the practical learning path—SQL courses, small BI projects, conversations - with internal BI teams—and the core technical skills you’ll need: advanced SQL, - data pipeline familiarity, and Python basics.

We dig into real-world analytics - engineering work: leading a DBT migration, data modeling tradeoffs (wide vs narrow - tables, incrementalization), tooling stacks like Snowplow, DBT, Looker/LookML, Redshift, - Airflow, Airbyte and Redash, plus A/B testing and product analytics use cases. Nikola - also shares a transition playbook (Excel → SQL → dashboards → projects), mentorship - and networking strategies, and recommended reading and communities.

Listen - to get a practical career playbook and actionable guidance on SQL, DBT, data modeling, - and the organizational fit between analytics engineer and data analyst. Find Nikola - on LinkedIn (nikola-maksimovic-40188183).' -dateadded: '2022-11-19' -duration: PT00H54M34S -quotableClips: -- name: 'Episode Overview: Switching from Marketing to Analytics Engineering' - startOffset: 0 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=0 - endOffset: 32 -- name: 'Early Career & Startup Experience: London, Berlin, Movinga' - startOffset: 32 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=32 - endOffset: 64 -- name: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' - startOffset: 64 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=64 - endOffset: 173 -- name: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' - startOffset: 173 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=173 - endOffset: 438 -- name: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' - startOffset: 438 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=438 - endOffset: 525 -- name: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' - startOffset: 525 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=525 - endOffset: 593 -- name: 'Internal Pathway: Conversations with BI Team and Required Skills' - startOffset: 593 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=593 - endOffset: 662 -- name: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' - startOffset: 662 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=662 - endOffset: 770 -- name: 'Transition Phase: Balancing Marketing Work and BI Projects' - startOffset: 770 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=770 - endOffset: 854 -- name: 'Current Responsibilities: Analytics Engineering, Product Support & A/B Testing' - startOffset: 854 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=854 - endOffset: 1114 -- name: 'Data 
Modeling in Practice: DBT Migration and Transformation Layers' - startOffset: 1114 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1114 - endOffset: 1234 -- name: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, - Redash' - startOffset: 1234 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1234 - endOffset: 1328 -- name: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' - startOffset: 1328 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1328 - endOffset: 1392 -- name: 'Looker & LookML Experience: Reporting and Dashboard Building' - startOffset: 1392 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1392 - endOffset: 1491 -- name: 'Infrastructure Choices: Self‑Hosted Tooling vs DBT Cloud' - startOffset: 1491 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1491 - endOffset: 1506 -- name: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational - Fit' - startOffset: 1506 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1506 - endOffset: 1720 -- name: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' - startOffset: 1720 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1720 - endOffset: 1828 -- name: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' - startOffset: 1828 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1828 - endOffset: 2026 -- name: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' - startOffset: 2026 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2026 - endOffset: 2130 -- name: 'Nontraditional Background: Classics to Data — Just‑In‑Time Learning and Udemy - SQL' - startOffset: 2130 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2130 - endOffset: 2307 -- name: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' - startOffset: 2307 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2307 - endOffset: 2376 -- name: 'Domain Knowledge 
Advantage: Marketing Funnel, User Journey & Empathy' - startOffset: 2376 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2376 - endOffset: 2510 -- name: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' - startOffset: 2510 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2510 - endOffset: 2709 -- name: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' - startOffset: 2709 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2709 - endOffset: 3023 -- name: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' - startOffset: 3023 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3023 - endOffset: 3130 -- name: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally - Optimistic)' - startOffset: 3130 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3130 - endOffset: 3226 -- name: 'Contact & Wrap‑Up: Finding Nikola on LinkedIn and Episode Close' - startOffset: 3226 - url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3226 - endOffset: 3274 --- Links: diff --git a/_podcast/from-marketing-to-product-owner-in-search.md b/_podcast/from-marketing-to-product-owner-in-search.md new file mode 100644 index 00000000..edc5a5d2 --- /dev/null +++ b/_podcast/from-marketing-to-product-owner-in-search.md @@ -0,0 +1,1096 @@ +--- +title: "Marketing to Analytics Engineering: DBT, SQL, Data Modeling & Career Playbook" +short: "From Digital Marketing to Analytics Engineering" +season: 11 +episode: 7 +guests: +- nikolamaksimovic +image: images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg +ids: + anchor: From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s + youtube: GawJ7mG5ElQ +links: + anchor: https://anchor.fm/datatalksclub/episodes/From-Digital-Marketing-to-Analytics-Engineering---Nikola-Maksimovic-e1qr75s + apple: https://podcasts.apple.com/us/podcast/from-digital-marketing-to-analytics-engineering-nikola/id1541710331?i=1000586740912 + 
spotify: https://open.spotify.com/episode/5VwS6ijaToirTzR7Xd5Phw?si=OsOVLOzBSt2sIgvbRS3krg + youtube: https://www.youtube.com/watch?v=GawJ7mG5ElQ + +description: "Discover DBT, SQL & data modeling tactics for pivoting into analytics engineering: learn migration, tooling, A/B testing, and a career playbook to get hired." +intro: "How do you transition from digital marketing into analytics engineering—and master DBT, SQL, and data modeling in the process? In this episode, Nikola Maksimovic shares his complete career transformation journey, from startup marketing roles in London and Berlin to growth marketing at Ecosia, and ultimately his pandemic-driven pivot into BI and analytics engineering. Nikola reveals the step-by-step learning path that worked for him—SQL fundamentals, hands-on BI projects, strategic conversations with internal data teams—plus the essential technical skills that got him hired: advanced SQL, data pipeline understanding, and Python foundations.

You'll get an inside look at real analytics engineering work: spearheading a company-wide DBT migration, navigating data modeling decisions (wide vs narrow tables, incremental strategies), and working with modern data stacks including Snowplow, DBT, Looker/LookML, Redshift, Airflow, Airbyte, and Redash. We also explore A/B testing frameworks, product analytics implementation, and the nuanced differences between analytics engineer and data analyst roles. Nikola shares his proven transition playbook (Excel → SQL → dashboards → meaningful projects), networking tactics that opened doors, mentorship approaches, and the communities and resources that accelerated his learning.

Whether you're in marketing, operations, or any non-technical role considering a move into data, this episode provides a concrete roadmap with actionable steps, realistic timelines, and insider insights to help you successfully pivot into analytics engineering." +topics: +- data science +- analytics engineering +- career transition +- tools +dateadded: 2022-11-19 + +duration: PT00H54M34S + +quotableClips: +- name: 'Episode Overview: Switching from Marketing to Analytics Engineering' + startOffset: 0 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=0 + endOffset: 32 +- name: 'Early Career & Startup Experience: London, Berlin, Movinga' + startOffset: 32 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=32 + endOffset: 64 +- name: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' + startOffset: 64 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=64 + endOffset: 173 +- name: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' + startOffset: 173 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=173 + endOffset: 438 +- name: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' + startOffset: 438 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=438 + endOffset: 525 +- name: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' + startOffset: 525 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=525 + endOffset: 593 +- name: 'Internal Pathway: Conversations with BI Team and Required Skills' + startOffset: 593 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=593 + endOffset: 662 +- name: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' + startOffset: 662 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=662 + endOffset: 770 +- name: 'Transition Phase: Balancing Marketing Work and BI Projects' + startOffset: 770 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=770 + endOffset: 854 +- name: 'Current Responsibilities: Analytics Engineering, Product Support & A/B 
Testing' + startOffset: 854 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=854 + endOffset: 1114 +- name: 'Data Modeling in Practice: DBT Migration and Transformation Layers' + startOffset: 1114 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1114 + endOffset: 1234 +- name: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, + Redash' + startOffset: 1234 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1234 + endOffset: 1328 +- name: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' + startOffset: 1328 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1328 + endOffset: 1392 +- name: 'Looker & LookML Experience: Reporting and Dashboard Building' + startOffset: 1392 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1392 + endOffset: 1491 +- name: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' + startOffset: 1491 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1491 + endOffset: 1506 +- name: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational + Fit' + startOffset: 1506 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1506 + endOffset: 1720 +- name: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' + startOffset: 1720 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1720 + endOffset: 1828 +- name: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' + startOffset: 1828 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=1828 + endOffset: 2026 +- name: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' + startOffset: 2026 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2026 + endOffset: 2130 +- name: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and Udemy + SQL' + startOffset: 2130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2130 + endOffset: 2307 +- name: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' + 
startOffset: 2307 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2307 + endOffset: 2376 +- name: 'Domain Knowledge Advantage: Marketing Funnel, User Journey & Empathy' + startOffset: 2376 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2376 + endOffset: 2510 +- name: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' + startOffset: 2510 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2510 + endOffset: 2709 +- name: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' + startOffset: 2709 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=2709 + endOffset: 3023 +- name: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' + startOffset: 3023 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3023 + endOffset: 3130 +- name: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally + Optimistic)' + startOffset: 3130 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3130 + endOffset: 3226 +- name: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' + startOffset: 3226 + url: https://www.youtube.com/watch?v=GawJ7mG5ElQ&t=3226 + endOffset: 3274 + +transcript: +- header: 'Episode Overview: Switching from Marketing to Analytics Engineering' +- line: This week, we'll talk about switching careers from marketing to analytics + engineering. We have a special guest today, Nikola. Nikki started her career as + a performance marketing specialist and quickly realized that she needs to rely + on data to make good decisions. That's how her data journey started and she eventually + became an analytics engineer. In this interview, we will find out how that happened. + Welcome to our event. + sec: 0 + time: 0:00 + who: Alexey +- line: Thank you very much for having me. 
+ sec: 30 + time: 0:30 + who: Nikola +- header: 'Early Career & Startup Experience: London, Berlin, Movinga' +- line: I want to mention – this is something new – questions for this interview were + prepared by Leat Shemesh, and Victoria Perez Mola, so thanks a lot for your help + in preparing the questions. If anyone here who is listening and wants to help + us prepare for more interviews in the future, please reach out to me. Okay, let's + start. Before we go into our main topic of switching to analytics engineering, + let's start with your background. Can you tell us about your career journey so + far? + sec: 32 + time: 0:32 + who: Alexey +- header: 'Marketing Role at Ecosia: Generalist Tasks and Responsibility Growth' +- line: Yeah, of course. I actually studied in the UK, in London, and I moved over + to Berlin soon after graduating from my Bachelor's quite spontaneously. I found + myself just in the data startup scene, like many English-speaking people do, [chuckles] + because it was pretty much the only available route. So I started out working + for Movinga, which was a big removals startup, backed by Rocket Internet. I was + working in the operations team there. I kind of had my first taste of working + at a startup there. + sec: 64 + time: '1:04' + who: Nikola +- line: Of course, as you can imagine, that was very intense – fast growth, lots of + change. It was kind of a baptism of fire for six months. After that, I found a + job at Ecosia. I was really following Ecosia really closely because I really was + inspired by the business model and the mission. For those who don't know, Ecosia + is the search engine that uses its profits to plant trees. It's essentially a + purpose company, which means that profits are basically entirely used towards + financing the tree planting project. Then I suddenly saw a job for a generalist + marketing role, which I applied for. My first role at Ecosia was actually sort + of more generic marketing. 
+ sec: 64 + time: '1:04' + who: Nikola +- line: Was it something that you also did at Movinga? Was it something different? + sec: 166 + time: '2:46' + who: Alexey +- header: 'Performance Marketing: Rapid Feedback Loops and Data-Driven Optimization' +- line: It was completely different. But it was a generalist kind of junior role, + where you're helping write press releases, think up campaigns, reach out to potential + partners – this kind of work. I've done quite a lot of that through university + when I've been volunteering for an organization that helps students get into volunteering. + Through that kind of work, and through more not professional work, but rather + just more organizing, political work, event planning and stuff I've done at university, + that's kind of where I had built up those organizational and marketing skills + from. That's essentially what I ended up really speaking about largely at the + interview. + sec: 173 + time: '2:53' + who: Nikola +- line: We were a really small company with 15 people when I joined. So it was really + one of those early-stage startups. We were doing whatever job needed to be done. + Sometimes it was replying to user feedback, other times it was helping test a + new app design – all sorts of things. At some point, I felt like I really wanted + to go deeper into an area and I basically started running the paid campaigns that + we started doing after I joined the company. First it was on Facebook, but later + on we expanded onto YouTube and Instagram. And I really enjoyed that. I found + it very helpful to really focus in on a specific area. + sec: 173 + time: '2:53' + who: Nikola +- line: Something I found very gratifying about performance marketing was that you + get results very quickly, so you can kind of really see what's working and what's + not working. 
As opposed to other areas of marketing where something like a press + campaign or brand activation, where it's not necessarily clear what impact that + might have had right away. Sometimes it's really difficult or almost impossible + to measure, which I found very frustrating. [chuckles] But with performance marketing, + you're given the data immediately and you can analyze that and make a decision + in minutes on how to move forward. I got really, really into that. + sec: 173 + time: '2:53' + who: Nikola +- line: Of course, there are so many online resources for performance marketing, and + in general. It's a relatively new discipline as well, in the grand scheme of the + history of marketing. So I was really able to dive into that by myself, largely. + I was given a lot of responsibility at the company as well, so I learned that + way. I did that for two years. At some point, I also started to kind of think, + “Okay. Well, I feel like I've kind of understood this. It maybe has its limits + in terms of what's interesting or not.” The part that I really enjoyed was looking + at the data coming in, analyzing what the click-through rates are saying, what + the conversion rates are and what that means. “How can we optimize this campaign + based on the data that we're getting? How does it compare with historical trends?” + All of this sort of work I really enjoyed. + sec: 173 + time: '2:53' + who: Nikola +- line: At the same time around this time, the company had switched to Looker from + Tableau. At the time, we only had one data person at Ecosia. I helped her with + the migration to Looker just as a side project. Since I was kind of the person + who was most comfortable with data and reporting and numbers and measuring KPIs + and whatnot in the marketing team, I took on building out the marketing team reporting. + And I really enjoyed that. + sec: 173 + time: '2:53' + who: Nikola +- line: It was your initiative, right? 
Nobody told you, “Hey, you should do this.” + You were just like “Okay, this sounds interesting. I really like this topic. And + I kind of learned everything that was there about performance marketing, so let + me try to also run this new tool.” + sec: 424 + time: '7:04' + who: Alexey +- header: 'Career Pivot During Pandemic: Moving Toward BI and Analytics' +- line: Yeah, exactly. I think at this point, it wasn't really clear to me that I + wanted to necessarily move into the data team. I just wanted to maybe have more + focus on numbers and data in general, but probably still within the marketing + team. Eventually, I think the big shift that happened was – the pandemic hit. + Like a lot of people, it just forced me to consider what I was doing and whether + I was happy in my role, and I found that I really wasn't. At this point, I decided + to yet make the shift into the BI team. I think, at this point, I had already + done a SQL course some months before with a view to going down the marketing/analytics + route. But with the pandemic, I really realized that I wanted to move away from + the purely marketing focus and go towards BI. + sec: 438 + time: '7:18' + who: Nikola +- line: These SQL courses – did you have a plan that you wanted to work in the BI + team eventually? Or it was like, “Okay, let me see what I should do in order to + do my job better.”? + sec: 509 + time: '8:29' + who: Alexey +- header: 'Preparing for BI: SQL Course and Marketing-Analyst Bridge' +- line: I think I remember speaking to my colleague in the BI team, who was in the + context of being a marketing analyst person. Initially, the idea was kind of that + I'd sit between marketing and BI. But I think it's because I really didn't think + it was possible for me to move departments. That hadn't really happened in the + company before. There wasn't really an example of that to me. So I think I was + rather thinking, “Well, what's possible? 
What could I do?” And it was this marketing + analyst role. But yeah, I definitely took the SQL course in order to move closer + towards the data side. + sec: 525 + time: '8:45' + who: Nikola +- line: So then you realized, “Okay, maybe I'm not really super happy with the job + I'm doing in the marketing department and there is this BI team.” So did you just + approach them and ask, “Can I just join you and start working with you?” Or how + did this happen? + sec: 576 + time: '9:36' + who: Alexey +- header: 'Internal Pathway: Conversations with BI Team and Required Skills' +- line: Eventually – yes. I think eventually the conversation was already there, as + I mentioned before, around how to become more into this marketing analyst role. + Already, my colleague was giving me lots of advice. It was probably through that + process and those conversations that the possibility of me moving into the BI + team came up, to be honest. I don't remember exactly who brought it up. But what + I remember is my colleague in BI saying, “Well, these are the things that we really + need you to have. Once you have those things, there's no reason why you shouldn't + be able to join the team as a junior analyst.” + sec: 593 + time: '9:53' + who: Nikola +- line: Do you remember what these things were? SQL, I suppose, is one. + sec: 642 + time: '10:42' + who: Alexey +- line: Yes, SQL was the main thing. Then learning and understanding the data pipeline + that we had was another. + sec: 645 + time: '10:45' + who: Nikola +- line: So it wasn't a list of courses that you have to take, but rather, “Okay, these + are the things we’re working on. Try to figure out what's happening there.” + sec: 655 + time: '10:55' + who: Alexey +- header: 'Core Skills: Advanced SQL, Data Pipeline Familiarity, Python Basics' +- line: Yeah, exactly. One of the things was like, “Python would be great.” I ended + up doing a Python course, but barely actually using it. It's been useful to have. 
+ Of all the things, the most useful practically was jumping into… once you know + SQL and you can write and read SQL, you'll still need to get good at reading and + writing SQL. You start coming across much more complicated SQL queries and you're + like, “[expletive], there's like a nested loop here. Where is this coming from?” + sec: 662 + time: '11:02' + who: Nikola +- line: Then improving SQL to be able to read and understand much more complex data + models – that was a big part of the journey. It was really about understanding + what our models were, how everything fit together in the wider scheme of the pipeline, + and how it came to be. Because I had no idea even how a tracker really worked + – I just sort of knew that there was this thing called the Snowplow Tracker that + collected the data. But it was all sorts of not very detailed knowledge. So really + going in and understanding how things really work to get the data from one point + to the other and transform it. + sec: 662 + time: '11:02' + who: Nikola +- line: Did you need to keep doing your old job of marketing specialist, or could + you completely just immerse yourself in BI? Or was there some in-between period + where you had to do both? + sec: 749 + time: '12:29' + who: Alexey +- header: 'Transition Phase: Balancing Marketing Work and BI Projects' +- line: To be honest, as part of the performance marketing role, I was really acting + like a kind of marketing analyst, in a way – building the reporting for the teams + and for the people who are doing other jobs, I was helping them build reports + and managing that. So I was already kind of doing a lot of that kind of work. + There was a transition period where the first projects that I worked on were more + marketing-focused. I think one of the main projects was helping establish how + to measure brand campaigns, looking into that and building dashboards based on + that, and a wider topic around that. 
+ sec: 770 + time: '12:50' + who: Nikola +- line: I think it was kind of a transition period, but at some point, I just handed + over the main performance marketing tasks, which are managing the campaigns. It + was quite a good moment because the pandemic meant that we were already hitting + a slight stagnation point with some of our campaigns. Then the pandemic hit and + it was really difficult to record new ads, as well, in quarantine. There was a + kind of natural slowing down of that side of the work anyway, so it was a good + moment to pivot. + sec: 770 + time: '12:50' + who: Nikola +- header: 'Current Responsibilities: Analytics Engineering, Product Support & A/B + Testing' +- line: And what do you do now? What do your responsibilities include? + sec: 854 + time: '14:14' + who: Alexey +- line: As I've mentioned, I'm working as an analytics engineer, but also as a data + analyst. We are still a relatively small team. We are four people in total. For + reference, the company size is just over 100. None of us have a particularly specialized + role. We kind of do a little bit of everything at the moment. Our team lead is + on extended leave, so I'm acting as interim team lead. A lot of work is really + working with the new CPO who's just come in, reassessing the KPI that we have + at the company and how we measure them. Of course, I think it’s quite common when + a new C-level comes in to rehash the dashboards and rework the core reporting + to suit the new requirements, so a lot of work has been recently done on that. + sec: 860 + time: '14:20' + who: Nikola +- line: There are two of us that are in these analyst roles and we work very closely + with product managers. We're focusing very closely on supporting the various product + teams with experimentation, building out new features, A/B testing, evaluating + those, and when necessary, building out our data model to reflect those new changes. 
+ I think the day to day is really a mixture of supporting the teams – sometimes + ad hoc analysis is needed. For example, there is a new feature being developed + and there's some hypothesis around the kinds of users they want to reach and how + big those cohorts might be, jumping into the data and taking a look at that. Other + work is maybe more on an initiative of our own. For example, recently, we ran + a big RFM analysis (recency, frequency, monetary) user behavior analysis, which + was a bigger project. There's many ways to do it and we took some time to experiment + with different options. That's been a larger project over some months with several + presentations of insights. + sec: 860 + time: '14:20' + who: Nikola +- line: There are those bits of work where we're not necessarily working directly + for an individual product manager, but working on wider pieces of analysis and + insight that's beneficial for the company as a whole. I just wanted to add that + we've also recently started doing a few small data science projects in the team, + just on the side, which I myself am not directly involved with. But one of my + colleagues is. We're trying to basically run some NLP models on trying to improve + how we understand queries that our users make, and try and essentially build better + query categorization so we can ultimately serve better results. It's been really + nice that we've been able to pick up some more data-sciencey topics in the team + and not work exclusively on reporting and internal. + sec: 860 + time: '14:20' + who: Nikola +- line: 'This query understanding – it''s about understanding intent, right? Why a + user is searching for some information: Do they want to come in and navigate to + a certain website? Do they want to get some information? Do they want to buy something?' + sec: 1079 + time: '17:59' + who: Alexey +- line: Yeah, exactly. Specifically, it's around being able to segment various queries + into the correct categories. 
So “does this query or query group fall into the + category of ‘travel’ or ‘shopping’ or ‘transport’ or etc.?” + sec: 1094 + time: '18:14' + who: Nikola +- header: 'Data Modeling in Practice: DBT Migration and Transformation Layers' +- line: So a different kind of characterization. When you were describing what kind + of duties you have and what kind of things you work on, you mentioned that you're + working on KPIs, dashboards, supporting product teams with experiments, ad hoc + analytics. + sec: 1114 + time: '18:34' + who: Alexey +- line: You also mentioned a data model. Up to the data model, I think I understood, + more or less, what you are doing. But what is a “data model”? Why do you need + to build a data model? Why do you need to update it? + sec: 1114 + time: '18:34' + who: Alexey +- line: We built a data model in DBT based on something called the domain model. Basically, + we began two or so years ago, maybe even longer now. We migrated to DBT. In that + moment, we basically rewrote all our queries basically to build all our tables + – the whole database was rebuilt from scratch. It had evolved over time. We have + something like six installed tables or something ridiculous. + sec: 1147 + time: '19:07' + who: Nikola +- line: Six what tables? + sec: 1190 + time: '19:50' + who: Alexey +- line: Install. + sec: 1191 + time: '19:51' + who: Nikola +- line: The data model is about describing what kind of data you have – all this schema + and definitions, right? + sec: 1195 + time: '19:55' + who: Alexey +- line: Yeah, sorry. For the data model, what I mean is – what we have in DBT, essentially, + is all about different transformation logic for the entire business, from the + most basic staging layer down through to the presentation tables that we then + use for analysis. 
+ sec: 1200 + time: '20:00' + who: Nikola +- header: 'Analytics Tooling Stack: Snowplow, DBT, Looker, Redshift, Airflow, Airbyte, + Redash' +- line: I’m just trying to understand what kind of tools you use. You mentioned three + tools already. You mentioned Snowplow, which is a tool for tracking – to understand + what kind of actions users perform and save interactions. Then you also mentioned + DBT, which is a tool for transformation. You have some data sitting somewhere + and you need to change it slightly, rework, aggregate it, and then put it in such + a form that you can use it for reports. You also have Looker, which is a tool + for dashboards. What else do you use? You probably use some sort of database (a + data warehouse), right? Maybe some other tools too? + sec: 1234 + time: '20:34' + who: Alexey +- line: Yeah, exactly. We use AWS services, so we use S3 and Redshift, and also Spectrum + as well to query Athena. We play around a lot with so-called “hot and cold storage” + so keeping data in Redshift versus keeping it in S3 in parquet files. That's due + to cost optimization. That's what we use for our lake (warehouse). And then we + use Airflow as well, as our orchestration tool and for our extracting and loading + operations. + sec: 1278 + time: '21:18' + who: Nikola +- line: Was it a part of your job to set up all these tools? + sec: 1323 + time: '22:03' + who: Alexey +- header: 'DBT Implementation: Leading a Migration Project and Data Modeling Learnings' +- line: It was part of my job to set up DBT. That was one of the first big projects. + I'd been in the team for maybe six months or so and then we began the migration + to DBT. We actually worked with a data consultancy, a small one, that helped us + because we were essentially three people. I led that project – it was one of my + first big projects, which was great. It was a really big learning curve. 
+ sec: 1328 + time: '22:08' + who: Nikola +- line: I got to learn not only about DBT (the tool itself) but also data modeling + theory and practices and different ways of doing things – what makes sense depending + on the size of your data and your goals and needs. That was really great. So DBT + is the main one. Looker as well, as I mentioned, I helped to migrate to and implement + in the company. + sec: 1328 + time: '22:08' + who: Nikola +- line: This was before you actually joined the BI team, right? So you started this + in marketing looking at this tool. + sec: 1386 + time: '23:06' + who: Alexey +- header: 'Looker & LookML Experience: Reporting and Dashboard Building' +- line: Actually I strangely learned LookML before I learned SQL, which is a slightly + strange, I think, way of doing it. [chuckles] But there we go, that's how it happened. + And Airflow was set up by my colleague who has more of a data engineering role + within the team. That was also set up relatively recently – in the last two, three + years or so. Those are the main tools. We recently started using Airbyte. Some + people might be familiar with that. It was basically to be able to extract from + some kind of common API's data sources. We haven't used it extensively. + sec: 1392 + time: '23:12' + who: Nikola +- line: So far, we often find that we've got a lot of options, but specifically what + we need often doesn't necessarily have the connection yet. But I think it's a + nice tool – relatively easy to use. We've also recently started using Redash, + which is an open source visualization tool that we use for more ad hoc queries, + to be able to have the visualization attached to them as well. + sec: 1392 + time: '23:12' + who: Nikola +- line: It seems like most of the tools are open source, apart from AWS. Is Looker + open source? + sec: 1466 + time: '24:26' + who: Alexey +- line: No, I don't think so. + sec: 1472 + time: '24:32' + who: Nikola +- line: But the rest are, right? 
Snowplow is open source. DBT is open source. Airbyte + is open source. Redash – I don’t know. Is it? + sec: 1474 + time: '24:34' + who: Alexey +- line: Redash is open source as well. + sec: 1481 + time: '24:41' + who: Nikola +- line: So you like open source. Don’t you? + sec: 1484 + time: '24:44' + who: Alexey +- line: Yes. [laughs] Exactly. + sec: 1487 + time: '24:47' + who: Nikola +- header: 'Infrastructure Choices: Self-Hosted Tooling vs DBT Cloud' +- line: Do you host all these things yourself? For example, when it comes to DBT, + do you use their cloud? + sec: 1491 + time: '24:51' + who: Alexey +- line: No, we host everything ourselves. That's just the general decision of the + engineering department. + sec: 1497 + time: '24:57' + who: Nikola +- header: 'Role Definition: Analytics Engineer vs Data Analyst — Overlap & Organizational + Fit' +- line: When you joined the BI team were you already called an analytics engineer, + or you just realized over time that, “Okay, this is what I should call myself.”? + sec: 1506 + time: '25:06' + who: Alexey +- line: My official role is Analytics Engineer and Data Analyst, because I really + do both. We’re not the size of a BI team that it's possible for someone to want + too much to do. But I think initially, it was… I don't know what the title was + initially, BI Analyst or something – Data Analyst. At that point, even the term + Analytics Engineer really wasn't common. I think I really only learned about that + in the process of implementing DBT, which was in 2020. + sec: 1517 + time: '25:17' + who: Nikola +- line: Really, some time has passed since DBT has obviously become huge in the data + community. I think this role of an engineer is also becoming much more common. + But I think at the time, when I joined the team, that wasn't even an option. I + don't think anyone even thought of that. 
I don't think the people in the BI team + were actually calling themselves that, even though that's essentially the job + they were doing. Over time, as we all became familiar with that new term and realized + that it basically described what we were doing – so that was taken on. + sec: 1517 + time: '25:17' + who: Nikola +- line: Do you think there's some hype in that role? I mean, there was no such thing + before and now, all of a sudden, everyone’s talking about analytics engineering. + sec: 1596 + time: '26:36' + who: Alexey +- line: Yeah. To be honest, if you have a small BI team of six or less people – I + guess it depends on your company, and your product and the business model – but + I think it's a little bit overhyped. Ultimately, I still think that you need quite + a large organization to be able to comfortably segment data analysts and analytics + engineers – they have so much crossover anyway. I can see that in larger organizations, + it's really helpful to have that separation. But I think in smaller ones, it's + not that helpful, at least in my experience, which is simply this is one company. + I can't speak for others, but I found that it's helpful in terms of your own personal + progress, because you can align yourself with this role and say, “Okay, yes. This + is what I do. This is somewhere where I could improve and an area that I could + spend more time on, but I'm not necessarily sure.” + sec: 1605 + time: '26:45' + who: Nikola +- line: I think for most small/medium-sized companies, I don't think it's necessary + to get really bogged down into the differences between the two. Ultimately, you’re + still going to need very overlapping skills. You need to be very analytical, very + comfortable with your KPIs, what the business model is, the domain model – all + of that work, which is not limited to an analytics engineer and a good data analyst + needs all of those things. I think there's maybe a little bit of hype. 
But again, + as I said, it depends on the organization size. If you have a huge company with + a data department of 20, 30, 40 people, then of course, it just makes structural + sense to split out and focus. + sec: 1605 + time: '26:45' + who: Nikola +- header: 'DBT''s Influence: How DBT Shapes the Analytics Engineering Role' +- line: Do you think it's synonymous to using DBT? Like “You use DBT, therefore, you’re + an analytics engineer.” And “If you’re an analytics engineer, then you use DBT.”? + Are they the same thing? Or can you be an analytics engineer without using DBT? + sec: 1720 + time: '28:40' + who: Alexey +- line: It's a good question. I feel like DBT themselves have really promoted this + concept, right? + sec: 1740 + time: '29:00' + who: Nikola +- line: I think, yeah. It’s coming from them. + sec: 1747 + time: '29:07' + who: Alexey +- line: Exactly. [chuckles] In a way, yeah – it kind of is synonymous. I, at least, + haven't seen many job applications for an analytics engineer that haven't been + like “Your job is to work with DBT.” [chuckles] I'd be interested in how that + role could look with a different stack. I imagine there are people who are working + under the title of data engineer or data analyst who do the work of an analytics + engineer, but just don't call themselves that in other companies that maybe don’t + use DBT. + sec: 1749 + time: '29:09' + who: Nikola +- line: In the company where I work, we don't have DBT. We have a homegrown DBT kind + of replacement. But it was before DBT was popular. Like many other companies, we + kind of invented DBT, which is like an Airflow-based way to schedule SQL queries. + I don't think any of our analysts who use this call themselves analytics engineers. + I'm wondering, are there any tools that do the same thing as DBT apart from these + homegrown tools like we have? Is there any such thing on the market? 
+ sec: 1788 + time: '29:48' + who: Alexey +- header: 'Data Modeling Theory: Wide vs Narrow Tables and Incrementalization Tradeoffs' +- line: I don't know, to be honest. [laughs] I haven't had the time to really look + into it. I think at the moment, DBT is on such a growth trajectory. I see so many + job ads that are looking for people to help them set up DBT. I think it's really + taking off, so I don't presently know. Like you said, we were previously using + SQL Runner, which is like Snowplow. It’s kind of similar. That’s exactly what + you described, basically. An orchestration tool for SQL queries, where you can + specify the order and whatnot. Incrementalization strategies were not invented + by DBT. There's many ways to set those up and there’s other kinds of setups. + sec: 1828 + time: '30:28' + who: Nikola +- line: In terms of analytics engineering, I think for me the focus is on the wider + architecture of the data model, and with data analysts, for example, perhaps there’s + not so much focus on that. For me, that's where the analytics engineering role + is really important. Once you start collecting from various different data sources + you have all of these issues around consistency and, of course, freshness. All + of these various concerns are where an analytics engineer really needs to shine + – to understand how everything fits together in this wider ecosystem. Perhaps + an analyst doesn't necessarily need to understand all the transformations and + how everything connects to each other, but an analytics engineer really does. + sec: 1828 + time: '30:28' + who: Nikola +- line: I think this focus on data modeling theory is much more important. In that + way, it's slightly more like a theoretical role in many ways, which I think is + often not really talked about. 
Often the focus is on the technical side, which + it is, but I think it's really important to understand, as an analytics engineer, + the different kinds of data modeling frameworks and what's possible. Whether having + a wider table or a narrower table – in which case should you go for one versus + the other? When should you choose a certain kind of incrementalization strategy + and when not? So I think that's part of the role that is very specific. I guess + it’s becoming more and more important, as there is so much more data that companies + in general are collecting. By virtue of more companies, smaller companies, different + kinds of companies, and the traditional big enterprises start using and collecting + data and building up data departments, then, of course, this becomes more of a + need. + sec: 1828 + time: '30:28' + who: Nikola +- header: 'Learning Data Modeling: Practical Resources, Blog Posts and Mentorship' +- line: About this data modeling theory that you mentioned, and selecting whether + it should be a wide table or a narrow table – if I wanted to learn more about + this, where would I go? What kind of resources do you have about this? + sec: 2026 + time: '33:46' + who: Alexey +- line: That is a good question. I really struggled a little bit with this, because + there's really a lot of quite… I wouldn't even call it “advanced” stuff. But the + textbooks that you can buy on data are very dry. [laughs] I'll just be honest. + sec: 2041 + time: '34:01' + who: Nikola +- line: Kimball and this kind of stuff, right? + sec: 2057 + time: '34:17' + who: Alexey +- line: Yeah, Kimball. There's loads of textbooks. + sec: 2060 + time: '34:20' + who: Nikola +- line: It’s something I studied at university but never actually saw this book outside + of university. + sec: 2061 + time: '34:21' + who: Alexey +- line: Exactly. To be honest, I've given them a good shot and I found that I just + learned by doing. 
I learned through talking to the people who were my mentors + or seniors – who are experts and I just asked as many questions as I could. I + was never afraid to just ask stupid questions (and repeat questions if I needed + to) until it made sense. + sec: 2067 + time: '34:27' + who: Nikola +- line: Sometimes if I had the basic knowledge and had something that I wanted to + understand, I would go and just research online. There are increasingly a lot + of really good blog posts and newsletters that are available. I think increasingly + there are more and more resources that are a lot more accessible to people who + haven't necessarily studied computer science or data science or statistics or + these sorts of subjects at university. + sec: 2067 + time: '34:27' + who: Nikola +- line: You didn't study that, right? Did you? + sec: 2127 + time: '35:27' + who: Alexey +- header: 'Nontraditional Background: Classics to Data — Just-In-Time Learning and + Udemy SQL' +- line: No, I studied classics, which are Latin and ancient Greek. [laughs] + sec: 2130 + time: '35:30' + who: Nikola +- line: That was your education? + sec: 2136 + time: '35:36' + who: Alexey +- line: That was my Bachelor's, yeah. + sec: 2138 + time: '35:38' + who: Nikola +- line: Interesting. So you speak Ancient Greek and Latin? + sec: 2142 + time: '35:42' + who: Alexey +- line: No… I can read it. + sec: 2144 + time: '35:44' + who: Nikola +- line: Interesting. Okay. This just made our interview even more interesting. [both + laugh] How do you go from studying Ancient Greek and Latin to being an analytics + engineer? You learn basically everything you needed yourself, right? + sec: 2148 + time: '35:48' + who: Alexey +- line: Yeah, exactly. Um… + sec: 2166 + time: '36:06' + who: Nikola +- line: By “yourself” I mean not as a part of any official curriculum. + sec: 2169 + time: '36:09' + who: Alexey +- line: Yep. To be honest, I did this SQL course on Udemy that cost me 12 euros. And + it was great. 
It was really, really good. It was quite long. I can't remember + exactly, but I think it was just called The Complete Guide to SQL and it's run + by this American dude called Colt Steele. It's just a very strange name. He's + got loads of good Python courses as well that I did. I just did that in my spare + time. And to be honest, it was really great that it cost me all of 12 euros and + I haven't done a single other SQL course since. + sec: 2174 + time: '36:14' + who: Nikola +- line: Sometimes I do think, “Oh, should I go and pay for one of these fancy courses + in data science or something because it's nice to have structure and whatnot.” + But then I'm like, “Ah. If I just motivated myself, I could do it.” [laughs] There's + so much stuff online. But it's just a case of me being quite lucky to find a good + course right away. I think there are some not very good courses out there. It's + a little bit of hit and miss. One thing that's really great about software engineering + in general and computer science is that if you don't have a lot of resources, + you can really teach yourself. There are a lot of resources online. + sec: 2174 + time: '36:14' + who: Nikola +- line: At the same time, as I said, practicing is really the thing that makes the + difference and I was very lucky that I was already at a company where I knew the + domain very well, the business model very well, the KPIs. I kind of had all of + that already covered and could just focus on developing the SQL skills and data + modeling, etc. I can imagine that someone who is maybe approaching this as a career + change and maybe taking some time out to do it – it may be a little bit more difficult + because you don't have that context of a specific business or a specific problem + that you can hold in your mind as you think about these problems and have an example + that you can apply the theory to. 
+ sec: 2174 + time: '36:14' + who: Nikola +- header: 'Product Analytics Focus: Growth, Retention, RFM Analysis and NLP Experiments' +- line: Yeah, there is a thing called “just in time learning,” and I think you took + this to the extreme. So without any formal education in computer science or analytics, + you just focused on a specific problem, which in your case was marketing and then + you were like “Okay, how do I set up Looker to do this thing?” By the way, are + the tasks that you do now still more or less related to marketing? You mentioned + RFM analysis. I think it's still somewhat related, right? + sec: 2307 + time: '38:27' + who: Alexey +- line: Not really, to be honest. No. At the moment, I'm really working very closely + with the product team. We are focusing on growing, acquiring more users, retaining + more users – which are all of course interlinked goals of the marketing team. + It's not directly relevant, but my direct stakeholders are the product managers. + sec: 2338 + time: '38:58' + who: Nikola +- line: Okay. So I guess your background in marketing really helped you, right? + sec: 2370 + time: '39:30' + who: Alexey +- header: 'Domain Knowledge Advantage: Marketing Funnel, User Journey & Empathy' +- line: Yeah, it really did. I’ve noticed how just in everyday work, I definitely + see an edge that I have because I'm very comfortable with things like a marketing + funnel and a conversion funnel or web acquisition funnel. For example, a product + manager might be focusing specifically on a part of the funnel or a whole funnel + as part of the user journey and as a marketing person, you think about the user + journey all the time. What are the touch points of the user? How do they feel + at this moment? What are they thinking at this moment? What have they done? Where + have they come from? You have this quite close empathy with the user, and specifically + the journey. 
+ sec: 2376 + time: '39:36' + who: Nikola +- line: At the same time, your goals in marketing are to constantly optimize and grow + and get more users or higher retention or more signups or whatever it might be. + So you have this growth mindset that I think is very useful when you come to advising + people from a data point of view because you can ask the question, “Yes, you've + got some good feedback from the users on this feature. But, ultimately, the top + line hasn't moved at all. We did this because we wanted to grow (whatever this + KPI is).” It definitely does help, largely in the realm of understanding the user + journey. It means that you can really hold this user perspective in your head, + but also the data perspective together with it, and advise with those two things + in your head. + sec: 2376 + time: '39:36' + who: Nikola +- line: If somebody wants to follow your journey – so somebody who's working in marketing + (or not necessarily in marketing, but they really want to go into data and start + doing analytics engineering) and they are experts in their domain – what would + you suggest for them to do? + sec: 2491 + time: '41:31' + who: Alexey +- header: 'Transition Playbook: Excel, SQL, Dashboard Practice and Small Projects' +- line: Firstly, I would say [chuckles] Excel is your best friend. Excel is great, + ultimately. [laughs] I know everyone hates it, but it really doesn't get the credit + it deserves. I still have people in the company who really should and don't know + how to make a pivot table. They are quite annoying to make in Excel. The most + difficult pivot table you will make will be in Excel. If you can do it there and + be comfortable (understand what's happening with columns and rows) that’s the + first place to go. So be really, really comfortable with Excel, play around with + functions, pivot tables, and just explore. 
Look at different ways of trying to + take a dataset that you feel comfortable with – it might just be something really + simple like daily signups by country – and just, in Excel, start playing around + with that and asking questions. + sec: 2510 + time: '41:50' + who: Nikola +- line: Then, of course, SQL is the most important thing. Learn SQL, try and find + some datasets online that you can play around with and practice SQL. That's really, + really useful. But ultimately, where I found a little bit of a gap in the self-learning + was between the online SQL resources and finding advanced SQL queries that made + sense – that weren't written by someone on the other side of the world about a + company that had no connection to, didn't learn from the business models and was + written in a way that, for example, wasn't the style that was going to be written + in my team. It ended up just being a little bit confusing and extra work to try + and understand. So if there's a way to access some of the SQL code that the BI + team are using – maybe you can ask them to share a couple of SQL queries they + use to make the main tables – that's definitely something to do. + sec: 2510 + time: '41:50' + who: Nikola +- line: If your company is using Looker, that's great. That's amazing – to get familiar + with that. Really, just start building, building, building dashboards. Explore + it. Become really comfortable with filtering, pivoting – those sorts of things. + There are a lot of resources from Looker online as well. I think from Tableau + as well, or whatever visualization tool you're using – it doesn't really matter. + Just become comfortable with the basic features of those. Those would be the main + things, I think. Then go from there. Find someone who can be your, if not mentor, + then your champion – an ally, I guess, in the data team. Ask them, “What do I + need to do? What skills are still missing? How do I do them? 
Do you think it's + possible?” Ask them what they would recommend if you're in an existing company + and you're looking to move to that role. I think that would be my suggestions. + sec: 2510 + time: '41:50' + who: Nikola +- header: 'Mentorship & Sponsorship: Internal Champions, Confidence and Representation' +- line: How important do you think it is to have a mentor or champion in this journey? + For you, from what I understood, it was quite important. It was crucial. That + person was a marketing analyst, if I remember correctly, that actually helped + you. She told you what you should do, what kind of things you should focus on, + and then she also was helpful for you to actually transition to the team. Right? + sec: 2709 + time: '45:09' + who: Alexey +- line: Exactly. She was the BI analyst (the data analyst) – the only one that we + had at the time. Actually, sorry we already had two people in the data team and + she was one of them. For me, it was very useful and important. To be honest, though, + it depends on the company, your position in the company, how comfortable you feel, + what level of power (so to speak) you have in the company. + sec: 2734 + time: '45:34' + who: Nikola +- line: Also, for me, as a woman, I think transitioning from marketing into a more + technical role (I was going to move to the engineering department, there was a + meeting) I felt an element of imposter syndrome. I thought, “Oh, what am I doing? + Can I really do this?” I think it really helped me to have another female, basically, + mentor to champion me and encourage me and say, “Yeah, you can do this. Definitely, + you can do this. You just need to do this, this and that. You can definitely do + that. Once you've done that, we can find a way.” So it depends. I think if you + have a lot of motivation and you're very clear on what you want, and you're confident, + then I don't think it's necessarily needed. 
+ sec: 2734 + time: '45:34' + who: Nikola +- line: But particularly for minorities, there's a lot of support groups outside of + work like, PyLadies and lots of different various support groups for minorities + in tech, which are great to be inspired by. But I think having that one person + in your company who you can relate to can be really helpful just in terms of building + up your own confidence. It's definitely something that helped me also to not just + transition into the team but, once I was in the team, to accelerate quite quickly. + sec: 2734 + time: '45:34' + who: Nikola +- line: Yes, I was junior when I joined, but my career path up to being a mid-level + analyst and now intern team lead was a lot quicker because I had to fight and + be like, “Well, I have been doing analytics work for years before. I haven't actually + picked all of this up from scratch.” So having the confidence to make that clear + and argue it – it was really helpful having someone to champion me. I would recommend + finding one person in your company who can be that for you. + sec: 2734 + time: '45:34' + who: Nikola +- line: Did you take part in any of these support groups that you mentioned like PyLadies? + Or did you have mentors or people who you constantly talked to outside of the + company? Or was it mostly that person and the rest of the team that you talked + to in order to learn? + sec: 2904 + time: '48:24' + who: Alexey +- line: In my case, it was mainly my two teammates who were the BI team when I joined. + They were incredible. So supportive. They really encouraged me a lot and helped + me hugely. They were very excited for me to join the team and made me feel very + welcome like I deserved to be there. This was very useful because at times, I + was like, “Oh, what am I doing here? This is too hard.” But in terms of external + support, not really, to be honest. 
I have two very close friends who worked in + data, and it was nice to talk to them and have their advice as well – to have + different perspectives from different companies. + sec: 2925 + time: '48:45' + who: Nikola +- line: Particularly as someone who's been at a company for a very long time, I definitely + feel the need to speak to people in different places and see like, “Oh, is it + also like this where you are? Is this a specific issue that only we're facing + or is this a general thing?” Having that perspective has also been really useful + in order to just benchmark certain issues that you come across. [chuckles] I think + having a few more external mentors or support would be great. In the coming year, + I'll probably look for a mentor just to help with kicking off the next phase of + development. + sec: 2925 + time: '48:45' + who: Nikola +- header: 'Networking Channels: LinkedIn, Meetups and DBT Slack for Mentors' +- line: Do you have any ideas where you can look for these mentors? Would it be conferences, + meetup groups or someplace online? + sec: 3023 + time: '50:23' + who: Alexey +- line: Probably a combination of LinkedIn, asking the networks of people that I know + if they have anyone they recommend. Meetups as well. I think that's probably the + best way to go. + sec: 3032 + time: '50:32' + who: Nikola +- line: Is there an analytics engineering meetup in Berlin? + sec: 3050 + time: '50:50' + who: Alexey +- line: I'm not sure. There's definitely a Snowplow meetup that I think has just started + up again (or about to). In terms of the engineering, I'm not sure, to be honest. + I know that there are some data meetups. I'm not sure if that's specifically analytics + engineering. I have kept an eye open on the DBT Slack group, which is extensive + and actually great. They have some city-specific groups and Berlin has yet to + make its appearance. Perhaps in the future, there might be a DBT Berlin. 
+ sec: 3056 + time: '50:56' + who: Nikola +- line: Yeah, I think there should be. One of the people who helped me with the questions + is Victoria. Victoria was a guest on this podcast over a year ago and now she + works at DBT. I think she is or will be organizing something soon. Maybe she will + tell us about that. I see that it's almost time to finish. I wanted to ask you + one last thing. + sec: 3102 + time: '51:42' + who: Alexey +- header: 'Reading List: Analytics Newsletters & Blogs (DBT roundup, Lenny’s, Locally + Optimistic)' +- line: You mentioned that you are subscribed to some newsletters. There are good + blog posts, good newsletters, and these newsletters are quite useful for you. + What kind of newsletters are you subscribed to? If I want to keep an eye on what's + happening in this area, what kind of newsletters should I subscribe to? + sec: 3130 + time: '52:10' + who: Alexey +- line: That's a good question. There's one I'm subscribed to (an analytics engineering + one) that I think is called “The Roundup” or something. Analytics Engineering + Roundup. It might be the DBT newsletter, actually. There's another one that I + just subscribed to like a week or two ago. It’s called Lenny's Newsletter. + sec: 3152 + time: '52:32' + who: Nikola +- line: Lenny's Newsletter. Lenny's the name of the person. + sec: 3184 + time: '53:04' + who: Alexey +- line: I've only just subscribed to it recently. I think it was slightly more product + analytics focused. Then there is a blog that I'm sure most of your readers will + know about. I've just forgotten the name of it. It's called something like Profoundly + Optimistic or something… Locally Optimistic, yeah! Yeah that one. + sec: 3189 + time: '53:09' + who: Nikola +- line: Yeah. They have a guest coming in as well. + sec: 3218 + time: '53:38' + who: Alexey +- line: From time to time, I'll check that one. 
+ sec: 3221 + time: '53:41' + who: Nikola +- header: 'Contact & Wrap-Up: Finding Nikola on LinkedIn and Episode Close' +- line: Profoundly Optimistic is also a good name. [both laugh] If somebody has questions + for you, how can they find you? Is it LinkedIn or are there some other ways to + contact you? + sec: 3226 + time: '53:46' + who: Alexey +- line: Yeah, LinkedIn would be best. They can just message me directly there. + sec: 3244 + time: '54:04' + who: Nikola +- line: Okay, Niki. Thank you very much. Thanks for joining us today. It's been a + while since we started this conversation. So finally, we had this interview. Thanks + a lot for joining us today, for telling us about your journey, for sharing all + the experience and expertise you have. And thanks, everyone, also for joining + us, for being active here. Have a great rest of the week. + sec: 3248 + time: '54:08' + who: Alexey +- line: Thank you for having me. + sec: 3274 + time: '54:34' + who: Nikola +--- + +Links: + +* [Nikola's LinkedIn account](https://www.linkedin.com/in/nikola-maksimovic-40188183/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s07e09-from-math-teacher-to-analytics-engineer.md b/_podcast/from-math-graduate-to-data-analytics.md similarity index 96% rename from _podcast/s07e09-from-math-teacher-to-analytics-engineer.md rename to _podcast/from-math-graduate-to-data-analytics.md index 3dda99eb..11d1ee55 100644 --- a/_podcast/s07e09-from-math-teacher-to-analytics-engineer.md +++ b/_podcast/from-math-graduate-to-data-analytics.md @@ -1,38 +1,152 @@ --- +title: "How to Break into Data Analytics: Networking, Portfolio, SQL & Interview Prep" +short: "From Math Teacher to Analytics Engineer" +season: 7 episode: 9 guests: - juanpablo -intro: 'How do you actually break into data analytics — and what combination of networking, - portfolio work, SQL skills, and interview prep gets you hired? 
In this episode, Juan - Pablo Murillo, an AI and data professional now at Google with prior roles as an - Amazon Business Intelligence Engineer and data scientist at T‑Mobile, walks through - a practical path from math grad to analytics roles.

We cover the full playbook: - where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, - EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present - projects for hiring managers. Juan addresses bootcamp trade‑offs, networking wins - from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, - finding contract or pro bono work, and resume/STAR interview prep. He also discusses - role realities for BI and analytics engineering and employer branding to build credibility. -

Listen for actionable steps and specific tactics—how to structure three - portfolio projects, message templates for outreach, and interview preparation tips—to - help you break into data analytics, improve SQL interview readiness, and turn public - work into job opportunities.' -topics: -- career switch -- data analytics -- career growth +image: images/podcast/from-math-graduate-to-data-analytics.jpg ids: anchor: From-Math-Teacher-to-Analytics-Engineer---Juan-Pablo-e1fplc1 youtube: qh6-HDhw2xY -image: images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Math-Teacher-to-Analytics-Engineer---Juan-Pablo-e1fplc1 apple: https://podcasts.apple.com/us/podcast/from-math-teacher-to-analytics-engineer-juan-pablo/id1541710331?i=1000554506607 spotify: https://open.spotify.com/episode/153XI6DvtNWHYzSAv2UTqw youtube: https://www.youtube.com/watch?v=qh6-HDhw2xY -season: 7 -short: From Math Teacher to Analytics Engineer -title: 'How to Break into Data Analytics: Networking, Portfolio, SQL & Interview Prep' + +description: "Discover data analytics: build a portfolio, master SQL & networking, interview prep, cold outreach and project READMEs to land job offers faster." +intro: "How do you actually break into data analytics — and what combination of networking, portfolio work, SQL skills, and interview prep gets you hired? In this episode, Juan Pablo Murillo, an AI and data professional now at Google with prior roles as an Amazon Business Intelligence Engineer and data scientist at T-Mobile, walks through a practical path from math grad to analytics roles.

We cover the full playbook: where SQL fits in the skills roadmap, building a data analytics portfolio (rpubs, EDA, visualizations, basic ML), portfolio hosting and repo hygiene, and how to present projects for hiring managers. Juan addresses bootcamp trade-offs, networking wins from meetups, LinkedIn tactics for visibility, cold outreach and DIY internships, finding contract or pro bono work, and resume/STAR interview prep. He also discusses role realities for BI and analytics engineering and employer branding to build credibility.

Listen for actionable steps and specific tactics—how to structure three portfolio projects, message templates for outreach, and interview preparation tips—to help you break into data analytics, improve SQL interview readiness, and turn public work into job opportunities." +topics: +- career transition +- data analytics +- career growth +dateadded: 2022-03-19 + +duration: PT01H03M20S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=0 + endOffset: 111 +- name: 'Background & Motivation: Math Graduate to Data Analytics' + startOffset: 111 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=111 + endOffset: 197 +- name: 'Early Roles & Mentoring: Consulting, T-Mobile, Amazon Path' + startOffset: 197 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=197 + endOffset: 246 +- name: 'Community & Resources: Amplifying Learning Platforms' + startOffset: 246 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=246 + endOffset: 325 +- name: Math Foundations & Machine Learning Relevance + startOffset: 325 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=325 + endOffset: 504 +- name: 'Transition Path: Biostatistics, R, SAS and Discovering SQL' + startOffset: 504 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=504 + endOffset: 787 +- name: 'Bootcamp Trade-offs: Cost, Network and a Nine-Month Job Search' + startOffset: 787 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=787 + endOffset: 972 +- name: 'Networking Wins: Meetups Leading to First Offer' + startOffset: 972 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=972 + endOffset: 1110 +- name: 'Building Credibility: Employer Brand & Social Proof' + startOffset: 1110 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1110 + endOffset: 1217 +- name: 'Uncrowded Doors: Alternative Job-Hunting Strategies' + startOffset: 1217 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1217 + endOffset: 1286 +- name: 'LinkedIn Tactics: Active Posting, 
Commenting & Visibility' + startOffset: 1286 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1286 + endOffset: 1427 +- name: 'Resume Readiness: Quick Sharing and On-the-Spot Opportunities' + startOffset: 1427 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1427 + endOffset: 1463 +- name: 'Portfolio Essentials: rpubs, EDA, Visualizations & Basic ML' + startOffset: 1463 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1463 + endOffset: 1599 +- name: 'Portfolio Strategy: Three Projects and Publicizing Work' + startOffset: 1599 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1599 + endOffset: 1699 +- name: 'Meetup Tactics: Spotting and Approaching Hiring Managers' + startOffset: 1699 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1699 + endOffset: 1947 +- name: 'Cold Outreach & DIY Internships: 200 Messages and Trial Offers' + startOffset: 1947 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1947 + endOffset: 2106 +- name: 'Finding Contract Work: Dice, Recruiter Calls and Freelance Tradeoffs' + startOffset: 2106 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2106 + endOffset: 2292 +- name: 'Pro Bono & Nonprofit Projects: Catchafire for Real Experience' + startOffset: 2292 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2292 + endOffset: 2512 +- name: 'Messaging Strategy: Personalization, Alumni Hooks & Templates' + startOffset: 2512 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2512 + endOffset: 2666 +- name: 'Consistency & Visibility: Posting Frequency and the Algorithm' + startOffset: 2666 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2666 + endOffset: 2718 +- name: 'Portfolio Hosting Options: Zyro, GitHub, WordPress, Hashnode' + startOffset: 2718 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2718 + endOffset: 2899 +- name: 'Project Presentation: Clean README, Docs and Repo Organization' + startOffset: 2899 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2899 + endOffset: 2974 +- name: 'Skills Roadmap 
& Interview Prep: SQL, Python, Visualization' + startOffset: 2974 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2974 + endOffset: 3171 +- name: 'Role Realities: BI / Analytics Engineer Work at Amazon' + startOffset: 3171 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3171 + endOffset: 3362 +- name: 'Career Advice: Consistency, Soft Skills and STAR Format' + startOffset: 3362 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3362 + endOffset: 3475 +- name: 'Online Networking: Hopin Random Date & Virtual Meetups' + startOffset: 3475 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3475 + endOffset: 3599 +- name: 'Communicating Impact: Summarizing Projects for Hiring Managers' + startOffset: 3599 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3599 + endOffset: 3666 +- name: 'Project Hygiene: Version Control and Shared Repositories' + startOffset: 3666 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3666 + endOffset: 3751 +- name: Personal Branding & Contact Info + startOffset: 3751 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3751 + endOffset: 3793 +- name: Episode Closing + startOffset: 3793 + url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3793 + endOffset: 3800 + transcript: - header: Podcast Introduction - line: This week, we'll talk about transitioning to analytics. And we have a special @@ -329,7 +443,7 @@ transcript: sec: 1201 time: '20:01' who: Alexey -- header: 'Uncrowded Doors: Alternative Job‑Hunting Strategies' +- header: 'Uncrowded Doors: Alternative Job-Hunting Strategies' - line: The market is tough for people without experience. If you're in that group, you have to hustle. You have to look for alternative ways to get in front of a hiring manager. You have to think outside the box. 
You have to look for the uncrowded @@ -1054,131 +1168,6 @@ transcript: sec: 3802 time: '1:03:22' who: Juan Pablo -description: 'Discover data analytics: build a portfolio, master SQL & networking, - interview prep, cold outreach and project READMEs to land job offers faster.' -dateadded: '2022-03-19' -duration: PT01H03M20S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=0 - endOffset: 111 -- name: 'Background & Motivation: Math Graduate to Data Analytics' - startOffset: 111 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=111 - endOffset: 197 -- name: 'Early Roles & Mentoring: Consulting, T-Mobile, Amazon Path' - startOffset: 197 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=197 - endOffset: 246 -- name: 'Community & Resources: Amplifying Learning Platforms' - startOffset: 246 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=246 - endOffset: 325 -- name: Math Foundations & Machine Learning Relevance - startOffset: 325 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=325 - endOffset: 504 -- name: 'Transition Path: Biostatistics, R, SAS and Discovering SQL' - startOffset: 504 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=504 - endOffset: 787 -- name: 'Bootcamp Trade-offs: Cost, Network and a Nine-Month Job Search' - startOffset: 787 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=787 - endOffset: 972 -- name: 'Networking Wins: Meetups Leading to First Offer' - startOffset: 972 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=972 - endOffset: 1110 -- name: 'Building Credibility: Employer Brand & Social Proof' - startOffset: 1110 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1110 - endOffset: 1217 -- name: 'Uncrowded Doors: Alternative Job‑Hunting Strategies' - startOffset: 1217 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1217 - endOffset: 1286 -- name: 'LinkedIn Tactics: Active Posting, Commenting & Visibility' - startOffset: 1286 - url: 
https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1286 - endOffset: 1427 -- name: 'Resume Readiness: Quick Sharing and On-the-Spot Opportunities' - startOffset: 1427 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1427 - endOffset: 1463 -- name: 'Portfolio Essentials: rpubs, EDA, Visualizations & Basic ML' - startOffset: 1463 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1463 - endOffset: 1599 -- name: 'Portfolio Strategy: Three Projects and Publicizing Work' - startOffset: 1599 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1599 - endOffset: 1699 -- name: 'Meetup Tactics: Spotting and Approaching Hiring Managers' - startOffset: 1699 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1699 - endOffset: 1947 -- name: 'Cold Outreach & DIY Internships: 200 Messages and Trial Offers' - startOffset: 1947 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=1947 - endOffset: 2106 -- name: 'Finding Contract Work: Dice, Recruiter Calls and Freelance Tradeoffs' - startOffset: 2106 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2106 - endOffset: 2292 -- name: 'Pro Bono & Nonprofit Projects: Catchafire for Real Experience' - startOffset: 2292 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2292 - endOffset: 2512 -- name: 'Messaging Strategy: Personalization, Alumni Hooks & Templates' - startOffset: 2512 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2512 - endOffset: 2666 -- name: 'Consistency & Visibility: Posting Frequency and the Algorithm' - startOffset: 2666 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2666 - endOffset: 2718 -- name: 'Portfolio Hosting Options: Zyro, GitHub, WordPress, Hashnode' - startOffset: 2718 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2718 - endOffset: 2899 -- name: 'Project Presentation: Clean README, Docs and Repo Organization' - startOffset: 2899 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2899 - endOffset: 2974 -- name: 'Skills Roadmap & Interview Prep: SQL, Python, Visualization' - 
startOffset: 2974 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=2974 - endOffset: 3171 -- name: 'Role Realities: BI / Analytics Engineer Work at Amazon' - startOffset: 3171 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3171 - endOffset: 3362 -- name: 'Career Advice: Consistency, Soft Skills and STAR Format' - startOffset: 3362 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3362 - endOffset: 3475 -- name: 'Online Networking: Hopin Random Date & Virtual Meetups' - startOffset: 3475 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3475 - endOffset: 3599 -- name: 'Communicating Impact: Summarizing Projects for Hiring Managers' - startOffset: 3599 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3599 - endOffset: 3666 -- name: 'Project Hygiene: Version Control and Shared Repositories' - startOffset: 3666 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3666 - endOffset: 3751 -- name: Personal Branding & Contact Info - startOffset: 3751 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3751 - endOffset: 3793 -- name: Episode Closing - startOffset: 3793 - url: https://www.youtube.com/watch?v=qh6-HDhw2xY&t=3793 - endOffset: 3800 --- Links: diff --git a/_podcast/s03e06-from-physics-to-machine-learning.md b/_podcast/from-physics-to-computer-vision-career-transition.md similarity index 97% rename from _podcast/s03e06-from-physics-to-machine-learning.md rename to _podcast/from-physics-to-computer-vision-career-transition.md index aadef128..03d24358 100644 --- a/_podcast/s03e06-from-physics-to-machine-learning.md +++ b/_podcast/from-physics-to-computer-vision-career-transition.md @@ -1,12 +1,11 @@ --- -title: 'Switch to Computer Vision & Deep Learning: Roadmap, Kaggle Projects, Mentors - & Interview Prep' -short: From Physics to Machine Learning -guests: -- tatianagabruseva -image: images/podcast/s03e06-from-physics-to-machine-learning.jpg +title: "Switch to Computer Vision & Deep Learning: Roadmap, Kaggle Projects, Mentors & Interview Prep" 
+short: "From Physics to Machine Learning" season: 3 episode: 6 +guests: +- tatianagabruseva +image: images/podcast/from-physics-to-computer-vision-career-transition.jpg ids: youtube: wJPi6Ip9PX0 anchor: From-Physics-to-Machine-Learning---Tatiana-Gabruseva-e10r4pl @@ -15,6 +14,127 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/From-Physics-to-Machine-Learning---Tatiana-Gabruseva-e10r4pl spotify: https://open.spotify.com/episode/4Kk7xXfD5t2VHnLDHpdW1y apple: https://podcasts.apple.com/us/podcast/from-physics-to-machine-learning-tatiana-gabruseva/id1541710331?i=1000521740775 + +description: "Master computer vision & deep learning with a clear roadmap: Kaggle projects, mentorship strategies and interview prep to land roles and build deployed models." +intro: "How do you switch into computer vision and deep learning from a non-industry background — and build a portfolio that lands interviews? In this episode, Tatiana Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master now working as a Senior ML Engineer at Cork University Hospital, maps a practical career-change roadmap. Drawing on her move from a physics PhD during maternity leave, Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), hands-on projects (Kaggle competitions, internships, Omdena-style collaborations, end-to-end pet projects with data collection, labeling, deployment and Docker), and where to start Kaggle with minimal Python.

You’ll hear tactical advice on mentorship — finding and nurturing long-term mentors — plus networking, team building for competitions and papers, and overcoming impostor syndrome with mock interviews and LeetCode practice. She also shares prioritization strategies (Pareto, outsourcing), mental rehearsal techniques, boundary setting, and self-care to avoid burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, and connect with the data science community to accelerate a switch into computer vision and deep learning." +topics: +- career transition +- physics +- deep learning +- machine learning +- career growth +- academia +- mentorship +dateadded: 2021-05-14 + +duration: PT01H06M13S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=0 + endOffset: 117 +- name: 'Career origin: physics PhD to computer vision deep learning' + startOffset: 117 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=117 + endOffset: 152 +- name: 'Transition catalyst: maternity leave, online courses and internship' + startOffset: 152 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=152 + endOffset: 260 +- name: 'Career-change summary: sharing a Twitter thread of practical lessons' + startOffset: 260 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=260 + endOffset: 347 +- name: 'Network makeover: building supportive data science circles' + startOffset: 347 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=347 + endOffset: 470 +- name: Overcoming fears and age stereotypes in career change + startOffset: 470 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=470 + endOffset: 533 +- name: 'Eliminating distractions: focused time management during maternity leave' + startOffset: 533 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=533 + endOffset: 649 +- name: 'Impostor syndrome remedy: interviews and mock interviewing practice' + startOffset: 649 + url: 
https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=649 + endOffset: 892 +- name: 'Selective attention: focusing on positive signals and mentors' + startOffset: 892 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=892 + endOffset: 956 +- name: 'Team building: finding teammates for Kaggle competitions and papers' + startOffset: 956 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=956 + endOffset: 1264 +- name: 'Prioritization: Pareto principle, outsourcing and avoiding perfectionism' + startOffset: 1264 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1264 + endOffset: 1425 +- name: 'Mental rehearsal: initial creation, visualization and Sankalpa technique' + startOffset: 1425 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1425 + endOffset: 1688 +- name: 'Mentorship strategies: finding and nurturing long-term mentors' + startOffset: 1688 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1688 + endOffset: 1902 +- name: 'Boundary setting: learning to say no and protect your time' + startOffset: 1902 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1902 + endOffset: 2065 +- name: 'Embracing failure: treating setbacks as growth opportunities' + startOffset: 2065 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2065 + endOffset: 2250 +- name: 'Self-care tactics: sleep, support systems and avoiding burnout' + startOffset: 2250 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2250 + endOffset: 2554 +- name: 'Kaggle vs internships and Omdena-style projects: pros and cons' + startOffset: 2554 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2554 + endOffset: 2800 +- name: 'End-to-end pet projects: data collection, labeling, deployment and Docker' + startOffset: 2800 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2800 + endOffset: 2969 +- name: 'Learning roadmap: Python, ML/DL courses, SQL, algorithms and system design' + startOffset: 2969 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2969 + endOffset: 3220 +- name: 'Starting Kaggle 
with minimal Python: practical beginner advice' + startOffset: 3220 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3220 + endOffset: 3284 +- name: 'Improving focus: meditation, analytical practice and achieving flow' + startOffset: 3284 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3284 + endOffset: 3476 +- name: 'Astroinformatics overview: ML applications in astronomy' + startOffset: 3476 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3476 + endOffset: 3569 +- name: 'Physics background advantage: math, problem solving and modeling' + startOffset: 3569 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3569 + endOffset: 3753 +- name: 'Leaving academia: lab constraints, maternity leaves and cloud credits' + startOffset: 3753 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3753 + endOffset: 3874 +- name: 'Interview preparation: LeetCode, mock interviews and system design prep' + startOffset: 3874 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3874 + endOffset: 4058 +- name: 'Where to connect: LinkedIn, Twitter and DataTalks.Club follow-up' + startOffset: 4058 + url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=4058 + endOffset: 3973 + transcript: - header: Podcast Introduction - header: 'Career origin: physics PhD to computer vision deep learning' @@ -1111,132 +1231,6 @@ transcript: sec: 4090 time: '1:08:10' who: Alexey -description: 'Master computer vision & deep learning with a clear roadmap: Kaggle - projects, mentorship strategies and interview prep to land roles and build deployed - models.' -intro: How do you switch into computer vision and deep learning from a non‑industry - background — and build a portfolio that lands interviews? In this episode, Tatiana - Gabruseva, a Computer Vision/Deep Learning engineer and Kaggle Competitions Master - now working as a Senior ML Engineer at Cork University Hospital, maps a practical - career-change roadmap. 
Drawing on her move from a physics PhD during maternity leave, - Tatiana covers learning paths (Python, ML/DL courses, SQL, algorithms, system design), - hands‑on projects (Kaggle competitions, internships, Omdena‑style collaborations, - end‑to‑end pet projects with data collection, labeling, deployment and Docker), - and where to start Kaggle with minimal Python.

You’ll hear tactical advice - on mentorship — finding and nurturing long‑term mentors — plus networking, team - building for competitions and papers, and overcoming impostor syndrome with mock - interviews and LeetCode practice. She also shares prioritization strategies (Pareto, - outsourcing), mental rehearsal techniques, boundary setting, and self‑care to avoid - burnout. Listen for concrete steps to build portfolio projects, prepare for interviews, - and connect with the data science community to accelerate a switch into computer - vision and deep learning. -dateadded: '2021-05-14' -duration: PT01H06M13S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=0 - endOffset: 117 -- name: 'Career origin: physics PhD to computer vision deep learning' - startOffset: 117 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=117 - endOffset: 152 -- name: 'Transition catalyst: maternity leave, online courses and internship' - startOffset: 152 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=152 - endOffset: 260 -- name: 'Career-change summary: sharing a Twitter thread of practical lessons' - startOffset: 260 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=260 - endOffset: 347 -- name: 'Network makeover: building supportive data science circles' - startOffset: 347 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=347 - endOffset: 470 -- name: Overcoming fears and age stereotypes in career change - startOffset: 470 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=470 - endOffset: 533 -- name: 'Eliminating distractions: focused time management during maternity leave' - startOffset: 533 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=533 - endOffset: 649 -- name: 'Impostor syndrome remedy: interviews and mock interviewing practice' - startOffset: 649 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=649 - endOffset: 892 -- name: 'Selective attention: focusing on positive signals and mentors' 
- startOffset: 892 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=892 - endOffset: 956 -- name: 'Team building: finding teammates for Kaggle competitions and papers' - startOffset: 956 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=956 - endOffset: 1264 -- name: 'Prioritization: Pareto principle, outsourcing and avoiding perfectionism' - startOffset: 1264 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1264 - endOffset: 1425 -- name: 'Mental rehearsal: initial creation, visualization and Sankalpa technique' - startOffset: 1425 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1425 - endOffset: 1688 -- name: 'Mentorship strategies: finding and nurturing long-term mentors' - startOffset: 1688 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1688 - endOffset: 1902 -- name: 'Boundary setting: learning to say no and protect your time' - startOffset: 1902 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=1902 - endOffset: 2065 -- name: 'Embracing failure: treating setbacks as growth opportunities' - startOffset: 2065 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2065 - endOffset: 2250 -- name: 'Self-care tactics: sleep, support systems and avoiding burnout' - startOffset: 2250 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2250 - endOffset: 2554 -- name: 'Kaggle vs internships and Omdena-style projects: pros and cons' - startOffset: 2554 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2554 - endOffset: 2800 -- name: 'End-to-end pet projects: data collection, labeling, deployment and Docker' - startOffset: 2800 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2800 - endOffset: 2969 -- name: 'Learning roadmap: Python, ML/DL courses, SQL, algorithms and system design' - startOffset: 2969 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=2969 - endOffset: 3220 -- name: 'Starting Kaggle with minimal Python: practical beginner advice' - startOffset: 3220 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3220 - endOffset: 
3284 -- name: 'Improving focus: meditation, analytical practice and achieving flow' - startOffset: 3284 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3284 - endOffset: 3476 -- name: 'Astroinformatics overview: ML applications in astronomy' - startOffset: 3476 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3476 - endOffset: 3569 -- name: 'Physics background advantage: math, problem solving and modeling' - startOffset: 3569 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3569 - endOffset: 3753 -- name: 'Leaving academia: lab constraints, maternity leaves and cloud credits' - startOffset: 3753 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3753 - endOffset: 3874 -- name: 'Interview preparation: LeetCode, mock interviews and system design prep' - startOffset: 3874 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=3874 - endOffset: 4058 -- name: 'Where to connect: LinkedIn, Twitter and DataTalks.Club follow-up' - startOffset: 4058 - url: https://www.youtube.com/watch?v=wJPi6Ip9PX0&t=4058 - endOffset: 3973 --- Links: diff --git a/_podcast/s21e05-from-astronomy-to-applied-ml.md b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md similarity index 95% rename from _podcast/s21e05-from-astronomy-to-applied-ml.md rename to _podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md index b9839ef3..d8ee6be4 100644 --- a/_podcast/s21e05-from-astronomy-to-applied-ml.md +++ b/_podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.md @@ -1,19 +1,152 @@ --- +title: "From Radio Astronomy to Applied ML: MEERKAT Data Pipelines, Multi-Wavelength Cross-Matching & Production-Grade ML Systems" +short: "From Astronomy to Applied ML" +season: 21 episode: 5 guests: - danielegbo +image: images/podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.jpg ids: anchor: datatalksclub/episodes/From-Astronomy-to-Applied-ML---Daniel-Egbo-e38ha20 youtube: b92gwrsVQtg -image: 
images/podcast/s21e05-from-astronomy-to-applied-ml.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Astronomy-to-Applied-ML---Daniel-Egbo-e38ha20 apple: https://podcasts.apple.com/us/podcast/from-astronomy-to-applied-ml-daniel-egbo/id1541710331?i=1000728601772 spotify: https://open.spotify.com/episode/0hV7d1zSKO7ykGDZxjXyJ8 youtube: https://www.youtube.com/watch?v=b92gwrsVQtg -season: 21 -short: From Astronomy to Applied ML -title: 'Detecting Radio-Emitting Stars with MEERKAT: Building ML & Cloud Data Pipelines' +description: "Discover MEERKAT radio astronomy pipelines and machine learning: build production ML, master multi-wavelength cross-match, accelerate discovery." +topics: +- astroinformatics +- MLOps +- LLMs +- data engineering +- machine learning +- academia +- career transition +intro: "How do you transform raw radio astronomy observations into reliable, production-grade machine learning systems that enable multi-wavelength science? In this episode we talk with Daniel Egbo — an astrophysicist turned machine learning engineer and AI ambassador (Arize, Tavily) and PhD candidate at the University of Cape Town — about bridging radio astronomy and applied ML. Daniel explains the challenges of working with MEERKAT data pipelines, strategies for multi-wavelength cross-matching, and the engineering practices needed to take models from research to production.

You’ll hear about end-to-end ML and LLM applications with an emphasis on reliability, practical evaluation, and knowledge-retrieval assistants, plus how data science techniques apply to astronomy workflows. Whether you’re building pipelines for radio telescopes, tackling cross-matching across optical and radio catalogs, or aiming to deploy robust production-grade ML systems, this episode offers concrete perspectives on data handling, evaluation, and operationalizing models in scientific contexts. Listen to gain actionable insights for integrating astrophysical datasets with modern ML tooling and improving model reliability in real-world deployments." +dateadded: 2025-09-30 +duration: PT01H04M35S +quotableClips: +- name: Podcast Introduction & Lunar Eclipse Anecdote + startOffset: 0 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=0 + endOffset: 73 +- name: 'Career Overview: From Nigeria to PhD in Cape Town' + startOffset: 73 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=73 + endOffset: 252 +- name: 'MEERKAT and SKA: Radio Telescope Project Overview' + startOffset: 252 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=252 + endOffset: 289 +- name: 'Electromagnetic Spectrum: Radio to Gamma Explained' + startOffset: 289 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=289 + endOffset: 379 +- name: 'Research Goal: Identifying Radio-Emitting Stars in MEERKAT Data' + startOffset: 379 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=379 + endOffset: 405 +- name: Telescope Types and Observing Constraints (Optical, Infrared, X-ray) + startOffset: 405 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=405 + endOffset: 480 +- name: Radio Telescope Site Requirements and Space-based X-ray Observatories + startOffset: 480 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=480 + endOffset: 639 +- name: 'Data Workflow: Detecting Point Sources in Radio Images' + startOffset: 639 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=639 + endOffset: 710 +- 
name: Cross-matching Multi-wavelength Catalogs and Positional Astronomy + startOffset: 710 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=710 + endOffset: 815 +- name: 'Positional Uncertainty: 2D Projection, Foreground/Background Confusion' + startOffset: 815 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=815 + endOffset: 930 +- name: 'Physics-based Verification: Using Prior Observations to Confirm Sources' + startOffset: 930 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=930 + endOffset: 995 +- name: Radio Stars Rarity and Sensitivity Improvements with New Telescopes + startOffset: 995 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=995 + endOffset: 1074 +- name: Building Curated Datasets as Foundation for Future Machine Learning + startOffset: 1074 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1074 + endOffset: 1291 +- name: 'Early ML Journey: Dataset Scale, Cloud Needs, and Inspiration' + startOffset: 1291 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1291 + endOffset: 1473 +- name: 'Python Astronomy Tooling: Astropy, NumPy, SciPy for Big Data' + startOffset: 1473 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1473 + endOffset: 1547 +- name: 'Cloud Computing Practices: JupyterHub and Remote Analysis' + startOffset: 1547 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1547 + endOffset: 1618 +- name: 'ML ZoomCamp Impact: Transitioning to Reusable Code and Production Practices' + startOffset: 1618 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1618 + endOffset: 1886 +- name: 'Edge Deployment Internship: Testing Models on Intel Hardware' + startOffset: 1886 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1886 + endOffset: 2018 +- name: 'LLM Exploration: LangChain, Hugging Face, RAG and Vector Databases' + startOffset: 2018 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2018 + endOffset: 2568 +- name: 'Course Projects: Orchestration with Kestra, Airflow, MinIO and Spark' + startOffset: 2568 + url: 
https://www.youtube.com/watch?v=b92gwrsVQtg&t=2568 + endOffset: 2648 +- name: Airflow 3.0 Setup Experience and Astronomer CLI Learnings + startOffset: 2648 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2648 + endOffset: 2715 +- name: 'End-to-End Pipeline Example: MySQL → MinIO → Spark → Warehouse (dbt next)' + startOffset: 2715 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2715 + endOffset: 2859 +- name: 'AI Training Ecosystem: LangChain Academy, Arize, NVIDIA Deep Learning Institute' + startOffset: 2859 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2859 + endOffset: 3020 +- name: 'Student Benefits: Free NVIDIA Courses and Deploying on GPUs (A100/H100)' + startOffset: 3020 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3020 + endOffset: 3121 +- name: 'BRICS Astronomy Bootcamp: Beginner-Friendly Data Analytics Program' + startOffset: 3121 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3121 + endOffset: 3312 +- name: 'Sharing Projects: Colab Notebooks, Public Portfolios and GitHub Visibility' + startOffset: 3312 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3312 + endOffset: 3479 +- name: 'Career Advice: Learn Python, Do Structured Projects, Leverage Domain Knowledge' + startOffset: 3479 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3479 + endOffset: 3621 +- name: 'Tools & Sponsors: Data Load Tool for Pipelines and Community Support' + startOffset: 3621 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3621 + endOffset: 3669 +- name: 'Learning Resources: Astropy Tutorials, Course GitHub and YouTube Archive' + startOffset: 3669 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3669 + endOffset: 3742 +- name: 'Closing Remarks: Encouragement to Share Progress and Course Availability' + startOffset: 3742 + url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3742 + endOffset: 3875 transcript: - header: Podcast Introduction & Lunar Eclipse Anecdote - line: Hi everyone, welcome to our event. 
This event is brought to you by Data Talks @@ -1153,148 +1286,15 @@ transcript: sec: 3875 time: '1:04:35' who: Alexey -description: Discover MEERKAT radio-emitting stars using ML & cloud pipelines — learn - Astropy tools, catalog cross-matching, and production deployment at scale. -intro: 'How do you find rare radio-emitting stars in massive MEERKAT datasets—and - turn that search into reliable machine learning and cloud data pipelines? In this - episode Daniel Egbo, an astrophysicist turned ML engineer and PhD candidate at the - University of Cape Town, walks through the practical intersection of astronomy, - ML, and cloud engineering. We cover MEERKAT and SKA context, the electromagnetic - spectrum, and the core research goal: detecting point sources in radio images and - confirming them via multi-wavelength cross-matching and physics-based verification. - Daniel explains positional uncertainty, foreground/background confusion, and why - curated datasets are essential for future ML. He also shares tooling and infrastructure - practices—Astropy, NumPy/SciPy, JupyterHub, cloud compute, orchestration with Airflow/Kestra, - MinIO and Spark—and outlines an end-to-end pipeline pattern (MySQL → MinIO → Spark - → warehouse). Listeners will come away with concrete methods for building reproducible - astronomical data workflows, practical machine learning readiness steps, and resources - for learning and deployment (edge testing, LLMs, and community courses) to apply - to radio telescope and astronomical data projects.' 
-dateadded: '2025-09-30' -duration: PT01H04M35S -quotableClips: -- name: Podcast Introduction & Lunar Eclipse Anecdote - startOffset: 0 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=0 - endOffset: 73 -- name: 'Career Overview: From Nigeria to PhD in Cape Town' - startOffset: 73 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=73 - endOffset: 252 -- name: 'MEERKAT and SKA: Radio Telescope Project Overview' - startOffset: 252 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=252 - endOffset: 289 -- name: 'Electromagnetic Spectrum: Radio to Gamma Explained' - startOffset: 289 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=289 - endOffset: 379 -- name: 'Research Goal: Identifying Radio-Emitting Stars in MEERKAT Data' - startOffset: 379 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=379 - endOffset: 405 -- name: Telescope Types and Observing Constraints (Optical, Infrared, X-ray) - startOffset: 405 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=405 - endOffset: 480 -- name: Radio Telescope Site Requirements and Space-based X-ray Observatories - startOffset: 480 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=480 - endOffset: 639 -- name: 'Data Workflow: Detecting Point Sources in Radio Images' - startOffset: 639 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=639 - endOffset: 710 -- name: Cross-matching Multi-wavelength Catalogs and Positional Astronomy - startOffset: 710 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=710 - endOffset: 815 -- name: 'Positional Uncertainty: 2D Projection, Foreground/Background Confusion' - startOffset: 815 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=815 - endOffset: 930 -- name: 'Physics-based Verification: Using Prior Observations to Confirm Sources' - startOffset: 930 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=930 - endOffset: 995 -- name: Radio Stars Rarity and Sensitivity Improvements with New Telescopes - startOffset: 995 - url: 
https://www.youtube.com/watch?v=b92gwrsVQtg&t=995 - endOffset: 1074 -- name: Building Curated Datasets as Foundation for Future Machine Learning - startOffset: 1074 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1074 - endOffset: 1291 -- name: 'Early ML Journey: Dataset Scale, Cloud Needs, and Inspiration' - startOffset: 1291 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1291 - endOffset: 1473 -- name: 'Python Astronomy Tooling: Astropy, NumPy, SciPy for Big Data' - startOffset: 1473 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1473 - endOffset: 1547 -- name: 'Cloud Computing Practices: JupyterHub and Remote Analysis' - startOffset: 1547 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1547 - endOffset: 1618 -- name: 'ML ZoomCamp Impact: Transitioning to Reusable Code and Production Practices' - startOffset: 1618 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1618 - endOffset: 1886 -- name: 'Edge Deployment Internship: Testing Models on Intel Hardware' - startOffset: 1886 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=1886 - endOffset: 2018 -- name: 'LLM Exploration: LangChain, Hugging Face, RAG and Vector Databases' - startOffset: 2018 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2018 - endOffset: 2568 -- name: 'Course Projects: Orchestration with Kestra, Airflow, MinIO and Spark' - startOffset: 2568 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2568 - endOffset: 2648 -- name: Airflow 3.0 Setup Experience and Astronomer CLI Learnings - startOffset: 2648 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2648 - endOffset: 2715 -- name: 'End-to-End Pipeline Example: MySQL → MinIO → Spark → Warehouse (dbt next)' - startOffset: 2715 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2715 - endOffset: 2859 -- name: 'AI Training Ecosystem: LangChain Academy, Arize, NVIDIA Deep Learning Institute' - startOffset: 2859 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=2859 - endOffset: 3020 -- name: 'Student 
Benefits: Free NVIDIA Courses and Deploying on GPUs (A100/H100)' - startOffset: 3020 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3020 - endOffset: 3121 -- name: 'BRICS Astronomy Bootcamp: Beginner-Friendly Data Analytics Program' - startOffset: 3121 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3121 - endOffset: 3312 -- name: 'Sharing Projects: Colab Notebooks, Public Portfolios and GitHub Visibility' - startOffset: 3312 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3312 - endOffset: 3479 -- name: 'Career Advice: Learn Python, Do Structured Projects, Leverage Domain Knowledge' - startOffset: 3479 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3479 - endOffset: 3621 -- name: 'Tools & Sponsors: Data Load Tool for Pipelines and Community Support' - startOffset: 3621 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3621 - endOffset: 3669 -- name: 'Learning Resources: Astropy Tutorials, Course GitHub and YouTube Archive' - startOffset: 3669 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3669 - endOffset: 3742 -- name: 'Closing Remarks: Encouragement to Share Progress and Course Availability' - startOffset: 3742 - url: https://www.youtube.com/watch?v=b92gwrsVQtg&t=3742 - endOffset: 3875 +context: 'Modern astrophysical discovery—illustrated by the challenge of finding rare + radio-emitting stars—depends on the seamless integration of domain knowledge, careful + physics-informed data curation, and production-grade, scalable data/ML workflows: + from telescope instrumentation and multi-wavelength cross-matching through positional-uncertainty + analysis, to cloud-native pipelines, reproducible tooling, and deployment. The episode’s + through-line is that building curated, interpretable datasets and end-to-end infrastructure + (not just models) is the essential bridge that turns complex observational data + into reliable science, practical education, and transferable career skills.' 
--- - Links: * [Linkedin](https://www.linkedin.com/in/egbodaniel/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md b/_podcast/from-semiconductor-data-to-applied-machine-learning.md similarity index 95% rename from _podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md rename to _podcast/from-semiconductor-data-to-applied-machine-learning.md index f4ae25f8..6c8d50fc 100644 --- a/_podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.md +++ b/_podcast/from-semiconductor-data-to-applied-machine-learning.md @@ -1,22 +1,144 @@ --- +title: "From Classical Guitar to Production ML: Nonlinear Career Path Through Semiconductors, Yield Analytics & Community-Driven Learning" +short: "From Semiconductors to Machine Learning: A Career in Data and Teaching" +season: 21 episode: 8 guests: - dashelruizperez +image: images/podcast/from-semiconductor-data-to-applied-machine-learning.jpg ids: anchor: datatalksclub/episodes/From-Semiconductors-to-Machine-Learning-A-Career-in-Data-and-Teaching-e395t53 youtube: B2tzuUg5uZs -image: images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Semiconductors-to-Machine-Learning-A-Career-in-Data-and-Teaching-e395t53 apple: https://podcasts.apple.com/us/podcast/from-semiconductors-to-machine-learning-a-career-in/id1541710331?i=1000731197034 spotify: https://open.spotify.com/episode/1znRtNRf5IUYcBblJYH53r youtube: https://www.youtube.com/watch?v=B2tzuUg5uZs -season: 21 -short: 'From Semiconductors to Machine Learning: A Career in Data and Teaching' -title: 'Predictive Maintenance & Yield Analytics for Semiconductors: Deploy ML with - Flask, Docker & MLOps' +description: "Discover a nonlinear path from classical guitar to production ML, semiconductors & yield analytics. 
Learn actionable career tactics and community-driven learning." +topics: +- machine learning +- MLOps +- data science +- tools +- career transition +intro: "How do you move from playing classical guitar to applying machine learning in semiconductor yield analytics? In this episode Dashel Ruiz Perez — a data analyst, ML engineer, and educator — walks us through a nonlinear career path that spans nearly a decade at Microchip Technology and now teaching programming and data skills through ThriveDX. With roles across production, process, yield, and software engineering, Dashel explains how hands-on production experience informs production analytics and ML engineering work in semiconductor manufacturing.

We cover practical topics including translating manufacturing problems into data science projects, building models for yield optimization, and the role of software engineering in production analytics. Dashel also discusses learning pathways — from degrees in computer science and data analytics at Western Governors University to graduating from ML Zoomcamp — and how community-driven learning accelerates skill acquisition.

Listeners will gain actionable guidance on career transition strategies, concrete examples of applying machine learning and data analytics in semiconductor contexts, and resources for growing technical skills through community and formal training. This episode is useful for engineers, data analysts, and anyone considering a switch into ML, AI, or semiconductor yield analytics." +dateadded: 2025-10-21 +duration: PT01H13M08S +quotableClips: +- name: Podcast Introduction & DataTalksClub + startOffset: 0 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=0 + endOffset: 111 +- name: 'Guest Overview: Multidisciplinary Career Snapshot' + startOffset: 111 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=111 + endOffset: 178 +- name: 'Career Pivot: From Classical Guitarist to Tech in Portland' + startOffset: 178 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=178 + endOffset: 289 +- name: 'Semiconductor Onboarding: Expediter Role and Fab Floor Experience' + startOffset: 289 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=289 + endOffset: 349 +- name: 'Fab Data Exposure: Millisecond Tool Logs and Process Telemetry' + startOffset: 349 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=349 + endOffset: 376 +- name: 'Self-Education: Learning English and Computer Science' + startOffset: 376 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=376 + endOffset: 704 +- name: 'Automation Initiative: Building a Java Tool for CMP Calculations' + startOffset: 704 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=704 + endOffset: 923 +- name: 'Yield Analytics: JMP, Oracle, and Cross-Area Data Access' + startOffset: 923 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=923 + endOffset: 1262 +- name: 'ML Introduction: Academic AI Project and Predictive Interest' + startOffset: 1262 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1262 + endOffset: 1409 +- name: 'Predictive Maintenance: "Wafers at Risk" Model for Yield Improvement' + startOffset: 1409 + url: 
https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1409 + endOffset: 1516 +- name: 'Explainability Dilemma: Tweaking Models vs. Understanding Results' + startOffset: 1516 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1516 + endOffset: 1773 +- name: 'Course Selection: Choosing ML Zoomcamp Cohort Experience' + startOffset: 1773 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1773 + endOffset: 1942 +- name: 'Applied Curriculum: Deliverable ML Beyond Jupyter Notebooks' + startOffset: 1942 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1942 + endOffset: 2074 +- name: 'Learning Support: Slack Q&A, Cohorts, and Peer Study Groups' + startOffset: 2074 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2074 + endOffset: 2249 +- name: 'Production Focus: Flask REST API, Docker, and Google Cloud' + startOffset: 2249 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2249 + endOffset: 2392 +- name: 'Midterm Demo: COVID Comorbidity Model Deployed as an API' + startOffset: 2392 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2392 + endOffset: 2676 +- name: 'Infrastructure Automation: Terraform and MLOps Takeaways' + startOffset: 2676 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2676 + endOffset: 2904 +- name: 'Computer Vision Project: Butterfly Image Classification (TensorFlow)' + startOffset: 2904 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2904 + endOffset: 3070 +- name: 'Kaggle Workflow: EDA, Feature Engineering, and Model Iteration' + startOffset: 3070 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3070 + endOffset: 3113 +- name: 'Model Portability: ONNX for Framework Interoperability' + startOffset: 3113 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3113 + endOffset: 3203 +- name: 'Full-Stack ML Skills: Docker, VMs, Databases, and Deployment' + startOffset: 3203 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3203 + endOffset: 3265 +- name: 'Common Roadblocks: Mac M1 Issues and Wide Categorical Data' + startOffset: 3265 
+ url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3265 + endOffset: 3362 +- name: 'Time Commitment: Homework Strategy and Active Video Learning' + startOffset: 3362 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3362 + endOffset: 3487 +- name: 'Community Value: Rapid Help, Code Reviews, and Study Groups' + startOffset: 3487 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3487 + endOffset: 3631 +- name: 'Motivation Techniques: Public Learning and Project Accountability' + startOffset: 3631 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3631 + endOffset: 4056 +- name: 'Teaching Ambition: Creating High-Quality Spanish ML Content' + startOffset: 4056 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4056 + endOffset: 4228 +- name: 'Upcoming Offerings: AI-for-Developers, React, and LLM Coding' + startOffset: 4228 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4228 + endOffset: 4369 +- name: 'Closing Remarks: Course Endorsement and Next Steps' + startOffset: 4369 + url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4369 + endOffset: 4388 transcript: -- header: Podcast Introduction & Data Docs Club +- header: Podcast Introduction & DataTalksClub - line: Hi everyone, welcome to our event. This event is brought to you by the Data Docs Club, a community of people who love data. We have weekly events and today is one of them. If you want to find out more about our events, there is a link @@ -1026,141 +1148,13 @@ transcript: sec: 4388 time: '1:13:08' who: Dashel -description: 'Learn predictive maintenance & yield analytics for semiconductors: deploy - ML with Flask, Docker & MLOps to boost yield, enable explainability, and ship APIs.' -intro: 'How do you move machine learning for predictive maintenance and yield analytics - out of a notebook and into production on the fab floor? 
In this episode, Dashel Ruiz - Perez—data analyst, ML engineer, and educator who spent nearly a decade at Microchip - Technology—walks through practical steps for deploying ML to improve semiconductor - yield. Drawing on millisecond tool logs, process telemetry, and a “Wafers at Risk” - predictive model, Dashel explains how to build explainable yield analytics, iterate - with Kaggle-style EDA and feature engineering, and ensure model portability with - ONNX.

Listen for concrete implementation details: turning models into Flask - REST APIs, containerizing with Docker, using Google Cloud and Terraform for infrastructure - automation, and MLOps best practices for production monitoring. Dashel also covers - hands-on learning paths from ML Zoomcamp—course deliverables beyond Jupyter notebooks, - common roadblocks (Mac M1 issues, wide categorical data), and examples like a COVID - comorbidity API demo and a TensorFlow computer vision project. If you’re responsible - for semiconductor predictive maintenance, yield analytics, or ML deployment, this - episode gives actionable guidance on tools, workflows, and learning strategies to - get models reliably running in production.' -dateadded: '2025-10-21' -duration: PT01H13M08S -quotableClips: -- name: Podcast Introduction & Data Docs Club - startOffset: 0 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=0 - endOffset: 111 -- name: 'Guest Overview: Multidisciplinary Career Snapshot' - startOffset: 111 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=111 - endOffset: 178 -- name: 'Career Pivot: From Classical Guitarist to Tech in Portland' - startOffset: 178 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=178 - endOffset: 289 -- name: 'Semiconductor Onboarding: Expediter Role and Fab Floor Experience' - startOffset: 289 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=289 - endOffset: 349 -- name: 'Fab Data Exposure: Millisecond Tool Logs and Process Telemetry' - startOffset: 349 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=349 - endOffset: 376 -- name: 'Self-Education: Learning English and Computer Science' - startOffset: 376 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=376 - endOffset: 704 -- name: 'Automation Initiative: Building a Java Tool for CMP Calculations' - startOffset: 704 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=704 - endOffset: 923 -- name: 'Yield Analytics: JMP, Oracle, and Cross-Area Data Access' - startOffset: 923 - url: 
https://www.youtube.com/watch?v=B2tzuUg5uZs&t=923 - endOffset: 1262 -- name: 'ML Introduction: Academic AI Project and Predictive Interest' - startOffset: 1262 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1262 - endOffset: 1409 -- name: 'Predictive Maintenance: "Wafers at Risk" Model for Yield Improvement' - startOffset: 1409 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1409 - endOffset: 1516 -- name: 'Explainability Dilemma: Tweaking Models vs. Understanding Results' - startOffset: 1516 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1516 - endOffset: 1773 -- name: 'Course Selection: Choosing ML Zoomcamp Cohort Experience' - startOffset: 1773 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1773 - endOffset: 1942 -- name: 'Applied Curriculum: Deliverable ML Beyond Jupyter Notebooks' - startOffset: 1942 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=1942 - endOffset: 2074 -- name: 'Learning Support: Slack Q&A, Cohorts, and Peer Study Groups' - startOffset: 2074 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2074 - endOffset: 2249 -- name: 'Production Focus: Flask REST API, Docker, and Google Cloud' - startOffset: 2249 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2249 - endOffset: 2392 -- name: 'Midterm Demo: COVID Comorbidity Model Deployed as an API' - startOffset: 2392 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2392 - endOffset: 2676 -- name: 'Infrastructure Automation: Terraform and MLOps Takeaways' - startOffset: 2676 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2676 - endOffset: 2904 -- name: 'Computer Vision Project: Butterfly Image Classification (TensorFlow)' - startOffset: 2904 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=2904 - endOffset: 3070 -- name: 'Kaggle Workflow: EDA, Feature Engineering, and Model Iteration' - startOffset: 3070 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3070 - endOffset: 3113 -- name: 'Model Portability: ONNX for Framework Interoperability' - 
startOffset: 3113 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3113 - endOffset: 3203 -- name: 'Full-Stack ML Skills: Docker, VMs, Databases, and Deployment' - startOffset: 3203 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3203 - endOffset: 3265 -- name: 'Common Roadblocks: Mac M1 Issues and Wide Categorical Data' - startOffset: 3265 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3265 - endOffset: 3362 -- name: 'Time Commitment: Homework Strategy and Active Video Learning' - startOffset: 3362 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3362 - endOffset: 3487 -- name: 'Community Value: Rapid Help, Code Reviews, and Study Groups' - startOffset: 3487 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3487 - endOffset: 3631 -- name: 'Motivation Techniques: Public Learning and Project Accountability' - startOffset: 3631 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=3631 - endOffset: 4056 -- name: 'Teaching Ambition: Creating High-Quality Spanish ML Content' - startOffset: 4056 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4056 - endOffset: 4228 -- name: 'Upcoming Offerings: AI-for-Developers, React, and LLM Coding' - startOffset: 4228 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4228 - endOffset: 4369 -- name: 'Closing Remarks: Course Endorsement and Next Steps' - startOffset: 4369 - url: https://www.youtube.com/watch?v=B2tzuUg5uZs&t=4369 - endOffset: 4388 +context: 'A single through-line: the episode is about a hands-on, end-to-end journey + into applied machine learning — a multidisciplinary career pivot powered by self-education + and cohort/community support that takes messy, high-frequency industrial data through + pragmatic tool-building, model development, explainability tradeoffs, and MLOps + (APIs, containers, Terraform, ONNX) into real production impact, with a commitment + to teaching and scaling that practice to others.' 
--- - Links: * [Linkedin](https://www.linkedin.com/in/dashel-ruiz-perez-2b036172/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s04e01-from-swe-to-ml.md b/_podcast/from-software-engineer-to-machine-learning.md similarity index 97% rename from _podcast/s04e01-from-swe-to-ml.md rename to _podcast/from-software-engineer-to-machine-learning.md index 0cf84087..6dc3109e 100644 --- a/_podcast/s04e01-from-swe-to-ml.md +++ b/_podcast/from-software-engineer-to-machine-learning.md @@ -1,12 +1,11 @@ --- -title: 'From Software Engineering to Machine Learning: 7 Lessons, Tools, MLOps & Project - Roadmap' -short: From Software Engineering to Machine Learning -guests: -- svpino -image: images/podcast/s04e01-from-swe-to-ml.jpg +title: "From Software Engineering to Machine Learning: 7 Lessons, Tools, MLOps & Project Roadmap" +short: "From Software Engineering to Machine Learning" season: 4 episode: 1 +guests: +- svpino +image: images/podcast/from-software-engineer-to-machine-learning.jpg ids: youtube: xVYOdRrN7hw anchor: From-Software-Engineering-to-Machine-Learning---Santiago-Valdarrama-e139s63 @@ -15,6 +14,141 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/From-Software-Engineering-to-Machine-Learning---Santiago-Valdarrama-e139s63 spotify: https://open.spotify.com/episode/0PHDZPGyXgyDM9HH7QzVdZ apple: https://podcasts.apple.com/us/podcast/from-software-engineering-to-machine-learning-santiago/id1541710331?i=1000526870384 + +description: "Learn practical machine learning for software engineers: 7 lessons, Python tools, MLOps & a project roadmap to build, deploy and monitor real ML systems." +topics: +- machine learning +- MLOps +- software engineering +- career transition +- tools +intro: "How do you move from software engineering into practical machine learning without getting stuck on theory or math? 
In this episode, Santiago Valdarrama — Director of Computer Vision and a computer scientist with two decades of software experience — walks through a pragmatic roadmap for software engineers transitioning to machine learning.

We cover seven practical lessons for getting started (start projects, think long-term, teach and join communities, build real projects, prioritize coding, analyze problems first, and favor pragmatism), core ML tooling (Python, NumPy, Pandas, Matplotlib, scikit-learn), and recommended learning resources (Google ML Crash Course, Kaggle, Deep Learning with Python, Hands-On Machine Learning). Santiago compares problem-based vs top-down learning, outlines a course roadmap for engineers, and explains ML engineering skills: data pipelines, modeling, deployment, monitoring, plus MLOps fundamentals like APIs, Docker, and cloud providers.

Listen to gain an actionable project roadmap, tools checklist, and concrete strategies to conquer math anxiety and ship ML systems — practical guidance for engineers who want to build, deploy, and maintain real machine learning solutions." +dateadded: 2021-06-25 + +duration: PT00H59M24S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=0 + endOffset: 159 +- name: 'Guest Overview: Santiago — Director of Computer Vision' + startOffset: 159 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=159 + endOffset: 208 +- name: Adding Machine Learning to a Software Engineering Skillset + startOffset: 208 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=208 + endOffset: 291 +- name: 'Personal & Academic Background: Cuba, Bachelor’s, Georgia Tech MS' + startOffset: 291 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=291 + endOffset: 393 +- name: 'Software Engineers’ Advantage: Coding as a Core ML Skill' + startOffset: 393 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=393 + endOffset: 492 +- name: 'Overcoming Math Anxiety: Practical, Problem-First Learning' + startOffset: 492 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=492 + endOffset: 780 +- name: 'Communicating ML Simply: Teaching and Writing for Understanding' + startOffset: 780 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=780 + endOffset: 971 +- name: Seven Practical Lessons for Starting a Machine Learning Career + startOffset: 971 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=971 + endOffset: 1045 +- name: 'Lesson 1 — Take Action: Start Projects Instead of Overpreparing' + startOffset: 1045 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1045 + endOffset: 1149 +- name: 'Lesson 2 — Learning as a Marathon: Long-Term Growth in ML' + startOffset: 1149 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1149 + endOffset: 1238 +- name: 'Lesson 3 — Community & Teaching: Accelerating Progress Together' + startOffset: 
1238 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1238 + endOffset: 1338 +- name: 'Lesson 4 — Apply Knowledge: Build and Share Real Projects' + startOffset: 1338 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1338 + endOffset: 1500 +- name: 'Lesson 5 — Math vs Coding: Coding Often Determines Success' + startOffset: 1500 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1500 + endOffset: 1599 +- name: 'Lesson 6 — Problem Analysis First: Design Solutions Before Code' + startOffset: 1599 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1599 + endOffset: 1745 +- name: 'Lesson 7 — Pragmatism Over Purism: Deliver Value Without Knowing Every Detail' + startOffset: 1745 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1745 + endOffset: 1990 +- name: 'Core ML Tooling: Python, NumPy, Pandas, Matplotlib, scikit-learn' + startOffset: 1990 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1990 + endOffset: 2179 +- name: 'Learning Approaches: Problem-Based vs Top-Down (Theory First)' + startOffset: 2179 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2179 + endOffset: 2328 +- name: 'Recommended Courses & Tutorials: Google ML Crash Course, Kaggle' + startOffset: 2328 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2328 + endOffset: 2469 +- name: 'Essential Books: Deep Learning with Python; Hands-On Machine Learning' + startOffset: 2469 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2469 + endOffset: 2528 +- name: Course Roadmap for Software Engineers Transitioning to ML + startOffset: 2528 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2528 + endOffset: 2641 +- name: 'Improving Coding Skills: Learn Python by Building Solutions' + startOffset: 2641 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2641 + endOffset: 2727 +- name: 'Build Projects Without ML: Automation Examples (Selenium)' + startOffset: 2727 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2727 + endOffset: 2799 +- name: 'ML Engineering Skills: Data Pipeline, 
Modeling, Deployment, Monitoring' + startOffset: 2799 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2799 + endOffset: 2963 +- name: 'Deployment & MLOps Fundamentals: APIs, Docker, Cloud Providers' + startOffset: 2963 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2963 + endOffset: 3081 +- name: 'Learning Cloud Pragmatically: Learn What the Project Demands' + startOffset: 3081 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3081 + endOffset: 3139 +- name: 'Machine Learning vs Data Science: Roles, Tools, and Focus' + startOffset: 3139 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3139 + endOffset: 3310 +- name: 'Getting Started: Andrew Ng Coursera vs Hands-On Project Work' + startOffset: 3310 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3310 + endOffset: 3397 +- name: 'Conquering Math: Intuition, Translate Formulas to Code' + startOffset: 3397 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3397 + endOffset: 3594 +- name: 'Episode Resources: Santiago’s Twitter, Course Links' + startOffset: 3594 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3594 + endOffset: 3639 +- name: Closing Remarks & Conference Announcements + startOffset: 3639 + url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3639 + endOffset: 3564 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Santiago — Director of Computer Vision' @@ -1113,146 +1247,6 @@ transcript: sec: 3723 time: '1:02:03' who: Alexey -description: 'Learn practical machine learning for software engineers: 7 lessons, - Python tools, MLOps & a project roadmap to build, deploy and monitor real ML systems.' -intro: 'How do you move from software engineering into practical machine learning - without getting stuck on theory or math? In this episode, Santiago Valdarrama — Director - of Computer Vision and a computer scientist with two decades of software experience - — walks through a pragmatic roadmap for software engineers transitioning to machine - learning.

We cover seven practical lessons for getting started (start projects, - think long-term, teach and join communities, build real projects, prioritize coding, - analyze problems first, and favor pragmatism), core ML tooling (Python, NumPy, Pandas, - Matplotlib, scikit-learn), and recommended learning resources (Google ML Crash Course, - Kaggle, Deep Learning with Python, Hands-On Machine Learning). Santiago compares - problem-based vs top-down learning, outlines a course roadmap for engineers, and - explains ML engineering skills: data pipelines, modeling, deployment, monitoring, - plus MLOps fundamentals like APIs, Docker, and cloud providers.

Listen - to gain an actionable project roadmap, tools checklist, and concrete strategies - to conquer math anxiety and ship ML systems — practical guidance for engineers who - want to build, deploy, and maintain real machine learning solutions.' -dateadded: '2021-06-25' -duration: PT00H59M24S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=0 - endOffset: 159 -- name: 'Guest Overview: Santiago — Director of Computer Vision' - startOffset: 159 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=159 - endOffset: 208 -- name: Adding Machine Learning to a Software Engineering Skillset - startOffset: 208 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=208 - endOffset: 291 -- name: 'Personal & Academic Background: Cuba, Bachelor’s, Georgia Tech MS' - startOffset: 291 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=291 - endOffset: 393 -- name: 'Software Engineers’ Advantage: Coding as a Core ML Skill' - startOffset: 393 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=393 - endOffset: 492 -- name: 'Overcoming Math Anxiety: Practical, Problem-First Learning' - startOffset: 492 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=492 - endOffset: 780 -- name: 'Communicating ML Simply: Teaching and Writing for Understanding' - startOffset: 780 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=780 - endOffset: 971 -- name: Seven Practical Lessons for Starting a Machine Learning Career - startOffset: 971 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=971 - endOffset: 1045 -- name: 'Lesson 1 — Take Action: Start Projects Instead of Overpreparing' - startOffset: 1045 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1045 - endOffset: 1149 -- name: 'Lesson 2 — Learning as a Marathon: Long-Term Growth in ML' - startOffset: 1149 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1149 - endOffset: 1238 -- name: 'Lesson 3 — Community & Teaching: Accelerating Progress Together' - 
startOffset: 1238 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1238 - endOffset: 1338 -- name: 'Lesson 4 — Apply Knowledge: Build and Share Real Projects' - startOffset: 1338 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1338 - endOffset: 1500 -- name: 'Lesson 5 — Math vs Coding: Coding Often Determines Success' - startOffset: 1500 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1500 - endOffset: 1599 -- name: 'Lesson 6 — Problem Analysis First: Design Solutions Before Code' - startOffset: 1599 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1599 - endOffset: 1745 -- name: 'Lesson 7 — Pragmatism Over Purism: Deliver Value Without Knowing Every Detail' - startOffset: 1745 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1745 - endOffset: 1990 -- name: 'Core ML Tooling: Python, NumPy, Pandas, Matplotlib, scikit-learn' - startOffset: 1990 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=1990 - endOffset: 2179 -- name: 'Learning Approaches: Problem-Based vs Top-Down (Theory First)' - startOffset: 2179 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2179 - endOffset: 2328 -- name: 'Recommended Courses & Tutorials: Google ML Crash Course, Kaggle' - startOffset: 2328 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2328 - endOffset: 2469 -- name: 'Essential Books: Deep Learning with Python; Hands-On Machine Learning' - startOffset: 2469 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2469 - endOffset: 2528 -- name: Course Roadmap for Software Engineers Transitioning to ML - startOffset: 2528 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2528 - endOffset: 2641 -- name: 'Improving Coding Skills: Learn Python by Building Solutions' - startOffset: 2641 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2641 - endOffset: 2727 -- name: 'Build Projects Without ML: Automation Examples (Selenium)' - startOffset: 2727 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2727 - endOffset: 2799 -- name: 'ML Engineering Skills: 
Data Pipeline, Modeling, Deployment, Monitoring' - startOffset: 2799 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2799 - endOffset: 2963 -- name: 'Deployment & MLOps Fundamentals: APIs, Docker, Cloud Providers' - startOffset: 2963 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=2963 - endOffset: 3081 -- name: 'Learning Cloud Pragmatically: Learn What the Project Demands' - startOffset: 3081 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3081 - endOffset: 3139 -- name: 'Machine Learning vs Data Science: Roles, Tools, and Focus' - startOffset: 3139 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3139 - endOffset: 3310 -- name: 'Getting Started: Andrew Ng Coursera vs Hands-On Project Work' - startOffset: 3310 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3310 - endOffset: 3397 -- name: 'Conquering Math: Intuition, Translate Formulas to Code' - startOffset: 3397 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3397 - endOffset: 3594 -- name: 'Episode Resources: Santiago’s Twitter, Course Links' - startOffset: 3594 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3594 - endOffset: 3639 -- name: Closing Remarks & Conference Announcements - startOffset: 3639 - url: https://www.youtube.com/watch?v=xVYOdRrN7hw&t=3639 - endOffset: 3564 --- Links: diff --git a/_podcast/s07e08-from-data-science-to-data-engineering.md b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md similarity index 99% rename from _podcast/s07e08-from-data-science-to-data-engineering.md rename to _podcast/from-software-engineering-data-science-to-data-engineering-leadership.md index 1c581057..a9f4534a 100644 --- a/_podcast/s07e08-from-data-science-to-data-engineering.md +++ b/_podcast/from-software-engineering-data-science-to-data-engineering-leadership.md @@ -1,33 +1,131 @@ --- +title: "How to Become a Data Engineer: Skills, MLOps, Pipelines, SQL, CI/CD & Cloud" +short: "From Data Science to Data Engineering" +season: 7 episode: 8 
guests: - ellenkonig -description: 'Master data engineering, MLOps and pipelines: learn CI/CD, cloud cost - control and SQL/Python skills to switch careers and accelerate growth now.' -intro: In this episode, Ellen König—Head of Engineering at alcemy—shares her journey - from software and data science to data engineering leadership. She explains why - many professionals make the switch, the skills that matter most (from DevOps and - CI/CD to collaboration), and how to prepare through side projects and software fundamentals. -

Ellen also breaks down key tools like Git, Docker, and Airflow, discusses - the realities of cloud costs and team structures, and offers practical advice for - anyone planning a transition into data engineering. -date: 2025-11-07 +image: images/podcast/from-software-engineering-data-science-to-data-engineering-leadership.jpg ids: anchor: From-Data-Science-to-Data-Engineering---Ellen-Knig-e1fgfbm youtube: 3TTu-hYzxeg -image: images/podcast/s07e08-from-data-science-to-data-engineering.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Data-Science-to-Data-Engineering---Ellen-Knig-e1fgfbm apple: https://podcasts.apple.com/us/podcast/from-data-science-to-data-engineering-ellen-k%C3%B6nig/id1541710331?i=1000553736781 spotify: https://open.spotify.com/episode/4R9F5B4f8vf5r5yQEmwYiu youtube: https://www.youtube.com/watch?v=3TTu-hYzxeg -season: 7 -short: From Data Science to Data Engineering -title: 'How to Become a Data Engineer: Skills, MLOps, Pipelines, SQL, CI/CD & Cloud' + +description: "Master data engineering, MLOps and pipelines: learn CI/CD, cloud cost control and SQL/Python skills to switch careers and accelerate growth now." +intro: "In this episode, Ellen König—Head of Engineering at alcemy—shares her journey from software and data science to data engineering leadership. She explains why many professionals make the switch, the skills that matter most (from DevOps and CI/CD to collaboration), and how to prepare through side projects and software fundamentals.

Ellen also breaks down key tools like Git, Docker, and Airflow, discusses the realities of cloud costs and team structures, and offers practical advice for anyone planning a transition into data engineering" topics: - data science - data engineering -- career switch +- career transition +- MLOps +- tools +dateadded: 2022-03-14 +date: 2025-11-07 + +duration: PT00H59M45S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=0 + endOffset: 111 +- name: 'Career Narrative: From Backend Developer to Data Engineering Lead' + startOffset: 111 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=111 + endOffset: 392 +- name: 'Motivation to Switch: Blackbox Models, Code Quality, and Professional Fit' + startOffset: 392 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=392 + endOffset: 581 +- name: 'Role Overlap: Data Science Tasks That Are Data Engineering Work' + startOffset: 581 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=581 + endOffset: 722 +- name: 'Data Intuition: How Data Is Produced, Structured, and Biased' + startOffset: 722 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=722 + endOffset: 835 +- name: 'Transferable Strengths: Pipelines, Stakeholder Communication, Exploration' + startOffset: 835 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=835 + endOffset: 902 +- name: 'Core Upskills: Collaborative Coding, CI/CD and DevOps Practices' + startOffset: 902 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=902 + endOffset: 1054 +- name: 'MLOps vs Research: When Data Scientists Need Production Engineering Skills' + startOffset: 1054 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1054 + endOffset: 1176 +- name: 'Learning Pathways: On-the-Job Mentorship, Bootcamps, and Courses' + startOffset: 1176 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1176 + endOffset: 1285 +- name: 'Experiment First: Side Projects and Small Work Assignments Before Switching' + 
startOffset: 1285 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1285 + endOffset: 1421 +- name: 'Software Foundations: Take General Dev Courses (Web, Mobile) to Learn Engineering' + startOffset: 1421 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1421 + endOffset: 1580 +- name: 'Essential Course Components: Git, Docker, Testing, CLI, Clean Code' + startOffset: 1580 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1580 + endOffset: 1734 +- name: 'Language Guidance: SQL & Python for Analytics; Java/Scala for Streaming' + startOffset: 1734 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1734 + endOffset: 1963 +- name: 'Market Dynamics: Strong Demand for Data Engineers and Expectation Gaps' + startOffset: 1963 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1963 + endOffset: 2140 +- name: 'Teamwork Shift: Adapting to Pair Programming and Close Collaboration' + startOffset: 2140 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2140 + endOffset: 2300 +- name: 'Organizational Models: Embedded Data Engineers vs Central Platform Teams' + startOffset: 2300 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2300 + endOffset: 2370 +- name: 'Intersection Roles: Analytics Engineer, Data-Science-Engineers, MLOps' + startOffset: 2370 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2370 + endOffset: 2489 +- name: 'Project Recipes: Build Scrapers, ETL Pipelines, Schedulers (Airflow)' + startOffset: 2489 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2489 + endOffset: 2640 +- name: 'Portfolio Example: Domain-Focused Pipelines with Real Data & Automation' + startOffset: 2640 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2640 + endOffset: 2962 +- name: 'Cloud Cost Control: Billing Exploration, Budgets, and Alerting' + startOffset: 2962 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2962 + endOffset: 3166 +- name: 'Entry Strategy: When to Apply for Entry-Level Roles vs Internships' + startOffset: 3166 + url: 
https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3166 + endOffset: 3346 +- name: 'Career Acceleration: Benefits of Consultancies and Large Companies' + startOffset: 3346 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3346 + endOffset: 3516 +- name: 'Cloud Choice: Practical Differences, Local Demand, and Free Tiers' + startOffset: 3516 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3516 + endOffset: 3621 +- name: Closing Remarks & How to Contact Ellen + startOffset: 3621 + url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3621 + endOffset: 3585 + transcript: - header: Episode Introduction & Guest Overview - header: Episode Introduction & Guest Overview @@ -1731,103 +1829,4 @@ transcript: sec: 3659 time: '1:00:59' who: Ellen -dateadded: '2022-03-14' -duration: PT00H59M45S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=0 - endOffset: 111 -- name: 'Career Narrative: From Backend Developer to Data Engineering Lead' - startOffset: 111 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=111 - endOffset: 392 -- name: 'Motivation to Switch: Blackbox Models, Code Quality, and Professional Fit' - startOffset: 392 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=392 - endOffset: 581 -- name: 'Role Overlap: Data Science Tasks That Are Data Engineering Work' - startOffset: 581 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=581 - endOffset: 722 -- name: 'Data Intuition: How Data Is Produced, Structured, and Biased' - startOffset: 722 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=722 - endOffset: 835 -- name: 'Transferable Strengths: Pipelines, Stakeholder Communication, Exploration' - startOffset: 835 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=835 - endOffset: 902 -- name: 'Core Upskills: Collaborative Coding, CI/CD and DevOps Practices' - startOffset: 902 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=902 - endOffset: 1054 -- name: 'MLOps vs Research: 
When Data Scientists Need Production Engineering Skills' - startOffset: 1054 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1054 - endOffset: 1176 -- name: 'Learning Pathways: On-the-Job Mentorship, Bootcamps, and Courses' - startOffset: 1176 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1176 - endOffset: 1285 -- name: 'Experiment First: Side Projects and Small Work Assignments Before Switching' - startOffset: 1285 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1285 - endOffset: 1421 -- name: 'Software Foundations: Take General Dev Courses (Web, Mobile) to Learn Engineering' - startOffset: 1421 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1421 - endOffset: 1580 -- name: 'Essential Course Components: Git, Docker, Testing, CLI, Clean Code' - startOffset: 1580 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1580 - endOffset: 1734 -- name: 'Language Guidance: SQL & Python for Analytics; Java/Scala for Streaming' - startOffset: 1734 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1734 - endOffset: 1963 -- name: 'Market Dynamics: Strong Demand for Data Engineers and Expectation Gaps' - startOffset: 1963 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=1963 - endOffset: 2140 -- name: 'Teamwork Shift: Adapting to Pair Programming and Close Collaboration' - startOffset: 2140 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2140 - endOffset: 2300 -- name: 'Organizational Models: Embedded Data Engineers vs Central Platform Teams' - startOffset: 2300 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2300 - endOffset: 2370 -- name: 'Intersection Roles: Analytics Engineer, Data-Science-Engineers, MLOps' - startOffset: 2370 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2370 - endOffset: 2489 -- name: 'Project Recipes: Build Scrapers, ETL Pipelines, Schedulers (Airflow)' - startOffset: 2489 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2489 - endOffset: 2640 -- name: 'Portfolio Example: Domain-Focused Pipelines with 
Real Data & Automation' - startOffset: 2640 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2640 - endOffset: 2962 -- name: 'Cloud Cost Control: Billing Exploration, Budgets, and Alerting' - startOffset: 2962 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=2962 - endOffset: 3166 -- name: 'Entry Strategy: When to Apply for Entry-Level Roles vs Internships' - startOffset: 3166 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3166 - endOffset: 3346 -- name: 'Career Acceleration: Benefits of Consultancies and Large Companies' - startOffset: 3346 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3346 - endOffset: 3516 -- name: 'Cloud Choice: Practical Differences, Local Demand, and Free Tiers' - startOffset: 3516 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3516 - endOffset: 3621 -- name: Closing Remarks & How to Contact Ellen - startOffset: 3621 - url: https://www.youtube.com/watch?v=3TTu-hYzxeg&t=3621 - endOffset: 3585 --- diff --git a/_podcast/s12e01-from-software-engineer-to-data-science-manager.md b/_podcast/from-software-engineering-to-leading-data-science-teams.md similarity index 97% rename from _podcast/s12e01-from-software-engineer-to-data-science-manager.md rename to _podcast/from-software-engineering-to-leading-data-science-teams.md index 45c6ceff..f2967128 100644 --- a/_podcast/s12e01-from-software-engineer-to-data-science-manager.md +++ b/_podcast/from-software-engineering-to-leading-data-science-teams.md @@ -1,20 +1,152 @@ --- +title: "Transitioning from Software Engineer to Data Science Manager: Search, ML & Leadership" +short: "From Software Engineer to Data Science Manager" +season: 12 episode: 1 guests: - sadatanwar +image: images/podcast/from-software-engineering-to-leading-data-science-teams.jpg ids: anchor: From-Software-Engineer-to-Data-Science-Manager---Sadat-Anwar-e1rqkdf youtube: xyTfqIWeKf8 -image: images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg links: anchor: 
https://anchor.fm/datatalksclub/episodes/From-Software-Engineer-to-Data-Science-Manager---Sadat-Anwar-e1rqkdf apple: https://podcasts.apple.com/us/podcast/from-software-engineer-to-data-science-manager-sadat-anwar/id1541710331?i=1000589637838 spotify: https://open.spotify.com/episode/3vOUwe4WtNQFXHRgTcyMtg?si=87o3XW_EQZ-n68nhAeV8Xw youtube: https://www.youtube.com/watch?v=xyTfqIWeKf8 -season: 12 -short: From Software Engineer to Data Science Manager -title: 'Transitioning from Software Engineer to Data Science Manager: Search, ML & - Leadership' + +description: "Learn to transition into a Data Science Manager: master search engineering, machine learning and leadership to hire, scale teams and measure business impact." +intro: "How do you move from hands-on software engineering into leading data science teams while staying effective on search and machine learning projects? In this episode Sadat Anwar — a people-centric Data Science Manager and former software engineer fluent in Java, Scala and Python — maps his path from an electronics and informatics background to research in computer vision at Fraunhofer and production search work at OLX.

We cover practical search engineering topics (Solr autoscaling, decoupling search from a monolith, Kotlin services with Python ML satellites), early ML projects and experimentation strategies (master’s thesis on neural nets, 20% time wins, “act before you think”), and engineering safety nets like feature flags, backups and monitoring. Sadat also walks through the promotion/hiring process, documenting leadership evidence, people management challenges (conflict resolution, hiring, motivation loss when stepping away from code), and transitioning into data science management with NLP, trust & safety and fraud detection responsibilities.

Listen for concrete advice on measuring managerial impact, leveraging EM experience to lead data science teams, and tactical steps for engineers aiming to become data science managers in search and ML domains" +topics: +- career transition +- software engineering +- data science +- machine learning +- leadership +- team building +dateadded: 2022-12-10 + +duration: PT01H28S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=0 + endOffset: 65 +- name: 'Episode Overview: From Software Engineer to Data Science Manager' + startOffset: 65 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=65 + endOffset: 97 +- name: 'Early Career & Education: Aspiring Doctor, Electronics Bachelor, Informatics + Master' + startOffset: 97 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=97 + endOffset: 192 +- name: 'Fraunhofer Research Assistant: Computer Vision and Car Dent Detection' + startOffset: 192 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=192 + endOffset: 391 +- name: 'Search Engineering at OLX: First Day Firefighting and Team Onboarding' + startOffset: 391 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=391 + endOffset: 522 +- name: 'Solr Autoscaling: Root Cause, CPU Load, and Scheduled Scaling Fixes' + startOffset: 522 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=522 + endOffset: 637 +- name: 'Decoupling Search from Monolith: Proposal, Implementation, and Experiment + Velocity' + startOffset: 637 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=637 + endOffset: 870 +- name: 'Tech Stack for Search: Kotlin Services, Python Satellites, and ML Integrations' + startOffset: 870 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=870 + endOffset: 913 +- name: 'First ML Project: Master’s Thesis on Neural Networks for Energy Forecasting' + startOffset: 913 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=913 + endOffset: 1138 +- name: '20% Time Success: Spellchecker Attempt, Word2Vec, 
and Recommendation System + Wins' + startOffset: 1138 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1138 + endOffset: 1247 +- name: 'Learning Approach: "Act Before You Think" and Building Practical ML Experience' + startOffset: 1247 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1247 + endOffset: 1318 +- name: 'Safety Nets for Experimentation: Feature Flags, Backups, Monitoring, Experimentation' + startOffset: 1318 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1318 + endOffset: 1511 +- name: 'Path to Management: Opportunity, Promotion, and Timing' + startOffset: 1511 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1511 + endOffset: 1588 +- name: 'Internal Hiring Process: Panel Interviews, Feedback, and Internal Candidate + Dynamics' + startOffset: 1588 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1588 + endOffset: 1825 +- name: 'People Management Skills: Conflict Resolution, Hiring, and Business Metrics' + startOffset: 1825 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1825 + endOffset: 2026 +- name: 'The Brag List: Documenting Leadership Evidence for Interviews' + startOffset: 2026 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2026 + endOffset: 2176 +- name: 'Transition Pain Points: Dopamine Loss, Dropping Hands-On Coding, and Withdrawal' + startOffset: 2176 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2176 + endOffset: 2448 +- name: 'Managing Momentum: Project Ownership, Milestones, and Team Coordination' + startOffset: 2448 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2448 + endOffset: 2613 +- name: 'Transition to Data Science Management: Case Study Interview and Motivation' + startOffset: 2613 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2613 + endOffset: 2678 +- name: 'Trust & Safety Work: Chat Moderation, NLP Challenges, and Fraud Detection' + startOffset: 2678 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2678 + endOffset: 2901 +- name: 'Managerial Domain Knowledge: When ML/NLP 
Expertise Helps vs Coordination + Role' + startOffset: 2901 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2901 + endOffset: 3044 +- name: 'Role Shift: Greater Product Involvement and Stakeholder Influence' + startOffset: 3044 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3044 + endOffset: 3172 +- name: Leveraging EM Experience to Lead Data Science Teams + startOffset: 3172 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3172 + endOffset: 3236 +- name: 'Transitioning from Data Engineering/Analytics to Data Science Manager: Advice' + startOffset: 3236 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3236 + endOffset: 3454 +- name: 'Measuring Managerial Impact: Influence, Business Value, and Team Health Metrics' + startOffset: 3454 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3454 + endOffset: 3516 +- name: 'Recommended Reading: The Manager''s Path and No Rules Rules (Team Culture)' + startOffset: 3516 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3516 + endOffset: 3620 +- name: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It" + startOffset: 3620 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3620 + endOffset: 3672 +- name: Podcast Closing and Final Remarks + startOffset: 3672 + url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3672 + endOffset: 3628 + transcript: - header: Podcast Introduction - header: 'Episode Overview: From Software Engineer to Data Science Manager' @@ -809,7 +941,7 @@ transcript: sec: 2174 time: '36:14' who: Alexey -- header: 'Transition Pain Points: Dopamine Loss, Dropping Hands‑On Coding, and Withdrawal' +- header: 'Transition Pain Points: Dopamine Loss, Dropping Hands-On Coding, and Withdrawal' - line: When you’re coding, right. You create a merge request – bam, that's dopamine. You get an approval – another shot of dopamine. You hit the merge button – dopamine. Deploy – dopamine. A/B test started – dopamine. 
There’s dopamine throughout the @@ -1368,7 +1500,7 @@ transcript: sec: 3584 time: '59:44' who: Sadat -- header: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' +- header: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It" - line: When is the next Search Meetup meeting? sec: 3620 time: '1:00:20' @@ -1396,144 +1528,6 @@ transcript: sec: 3693 time: '1:01:33' who: Sadat -description: 'Learn to transition into a Data Science Manager: master search engineering, - machine learning and leadership to hire, scale teams and measure business impact.' -intro: How do you move from hands-on software engineering into leading data science - teams while staying effective on search and machine learning projects? In this episode - Sadat Anwar — a people‑centric Data Science Manager and former software engineer - fluent in Java, Scala and Python — maps his path from an electronics and informatics - background to research in computer vision at Fraunhofer and production search work - at OLX.

We cover practical search engineering topics (Solr autoscaling, - decoupling search from a monolith, Kotlin services with Python ML satellites), early - ML projects and experimentation strategies (master’s thesis on neural nets, 20% - time wins, “act before you think”), and engineering safety nets like feature flags, - backups and monitoring. Sadat also walks through the promotion/hiring process, documenting - leadership evidence, people management challenges (conflict resolution, hiring, - motivation loss when stepping away from code), and transitioning into data science - management with NLP, trust & safety and fraud detection responsibilities.

- Listen for concrete advice on measuring managerial impact, leveraging EM experience - to lead data science teams, and tactical steps for engineers aiming to become data - science managers in search and ML domains. -dateadded: '2022-12-10' -duration: PT01H28S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=0 - endOffset: 65 -- name: 'Episode Overview: From Software Engineer to Data Science Manager' - startOffset: 65 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=65 - endOffset: 97 -- name: 'Early Career & Education: Aspiring Doctor, Electronics Bachelor, Informatics - Master' - startOffset: 97 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=97 - endOffset: 192 -- name: 'Fraunhofer Research Assistant: Computer Vision and Car Dent Detection' - startOffset: 192 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=192 - endOffset: 391 -- name: 'Search Engineering at OLX: First Day Firefighting and Team Onboarding' - startOffset: 391 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=391 - endOffset: 522 -- name: 'Solr Autoscaling: Root Cause, CPU Load, and Scheduled Scaling Fixes' - startOffset: 522 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=522 - endOffset: 637 -- name: 'Decoupling Search from Monolith: Proposal, Implementation, and Experiment - Velocity' - startOffset: 637 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=637 - endOffset: 870 -- name: 'Tech Stack for Search: Kotlin Services, Python Satellites, and ML Integrations' - startOffset: 870 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=870 - endOffset: 913 -- name: 'First ML Project: Master’s Thesis on Neural Networks for Energy Forecasting' - startOffset: 913 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=913 - endOffset: 1138 -- name: '20% Time Success: Spellchecker Attempt, Word2Vec, and Recommendation System - Wins' - startOffset: 1138 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1138 - 
endOffset: 1247 -- name: 'Learning Approach: "Act Before You Think" and Building Practical ML Experience' - startOffset: 1247 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1247 - endOffset: 1318 -- name: 'Safety Nets for Experimentation: Feature Flags, Backups, Monitoring, Experimentation' - startOffset: 1318 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1318 - endOffset: 1511 -- name: 'Path to Management: Opportunity, Promotion, and Timing' - startOffset: 1511 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1511 - endOffset: 1588 -- name: 'Internal Hiring Process: Panel Interviews, Feedback, and Internal Candidate - Dynamics' - startOffset: 1588 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1588 - endOffset: 1825 -- name: 'People Management Skills: Conflict Resolution, Hiring, and Business Metrics' - startOffset: 1825 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=1825 - endOffset: 2026 -- name: 'The Brag List: Documenting Leadership Evidence for Interviews' - startOffset: 2026 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2026 - endOffset: 2176 -- name: 'Transition Pain Points: Dopamine Loss, Dropping Hands‑On Coding, and Withdrawal' - startOffset: 2176 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2176 - endOffset: 2448 -- name: 'Managing Momentum: Project Ownership, Milestones, and Team Coordination' - startOffset: 2448 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2448 - endOffset: 2613 -- name: 'Transition to Data Science Management: Case Study Interview and Motivation' - startOffset: 2613 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2613 - endOffset: 2678 -- name: 'Trust & Safety Work: Chat Moderation, NLP Challenges, and Fraud Detection' - startOffset: 2678 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2678 - endOffset: 2901 -- name: 'Managerial Domain Knowledge: When ML/NLP Expertise Helps vs Coordination - Role' - startOffset: 2901 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=2901 
- endOffset: 3044 -- name: 'Role Shift: Greater Product Involvement and Stakeholder Influence' - startOffset: 3044 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3044 - endOffset: 3172 -- name: Leveraging EM Experience to Lead Data Science Teams - startOffset: 3172 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3172 - endOffset: 3236 -- name: 'Transitioning from Data Engineering/Analytics to Data Science Manager: Advice' - startOffset: 3236 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3236 - endOffset: 3454 -- name: 'Measuring Managerial Impact: Influence, Business Value, and Team Health Metrics' - startOffset: 3454 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3454 - endOffset: 3516 -- name: 'Recommended Reading: The Manager''s Path and No Rules Rules (Team Culture)' - startOffset: 3516 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3516 - endOffset: 3620 -- name: 'Community Event: Search Meetup — "Bias in AI: How to Measure and Fix It"' - startOffset: 3620 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3620 - endOffset: 3672 -- name: Podcast Closing and Final Remarks - startOffset: 3672 - url: https://www.youtube.com/watch?v=xyTfqIWeKf8&t=3672 - endOffset: 3628 --- Links: diff --git a/_podcast/s16e06-unwritten-rules-for-success-in-machine-learning.md b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md similarity index 94% rename from _podcast/s16e06-unwritten-rules-for-success-in-machine-learning.md rename to _podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md index 4b785f47..ee534e9e 100644 --- a/_podcast/s16e06-unwritten-rules-for-success-in-machine-learning.md +++ b/_podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.md @@ -1,19 +1,127 @@ --- +title: "From Software Engineer to VP of Machine Learning: Stakeholder Buy-In, Rapid POCs and Full-Stack Skills" +short: "The Unwritten Rules for Success in Machine Learning" +season: 
16 episode: 6 guests: - jackblandin +image: images/podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.jpg ids: - anchor: atatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk + anchor: datatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk youtube: su2M058m3Lw -image: images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Unwritten-Rules-for-Success-in-Machine-Learning---Jack-Blandin-e2bojjk apple: https://podcasts.apple.com/us/podcast/the-unwritten-rules-for-success-in-machine-learning/id1541710331?i=1000635206953 spotify: https://open.spotify.com/episode/2c8E0hZ02osih7ljEB6I6f?si=lSPp07r4TgmpGQey0cUjsA youtube: https://www.youtube.com/watch?v=su2M058m3Lw -season: 16 -short: The Unwritten Rules for Success in Machine Learning -title: 'From Engineer to VP of ML: How to Lead, Sell, and Ship Actionable ML Products' +description: "Discover how to win stakeholder buy-in, build rapid POCs and scale machine learning with full-stack skills—accelerate to VP-level impact and leadership." +topics: +- machine learning +- MLOps +- tools +- career transition +- leadership +- software engineering +intro: "How do you move from a hands-on software engineer to a VP of Machine Learning while getting stakeholders to say “yes,” delivering rapid POCs, and building the full-stack skills teams need? In this episode Jack Blandin walks through that transition. Jack began as a Software Engineer in 2015, shifted into Data Science and Machine Learning in 2017, and has held ML and leadership roles at Fi, Wayfair, Trunk Club, and GoHealth—managing teams of 2 to 15. He’s now VP of Data Science & Machine Learning at Fi, finishing a PhD focused on ML, reinforcement learning, and algorithmic fairness, and launching a hiring marketplace for data and ML professionals.

We dig into practical strategies for stakeholder buy-in, how to scope and run rapid proofs of concept that prove value, and which full-stack skills accelerate career growth in ML and data science leadership. Listeners will come away with actionable approaches to design fast, business-focused POCs, communicate technical tradeoffs to non-technical stakeholders, and level up skill sets that bridge engineering and product — useful for anyone aiming to scale into ML management or improve ML engineering outcomes." +dateadded: 2023-11-20 +duration: PT00H53M23S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=0 + endOffset: 13 +- name: 'Guest Overview: Jack’s career arc from software engineer to VP of ML' + startOffset: 13 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=13 + endOffset: 64 +- name: 'Career Pivot: Transition from full-stack engineering to data science' + startOffset: 64 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=64 + endOffset: 161 +- name: 'Early Leadership: Informal management and promotion at GoHealth' + startOffset: 161 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=161 + endOffset: 287 +- name: 'Rapid Advancement: Reflections on moving from IC to manager' + startOffset: 287 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=287 + endOffset: 413 +- name: 'Leadership Learning: Trial-and-error development of soft skills' + startOffset: 413 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=413 + endOffset: 541 +- name: 'Problem Framing: Technical context and product-level understanding' + startOffset: 541 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=541 + endOffset: 693 +- name: 'Reputation Management: Building respect, trust, and influence' + startOffset: 693 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=693 + endOffset: 925 +- name: 'Stakeholder Communication: Speaking marketing language (CAC, KPIs)' + startOffset: 925 + url: 
https://www.youtube.com/watch?v=su2M058m3Lw&t=925 + endOffset: 1042 +- name: 'ML Project Complexity: Resource needs and cross-functional buy-in' + startOffset: 1042 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1042 + endOffset: 1248 +- name: 'Selling ML: Fast POCs and demos to generate stakeholder support' + startOffset: 1248 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1248 + endOffset: 1398 +- name: 'Demo Design: Visualizations and user-centric proof-of-concepts' + startOffset: 1398 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1398 + endOffset: 1575 +- name: 'Risk Communication: Explaining model trade-offs without raw accuracy' + startOffset: 1575 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1575 + endOffset: 1697 +- name: 'Rapid Prototyping Tools: Gradio, Streamlit, and lightweight demos' + startOffset: 1697 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1697 + endOffset: 1726 +- name: 'Baseline First: Start with heuristics and manual processes before ML' + startOffset: 1726 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1726 + endOffset: 1863 +- name: 'Hypothesis Validation: Quick experiments to test product assumptions' + startOffset: 1863 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1863 + endOffset: 2049 +- name: 'Actionability Over Accuracy: Churn model lesson on usable insights' + startOffset: 2049 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2049 + endOffset: 2204 +- name: 'Outcome Focus: Avoiding technical tunnel vision on ML tuning' + startOffset: 2204 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2204 + endOffset: 2254 +- name: 'Data Generative Process: Treating data as a shadow of reality' + startOffset: 2254 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2254 + endOffset: 2437 +- name: 'Domain Immersion: Customer empathy through product usage' + startOffset: 2437 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2437 + endOffset: 2675 +- name: 'Full-Stack ML: Importance of 
software engineering for production ML' + startOffset: 2675 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2675 + endOffset: 2878 +- name: 'Content & Community: Daily LinkedIn posts and where to follow Jack' + startOffset: 2878 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2878 + endOffset: 3037 +- name: 'New Venture: Reimagining hiring and recruiting for ML/data roles' + startOffset: 3037 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3037 + endOffset: 3182 +- name: Episode Wrap-Up and Final Remarks + startOffset: 3182 + url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3182 + endOffset: 3203 transcript: - header: Podcast Introduction - header: 'Guest Overview: Jack’s career arc from software engineer to VP of ML' @@ -36,7 +144,7 @@ transcript: sec: 56 time: 0:56 who: Alexey -- header: 'Career Pivot: Transition from full‑stack engineering to data science' +- header: 'Career Pivot: Transition from full-stack engineering to data science' - line: Before we go into our main topic of these unwritten rules, let's start with your background. Can you tell us about your career journeys so far? sec: 64 @@ -175,7 +283,7 @@ transcript: sec: 377 time: '6:17' who: Jack -- header: 'Leadership Learning: Trial‑and‑error development of soft skills' +- header: 'Leadership Learning: Trial-and-error development of soft skills' - line: There’s no school for VPs of data science, right? [Jack agrees] So you just have to… How do you actually learn these skills? sec: 413 @@ -227,7 +335,7 @@ transcript: sec: 509 time: '8:29' who: Alexey -- header: 'Problem Framing: Technical context and product‑level understanding' +- header: 'Problem Framing: Technical context and product-level understanding' - line: Yeah, I would say it's a mix of trial and error and it's also a mix of observing what works for others. 
Another thing that is not really taught very often in any kind of school environment is the importance of technical problem framing and @@ -373,7 +481,7 @@ transcript: sec: 1004 time: '16:44' who: Jack -- header: 'ML Project Complexity: Resource needs and cross‑functional buy‑in' +- header: 'ML Project Complexity: Resource needs and cross-functional buy-in' - line: Okay. We'll see how relevant it is – I hope it is relevant to the actual discussion we plan to have today, which is about the rules for success in machine learning. Probably it is related. I just want to summarize. If you want to be a technical @@ -477,7 +585,7 @@ transcript: sec: 1302 time: '21:42' who: Jack -- header: 'Demo Design: Visualizations and user‑centric proof‑of‑concepts' +- header: 'Demo Design: Visualizations and user-centric proof-of-concepts' - line: You said visuals are important to them – what do you mean by that? Is having a demo with a user interface where they can play around important, or did you mean something else? Or did you mean planting a picture in their head or something @@ -539,7 +647,7 @@ transcript: sec: 1526 time: '25:26' who: Jack -- header: 'Risk Communication: Explaining model trade‑offs without raw accuracy' +- header: 'Risk Communication: Explaining model trade-offs without raw accuracy' - line: If you start talking about accuracy – you say, “Okay, this model is 70% accurate,” which may or may not be a good number, depending on the model, but to the stakeholders, it might sound scary like, “Ooh, 30% error rate. 30% of the time, it will make @@ -948,7 +1056,7 @@ transcript: sec: 2672 time: '44:32' who: Jack -- header: 'Full‑Stack ML: Importance of software engineering for production ML' +- header: 'Full-Stack ML: Importance of software engineering for production ML' - line: Well, I guess we have time for one or two more rules. I guess you have a bunch of them, right? What's the third one? 
sec: 2675 @@ -1119,7 +1227,7 @@ transcript: sec: 3040 time: '50:40' who: Jack -- header: Episode Wrap‑Up and Final Remarks +- header: Episode Wrap-Up and Final Remarks - line: We will all subscribe – follow you on LinkedIn – and we will see all the updates about your new endeavor. I don't like saying good luck because you probably don't need luck – you need something like perseverance, more – but luck is also important. @@ -1134,124 +1242,18 @@ transcript: sec: 3216 time: '53:36' who: Jack -description: 'Discover how to lead and ship actionable ML products: master stakeholder - communication, rapid POCs, demo design, and full‑stack ML to deliver business impact.' -intro: 'How do you move from software engineer to VP of Machine Learning while learning - to lead, sell, and ship ML products that actually change outcomes? In this episode - Jack Blandin—now VP of Data Science & Machine Learning at Fi, who transitioned from - full‑stack engineering to data science and has managed teams of 2–15—walks through - that exact journey.

We cover Jack’s career pivot and early leadership lessons, - practical approaches to problem framing and reputation management, and how to speak - the language of stakeholders (CAC, KPIs) to win buy‑in for ML projects. You’ll hear - concrete tactics for selling ML: fast POCs and user‑centric demos (Gradio, Streamlit), - starting with baseline heuristics and manual processes, running quick hypothesis - validation experiments, and communicating model trade‑offs without obsessing over - raw accuracy. Jack also explains the importance of domain immersion, full‑stack - engineering for production ML, and prioritizing actionability over accuracy—illustrated - by a churn model lesson.

If you lead or ship ML products, this episode - delivers actionable guidance on machine learning leadership, rapid prototyping, - demo design, and stakeholder communication to move models from prototype to product.' -dateadded: '2023-11-20' -duration: PT00H53M23S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=0 - endOffset: 13 -- name: 'Guest Overview: Jack’s career arc from software engineer to VP of ML' - startOffset: 13 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=13 - endOffset: 64 -- name: 'Career Pivot: Transition from full‑stack engineering to data science' - startOffset: 64 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=64 - endOffset: 161 -- name: 'Early Leadership: Informal management and promotion at GoHealth' - startOffset: 161 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=161 - endOffset: 287 -- name: 'Rapid Advancement: Reflections on moving from IC to manager' - startOffset: 287 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=287 - endOffset: 413 -- name: 'Leadership Learning: Trial‑and‑error development of soft skills' - startOffset: 413 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=413 - endOffset: 541 -- name: 'Problem Framing: Technical context and product‑level understanding' - startOffset: 541 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=541 - endOffset: 693 -- name: 'Reputation Management: Building respect, trust, and influence' - startOffset: 693 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=693 - endOffset: 925 -- name: 'Stakeholder Communication: Speaking marketing language (CAC, KPIs)' - startOffset: 925 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=925 - endOffset: 1042 -- name: 'ML Project Complexity: Resource needs and cross‑functional buy‑in' - startOffset: 1042 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1042 - endOffset: 1248 -- name: 'Selling ML: Fast POCs and demos to generate stakeholder support' - 
startOffset: 1248 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1248 - endOffset: 1398 -- name: 'Demo Design: Visualizations and user‑centric proof‑of‑concepts' - startOffset: 1398 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1398 - endOffset: 1575 -- name: 'Risk Communication: Explaining model trade‑offs without raw accuracy' - startOffset: 1575 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1575 - endOffset: 1697 -- name: 'Rapid Prototyping Tools: Gradio, Streamlit, and lightweight demos' - startOffset: 1697 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1697 - endOffset: 1726 -- name: 'Baseline First: Start with heuristics and manual processes before ML' - startOffset: 1726 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1726 - endOffset: 1863 -- name: 'Hypothesis Validation: Quick experiments to test product assumptions' - startOffset: 1863 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=1863 - endOffset: 2049 -- name: 'Actionability Over Accuracy: Churn model lesson on usable insights' - startOffset: 2049 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2049 - endOffset: 2204 -- name: 'Outcome Focus: Avoiding technical tunnel vision on ML tuning' - startOffset: 2204 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2204 - endOffset: 2254 -- name: 'Data Generative Process: Treating data as a shadow of reality' - startOffset: 2254 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2254 - endOffset: 2437 -- name: 'Domain Immersion: Customer empathy through product usage' - startOffset: 2437 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2437 - endOffset: 2675 -- name: 'Full‑Stack ML: Importance of software engineering for production ML' - startOffset: 2675 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2675 - endOffset: 2878 -- name: 'Content & Community: Daily LinkedIn posts and where to follow Jack' - startOffset: 2878 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=2878 - endOffset: 3037 -- name: 
'New Venture: Reimagining hiring and recruiting for ML/data roles' - startOffset: 3037 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3037 - endOffset: 3182 -- name: Episode Wrap‑Up and Final Remarks - startOffset: 3182 - url: https://www.youtube.com/watch?v=su2M058m3Lw&t=3182 - endOffset: 3203 ---- +context: 'Context — A career arc from software engineer to VP of ML frames concrete + stories about promotion, informal leadership, stakeholder selling, demo-driven buy-in, + rapid prototyping, baseline-first experiments, domain immersion, and building full-stack + production capabilities. + Core narrative — Success in applied machine learning is not primarily about squeezing + marginal accuracy from models but about bridging technical craft and business impact: + become a product-focused, full-stack practitioner and leader who rapidly validates + hypotheses with simple baselines and demos, speaks the language of stakeholders, + builds trust and reputation, communicates trade-offs clearly, and embeds ML into + real user workflows so technical work directly drives measurable outcomes.' 
+--- Links: * [Jack's LinkedIn profile](https://www.linkedin.com/in/jackblandin/){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md b/_podcast/from-startup-engineering-to-freelance-data-science.md similarity index 96% rename from _podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md rename to _podcast/from-startup-engineering-to-freelance-data-science.md index dd5ee653..49ef2f10 100644 --- a/_podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.md +++ b/_podcast/from-startup-engineering-to-freelance-data-science.md @@ -1,29 +1,151 @@ --- +title: "Freelance Data Scientist Playbook: MLOps, Model Monitoring, Upwork & Startup Skills" +short: "Lessons Learned from Freelancing and Working in a Start-up" +season: 14 episode: 5 guests: - antonisstellas -date: 2025-11-07 -topics: -- Freelance -- Startups -- Entrepreneurship -- Career Growth -- Remote Work -- Consulting -- Self-Employment +image: images/podcast/from-startup-engineering-to-freelance-data-science.jpg ids: anchor: ow/datatalksclub/episodes/Lessons-Learned-from-Freelancing-and-Working-in-a-Start-up---Antonis-Stellas-e25g94r youtube: -Gj7SaI-QW4 -image: images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Lessons-Learned-from-Freelancing-and-Working-in-a-Start-up---Antonis-Stellas-e25g94r apple: https://podcasts.apple.com/us/podcast/lessons-learned-from-freelancing-and-working-in-a/id1541710331?i=1000616311575 spotify: https://open.spotify.com/episode/4ehGduC0p734UtwPr5HANq?si=rEC_XP-4RSKYh0TtSQBtlw youtube: https://www.youtube.com/watch?v=-Gj7SaI-QW4 -season: 14 -short: Lessons Learned from Freelancing and Working in a Start-up -title: 'Freelance Data Scientist Playbook: MLOps, Model Monitoring, Upwork & Startup - Skills' + +description: "Discover MLOps, model monitoring & 
Upwork tips to build a freelance data scientist portfolio: pricing, onboarding, tools to land clients including MLflow" +intro: "How do you transition from startup engineering to a sustainable freelance data science practice while handling MLOps, model monitoring, and client work on Upwork? In this episode, Antonis Stellas — a freelance data scientist at Nanometrisis with a background in applied mathematics, physics and a professional doctorate working on industry consultancy — lays out a practical playbook.

Antonis walks through startup-honed skills (cross-functional roles, lean build-measure-learn, communication and business know-how), concrete MLOps tooling and patterns (MLflow, Prefect, Grafana), and model monitoring essentials like data drift, concept drift and using Evidently AI. He shares a hands-on course project (semiconductor prediction), streaming examples (YouTube metrics into BigQuery/Looker), Kafka/Confluent emphasis, and an open-source Evidently how-to from Hacktoberfest.

For freelancers, Antonis details starting on Upwork — profile building, proposal iteration, pricing strategy, onboarding workflows, invoicing and balancing startup commitments. Listen to get actionable guidance on building a portfolio, selecting projects, monitoring production models, and practical steps to find and retain clients as a freelance data scientist" +topics: +- freelance +- startups +- career growth +- remote work +- MLOps +dateadded: 2023-06-10 +date: 2025-11-07 + +duration: PT00H58M11S + +quotableClips: +- name: 'Podcast Introduction: guest Antonis and episode themes' + startOffset: 0 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=0 + endOffset: 148 +- name: 'Early Education: applied mathematics, physics and nanotechnology' + startOffset: 148 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=148 + endOffset: 230 +- name: 'Professional Doctorate: industry projects and consultancy in the Netherlands' + startOffset: 230 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=230 + endOffset: 335 +- name: 'Nanometrisis Focus: nanoscale inspection for chips, razors and cosmetics' + startOffset: 335 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=335 + endOffset: 499 +- name: 'Career Choice: choosing a startup over a corporation' + startOffset: 499 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=499 + endOffset: 716 +- name: 'Role Variety in Startups: cross-functional responsibilities' + startOffset: 716 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=716 + endOffset: 870 +- name: 'Small-Team Dynamics: working in a four-person startup' + startOffset: 870 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=870 + endOffset: 949 +- name: 'Skills Acquired: communication, business knowledge and self-organization' + startOffset: 949 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=949 + endOffset: 1059 +- name: 'Lean Methodology: build-measure-learn applied to products and ML' + startOffset: 1059 + url: 
https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1059 + endOffset: 1260 +- name: 'Model Monitoring: data drift, concept drift and Evidently AI' + startOffset: 1260 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1260 + endOffset: 1314 +- name: 'Community Onboarding: discovering and joining DataTalks.Club' + startOffset: 1314 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1314 + endOffset: 1512 +- name: 'MLOps Course Project: semiconductor prediction with MLflow, Prefect, Grafana' + startOffset: 1512 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1512 + endOffset: 1603 +- name: 'Course Recommendations: do exercises, be patient, complete final project' + startOffset: 1603 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1603 + endOffset: 1723 +- name: 'Open Source Contribution: creating an Evidently how-to during Hacktoberfest' + startOffset: 1723 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1723 + endOffset: 1833 +- name: 'Starting on Upwork: goals, platform mechanics and client discovery' + startOffset: 1833 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1833 + endOffset: 1917 +- name: 'Project Types on Upwork: ML, analytics, LLMs and variable durations' + startOffset: 1917 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1917 + endOffset: 2059 +- name: 'Profile Building: portfolios, attachments and iterative improvements' + startOffset: 2059 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2059 + endOffset: 2229 +- name: 'Learning from Rejection: refining proposals and specializing skills' + startOffset: 2229 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2229 + endOffset: 2355 +- name: 'Motivation for Freelancing: learning, extra income and persistence' + startOffset: 2355 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2355 + endOffset: 2439 +- name: 'Pricing Approach: hourly rates, client type and valuing your time' + startOffset: 2439 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2439 + endOffset: 2553 
+- name: 'Onboarding Workflow: data inspection, milestones and client alignment' + startOffset: 2553 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2553 + endOffset: 2718 +- name: 'Financial Setup: registering as a freelancer and invoicing considerations' + startOffset: 2718 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2718 + endOffset: 2848 +- name: 'Balancing Commitments: wearing many hats across startup and freelance work' + startOffset: 2848 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2848 + endOffset: 2980 +- name: 'Client Acquisition Tips: focus, upskilling and leveraging community resources' + startOffset: 2980 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2980 + endOffset: 3102 +- name: 'Data Engineering Course: streaming emphasis and Kafka/Confluent usage' + startOffset: 3102 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3102 + endOffset: 3217 +- name: 'Example Project: streaming YouTube metrics into BigQuery and Looker' + startOffset: 3217 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3217 + endOffset: 3401 +- name: 'Portfolio Advice: choose projects you enjoy and prioritize exploration' + startOffset: 3401 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3401 + endOffset: 3491 +- name: 'Recommended Reading: The Lean Startup, Lean Analytics, Designing ML Systems' + startOffset: 3491 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3491 + endOffset: 3583 +- name: Closing Remarks and invitation to the community + startOffset: 3583 + url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3583 + endOffset: 3491 + transcript: - header: 'Podcast Introduction: guest Antonis and episode themes' - header: 'Podcast Introduction: guest Antonis and episode themes' @@ -1026,143 +1148,6 @@ transcript: sec: 3583 time: '59:43' who: Alexey -intro: How do you transition from startup engineering to a sustainable freelance data - science practice while handling MLOps, model monitoring, and client work on Upwork? 
- In this episode, Antonis Stellas — a freelance data scientist at Nanometrisis with - a background in applied mathematics, physics and a professional doctorate working - on industry consultancy — lays out a practical playbook.

Antonis walks - through startup-honed skills (cross-functional roles, lean build-measure-learn, - communication and business know-how), concrete MLOps tooling and patterns (MLflow, - Prefect, Grafana), and model monitoring essentials like data drift, concept drift - and using Evidently AI. He shares a hands-on course project (semiconductor prediction), - streaming examples (YouTube metrics into BigQuery/Looker), Kafka/Confluent emphasis, - and an open-source Evidently how-to from Hacktoberfest.

For freelancers, - Antonis details starting on Upwork — profile building, proposal iteration, pricing - strategy, onboarding workflows, invoicing and balancing startup commitments. Listen - to get actionable guidance on building a portfolio, selecting projects, monitoring - production models, and practical steps to find and retain clients as a freelance - data scientist. -description: 'Discover MLOps, model monitoring & Upwork tips to build a freelance - data scientist portfolio: pricing, onboarding, tools to land clients including MLflow' -dateadded: '2023-06-10' -duration: PT00H58M11S -quotableClips: -- name: 'Podcast Introduction: guest Antonis and episode themes' - startOffset: 0 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=0 - endOffset: 148 -- name: 'Early Education: applied mathematics, physics and nanotechnology' - startOffset: 148 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=148 - endOffset: 230 -- name: 'Professional Doctorate: industry projects and consultancy in the Netherlands' - startOffset: 230 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=230 - endOffset: 335 -- name: 'Nanometrisis Focus: nanoscale inspection for chips, razors and cosmetics' - startOffset: 335 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=335 - endOffset: 499 -- name: 'Career Choice: choosing a startup over a corporation' - startOffset: 499 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=499 - endOffset: 716 -- name: 'Role Variety in Startups: cross-functional responsibilities' - startOffset: 716 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=716 - endOffset: 870 -- name: 'Small-Team Dynamics: working in a four-person startup' - startOffset: 870 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=870 - endOffset: 949 -- name: 'Skills Acquired: communication, business knowledge and self-organization' - startOffset: 949 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=949 - endOffset: 1059 -- name: 'Lean Methodology: build-measure-learn 
applied to products and ML' - startOffset: 1059 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1059 - endOffset: 1260 -- name: 'Model Monitoring: data drift, concept drift and Evidently AI' - startOffset: 1260 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1260 - endOffset: 1314 -- name: 'Community Onboarding: discovering and joining DataTalks.Club' - startOffset: 1314 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1314 - endOffset: 1512 -- name: 'MLOps Course Project: semiconductor prediction with MLflow, Prefect, Grafana' - startOffset: 1512 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1512 - endOffset: 1603 -- name: 'Course Recommendations: do exercises, be patient, complete final project' - startOffset: 1603 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1603 - endOffset: 1723 -- name: 'Open Source Contribution: creating an Evidently how-to during Hacktoberfest' - startOffset: 1723 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1723 - endOffset: 1833 -- name: 'Starting on Upwork: goals, platform mechanics and client discovery' - startOffset: 1833 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1833 - endOffset: 1917 -- name: 'Project Types on Upwork: ML, analytics, LLMs and variable durations' - startOffset: 1917 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=1917 - endOffset: 2059 -- name: 'Profile Building: portfolios, attachments and iterative improvements' - startOffset: 2059 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2059 - endOffset: 2229 -- name: 'Learning from Rejection: refining proposals and specializing skills' - startOffset: 2229 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2229 - endOffset: 2355 -- name: 'Motivation for Freelancing: learning, extra income and persistence' - startOffset: 2355 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2355 - endOffset: 2439 -- name: 'Pricing Approach: hourly rates, client type and valuing your time' - startOffset: 2439 - url: 
https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2439 - endOffset: 2553 -- name: 'Onboarding Workflow: data inspection, milestones and client alignment' - startOffset: 2553 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2553 - endOffset: 2718 -- name: 'Financial Setup: registering as a freelancer and invoicing considerations' - startOffset: 2718 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2718 - endOffset: 2848 -- name: 'Balancing Commitments: wearing many hats across startup and freelance work' - startOffset: 2848 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2848 - endOffset: 2980 -- name: 'Client Acquisition Tips: focus, upskilling and leveraging community resources' - startOffset: 2980 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=2980 - endOffset: 3102 -- name: 'Data Engineering Course: streaming emphasis and Kafka/Confluent usage' - startOffset: 3102 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3102 - endOffset: 3217 -- name: 'Example Project: streaming YouTube metrics into BigQuery and Looker' - startOffset: 3217 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3217 - endOffset: 3401 -- name: 'Portfolio Advice: choose projects you enjoy and prioritize exploration' - startOffset: 3401 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3401 - endOffset: 3491 -- name: 'Recommended Reading: The Lean Startup, Lean Analytics, Designing ML Systems' - startOffset: 3491 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3491 - endOffset: 3583 -- name: Closing Remarks and invitation to the community - startOffset: 3583 - url: https://www.youtube.com/watch?v=-Gj7SaI-QW4&t=3583 - endOffset: 3491 --- Links: diff --git a/_podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.md b/_podcast/generative-ai-chatbots-in-production-security.md similarity index 93% rename from _podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.md rename to _podcast/generative-ai-chatbots-in-production-security.md index 
3aed7042..d385a196 100644 --- a/_podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.md +++ b/_podcast/generative-ai-chatbots-in-production-security.md @@ -1,38 +1,121 @@ --- +title: "Hardening Generative AI Chatbots: Prevent Prompt Injection, Data Exfiltration & Hallucinations" +short: "AI in Industry: Trust, Return on Investment and Future" +season: 19 episode: 6 guests: - mariasukhareva -description: Discover generative AI, chatbot safety, and prompting strategies to prevent - hallucinations and data exfiltration, boost translation quality and ROI. -intro: How do we balance the rapid democratization of generative AI with real-world - chatbot safety, trust, and operational value? In this episode, we speak with a linguist-turned-computational-linguist - who now serves as a principal key expert in AI advising on technology and risk. - We trace their career path into industry and then dig into the practical security - and reliability challenges of large-scale chatbots.

Topics include the - rise of prompt engineering and new “AI experts,” a large-scale chatbot hacking exercise - and its findings on hallucinations, legal exposure, and financial incidents, and - data exfiltration methods like overloaded prompts and knowledge-base retrieval. - The conversation moves to concrete mitigations—output validation, query analysis, - layered defenses, and non-LLM classifiers—plus usability and ROI issues that slow - adoption. We also cover human-in-the-loop review, AI-assisted translation workflows, - prompt customization for controlled machine translation, and broader multilingual - and historical-linguistics challenges (from orthography to low-resource languages). -

Listen to learn actionable strategies for chatbot safety, practical prompt - and translation techniques, and how to evaluate trade-offs between research innovation - and operational risk. +image: images/podcast/generative-ai-chatbots-in-production-security.jpg ids: - anchor: atalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 + anchor: datatalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 youtube: bT7-HRNCltk -image: images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/AI-in-Industry-Trust--Return-on-Investment-and-Future---Maria-Sukhareva-e2rp9f8 apple: https://podcasts.apple.com/us/podcast/ai-in-industry-trust-return-on-investment-and-future/id1541710331?i=1000679505962 spotify: https://open.spotify.com/episode/5GOBabz65IRmiMow8FYbr5?si=a99463e34ffb48f1 youtube: https://www.youtube.com/watch?v=bT7-HRNCltk -season: 19 -short: 'AI in Industry: Trust, Return on Investment and Future' -title: 'Hardening Generative AI Chatbots: Prevent Prompt Injection, Data Exfiltration - & Hallucinations' +description: "Learn to harden generative AI chatbots against prompt injection and data exfiltration—defenses, detection, and techniques to reduce hallucinations." +topics: +- AI +- LLMs +- NLP +- MLOps +- production +- AI red teaming +- security +intro: "How do you harden generative AI chatbots against prompt injection, data exfiltration, and dangerous hallucinations? In this episode Maria Sukhareva — a principal key expert in AI at Siemens with 15+ years working at the intersection of linguistics and computational AI — walks through real-world risks, attack findings, and practical defenses for chatbot security.

We trace Maria’s path from linguist to industry expert and her role advising on technology and risk, then dive into a large-scale chatbot hacking exercise and the common failures that lead to legal exposure and financial incidents. Key topics include prompt injection and knowledge-base exfiltration techniques, hallucination causes and their impact on trust, and mitigations such as output validation, query analysis, layered defenses, and the use of non-LLM classifiers. We also cover human-in-the-loop workflows, AI-as-assistant moderation tools, and prompt customization for controlled machine translation.

If you’re building or deploying generative AI systems, this episode offers practical, production-focused guidance on chatbot security, AI safety, and improving accuracy and trust in deployed models." +dateadded: 2024-12-17 +duration: PT00H59M53S +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=0 + endOffset: 133 +- name: 'Career Path: From Linguist to Computational Linguistics and Industry' + startOffset: 133 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=133 + endOffset: 251 +- name: 'Role Definition: Principal Key Expert in AI — Advising on Technology and + Risk' + startOffset: 251 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=251 + endOffset: 342 +- name: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' + startOffset: 342 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=342 + endOffset: 568 +- name: 'Bot Safety Challenge: Large-Scale Chatbot Hacking Exercise and Findings' + startOffset: 568 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=568 + endOffset: 698 +- name: 'Chatbot Failures: Hallucinations, Legal Exposure, and Financial Incidents' + startOffset: 698 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=698 + endOffset: 800 +- name: 'Data Exfiltration Techniques: Overloading Prompts and Knowledge-Base Retrieval' + startOffset: 800 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=800 + endOffset: 975 +- name: 'Mitigations: Output Validation, Query Analysis, and Layered Defenses' + startOffset: 975 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=975 + endOffset: 1020 +- name: 'Non-LLM Classifiers: Robust Alternatives to Manipulable Generative Models' + startOffset: 1020 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1020 + endOffset: 1081 +- name: 'Trust and Hallucinations: User Confidence, Safety, and Adoption Risks' + startOffset: 1081 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1081 + endOffset: 1239 
+- name: 'Chatbot Adoption Issues: Usability, Verbosity, and Return on Investment' + startOffset: 1239 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1239 + endOffset: 1534 +- name: 'Human-in-the-Loop Solutions: Hybrid Review to Improve Accuracy' + startOffset: 1534 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1534 + endOffset: 1633 +- name: 'AI as Assistant: Moderation Tools, Autopilot Analogy, and Workforce Impact' + startOffset: 1633 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1633 + endOffset: 1793 +- name: 'Translation Workflows: AI-Augmented Translators and Quality Control' + startOffset: 1793 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1793 + endOffset: 1948 +- name: 'Prompt Customization: Controlled Machine Translation with ChatGPT' + startOffset: 1948 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1948 + endOffset: 2144 +- name: 'Historical Linguistics: Middle & Old English Pronunciation Insights' + startOffset: 2144 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2144 + endOffset: 2708 +- name: 'Ancient Languages: Cuneiform, Sumerian Transcription, and MT Approaches' + startOffset: 2708 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2708 + endOffset: 2906 +- name: 'Script Complexity: Logograms vs. Phonetics in Ancient Texts' + startOffset: 2906 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2906 + endOffset: 3181 +- name: 'Multilingual Models: Progress and Challenges for Low-Resource Languages' + startOffset: 3181 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3181 + endOffset: 3412 +- name: 'Orthography & Data Quality: Inconsistent Spelling in Historical Corpora' + startOffset: 3412 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3412 + endOffset: 3448 +- name: 'Industry Trade-offs: Research Innovation vs. 
ROI and Operational Needs' + startOffset: 3448 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3448 + endOffset: 3554 +- name: 'Episode Wrap-Up: Key Takeaways on AI Trust, Safety, and Future Directions' + startOffset: 3554 + url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3554 + endOffset: 3593 transcript: - header: Episode Introduction & Guest Overview - line: This week, we’re discussing the practical application of generative AI in @@ -106,7 +189,7 @@ transcript: sec: 328 time: '5:28' who: Alexey -- header: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' +- header: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' - line: Exactly. AI has become very accessible. Previously, being an AI expert required coding skills. Now, simply knowing how to use a keyboard and craft prompts can position someone as a "prompt engineer" or even an AI expert. @@ -811,96 +894,15 @@ transcript: sec: 3593 time: '59:53' who: Alexey -dateadded: '2024-12-17' -duration: PT00H59M53S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=0 - endOffset: 133 -- name: 'Career Path: From Linguist to Computational Linguistics and Industry' - startOffset: 133 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=133 - endOffset: 251 -- name: 'Role Definition: Principal Key Expert in AI — Advising on Technology and - Risk' - startOffset: 251 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=251 - endOffset: 342 -- name: 'Democratization of Generative AI: Rise of Prompting and New "AI Experts"' - startOffset: 342 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=342 - endOffset: 568 -- name: 'Bot Safety Challenge: Large-Scale Chatbot Hacking Exercise and Findings' - startOffset: 568 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=568 - endOffset: 698 -- name: 'Chatbot Failures: Hallucinations, Legal Exposure, and Financial Incidents' - startOffset: 698 - 
url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=698 - endOffset: 800 -- name: 'Data Exfiltration Techniques: Overloading Prompts and Knowledge-Base Retrieval' - startOffset: 800 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=800 - endOffset: 975 -- name: 'Mitigations: Output Validation, Query Analysis, and Layered Defenses' - startOffset: 975 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=975 - endOffset: 1020 -- name: 'Non-LLM Classifiers: Robust Alternatives to Manipulable Generative Models' - startOffset: 1020 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1020 - endOffset: 1081 -- name: 'Trust and Hallucinations: User Confidence, Safety, and Adoption Risks' - startOffset: 1081 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1081 - endOffset: 1239 -- name: 'Chatbot Adoption Issues: Usability, Verbosity, and Return on Investment' - startOffset: 1239 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1239 - endOffset: 1534 -- name: 'Human-in-the-Loop Solutions: Hybrid Review to Improve Accuracy' - startOffset: 1534 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1534 - endOffset: 1633 -- name: 'AI as Assistant: Moderation Tools, Autopilot Analogy, and Workforce Impact' - startOffset: 1633 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1633 - endOffset: 1793 -- name: 'Translation Workflows: AI-Augmented Translators and Quality Control' - startOffset: 1793 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1793 - endOffset: 1948 -- name: 'Prompt Customization: Controlled Machine Translation with ChatGPT' - startOffset: 1948 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=1948 - endOffset: 2144 -- name: 'Historical Linguistics: Middle & Old English Pronunciation Insights' - startOffset: 2144 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2144 - endOffset: 2708 -- name: 'Ancient Languages: Cuneiform, Sumerian Transcription, and MT Approaches' - startOffset: 2708 - url: 
https://www.youtube.com/watch?v=bT7-HRNCltk&t=2708 - endOffset: 2906 -- name: 'Script Complexity: Logograms vs. Phonetics in Ancient Texts' - startOffset: 2906 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=2906 - endOffset: 3181 -- name: 'Multilingual Models: Progress and Challenges for Low-Resource Languages' - startOffset: 3181 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3181 - endOffset: 3412 -- name: 'Orthography & Data Quality: Inconsistent Spelling in Historical Corpora' - startOffset: 3412 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3412 - endOffset: 3448 -- name: 'Industry Trade-offs: Research Innovation vs. ROI and Operational Needs' - startOffset: 3448 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3448 - endOffset: 3554 -- name: 'Episode Wrap-Up: Key Takeaways on AI Trust, Safety, and Future Directions' - startOffset: 3554 - url: https://www.youtube.com/watch?v=bT7-HRNCltk&t=3554 - endOffset: 3593 +context: 'Context: Across a career-spanning conversation about linguistics, industry + AI roles, chatbot hacks, safety failures, mitigations, human-in-the-loop workflows, + translation and ancient-language challenges, and industry trade-offs, the episode + maps how generative AI is rapidly democratized yet brittle, risky, and dependent + on data and linguistic nuance. + + Core: The central imperative is that realizing the real-world promise of generative + AI requires marrying deep linguistic and domain expertise with layered technical + defenses, human oversight, and pragmatic product trade-offs—so systems can be safe, + trustworthy, and useful despite hallucinations, manipulation, data-quality limits, + and operational constraints.' 
--- diff --git a/_podcast/s08e09-from-academia-to-data-analytics-and-engineering.md b/_podcast/get-data-analytics-and-data-engineering-job.md similarity index 97% rename from _podcast/s08e09-from-academia-to-data-analytics-and-engineering.md rename to _podcast/get-data-analytics-and-data-engineering-job.md index bc01a49f..57ed4edf 100644 --- a/_podcast/s08e09-from-academia-to-data-analytics-and-engineering.md +++ b/_podcast/get-data-analytics-and-data-engineering-job.md @@ -1,40 +1,120 @@ --- +title: "How I Landed a Data Engineering Job: Bootcamp, Docker, Airflow, AWS & Interview Tips" +short: "From Academia to Data Analytics and Engineering" +season: 8 episode: 9 guests: - gloriaquiceno -intro: How do you go from neuroscience research to a data engineering role — and what - practical steps and skills actually get you hired? In this episode, Gloria Quiceno, - Senior Analytics Engineer at ICE, walks through her transition from neuroscience - labs to rebuilding enterprise data platforms (including a BI rebuild that saved - €250K), and the concrete tools and tactics that landed her a data engineering job. - We cover her early lab automation and scripting experience, learning MATLAB/R and - Python, a first industry role as a business data analyst, and a four-month job-search - timeline from bootcamp graduation to offer. Gloria explains building reproducible - pipelines with Docker, orchestrating workflows with Airflow and AWS (including Step - Functions), designing ETL/ELT and Snowflake integrations, capstone projects like - a Twitter data pipeline, and volunteer ML practice with Omdena. She also shares - application strategy (tracking ~130 applications), handling live coding and take-home - interviews, GDPR recruitment rights, salary negotiation, and portfolio advice to - stand out. Listen to get actionable interview tips, portfolio project ideas, and - hands-on guidance for landing a data engineering job using bootcamps, Docker, Airflow, - and AWS. 
+image: images/podcast/get-data-analytics-and-data-engineering-job.jpg ids: anchor: From-Academia-to-Data-Analytics-and-Engineering---Gloria-Quiceno-e1ikrd8 youtube: 0wANfIvum4U -image: images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Academia-to-Data-Analytics-and-Engineering---Gloria-Quiceno-e1ikrd8 apple: https://podcasts.apple.com/us/podcast/from-academia-to-data-analytics-and-engineering/id1541710331?i=1000562898040 spotify: https://open.spotify.com/episode/1kDpXugcmDdVJ6qUAiNnHQ?si=aa62cc4dce5f41b2 youtube: https://www.youtube.com/watch?v=0wANfIvum4U -season: 8 -short: From Academia to Data Analytics and Engineering -title: 'How I Landed a Data Engineering Job: Bootcamp, Docker, Airflow, AWS & Interview - Tips' + +description: "Learn data engineering with Docker and Airflow—bootcamp ROI, interview tips, portfolio tips and salary tactics to land a data engineering role" +intro: "How do you go from neuroscience research to a data engineering role — and what practical steps and skills actually get you hired? In this episode, Gloria Quiceno, Senior Analytics Engineer at ICE, walks through her transition from neuroscience labs to rebuilding enterprise data platforms (including a BI rebuild that saved €250K), and the concrete tools and tactics that landed her a data engineering job. We cover her early lab automation and scripting experience, learning MATLAB/R and Python, a first industry role as a business data analyst, and a four-month job-search timeline from bootcamp graduation to offer. Gloria explains building reproducible pipelines with Docker, orchestrating workflows with Airflow and AWS (including Step Functions), designing ETL/ELT and Snowflake integrations, capstone projects like a Twitter data pipeline, and volunteer ML practice with Omdena. 
She also shares application strategy (tracking ~130 applications), handling live coding and take-home interviews, GDPR recruitment rights, salary negotiation, and portfolio advice to stand out. Listen to get actionable interview tips, portfolio project ideas, and hands-on guidance for landing a data engineering job using bootcamps, Docker, Airflow, and AWS" topics: - career switch - data engineering - career growth +dateadded: 2022-05-21 + +duration: PT00H57M59S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=0 + endOffset: 118 +- name: 'Background: Transition from Neuroscience Research to Industry' + startOffset: 118 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=118 + endOffset: 161 +- name: 'Lab Automation & Scripting: Igor, C-style Code and Data Collection' + startOffset: 161 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=161 + endOffset: 393 +- name: 'Learning Curve: MATLAB, R and Falling in Love with Programming' + startOffset: 393 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=393 + endOffset: 466 +- name: 'First Industry Role: Business Data Analyst Duties and SQL Reporting' + startOffset: 466 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=466 + endOffset: 713 +- name: 'Company Overview: Music Metadata, Royalties and Data Workflows' + startOffset: 713 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=713 + endOffset: 974 +- name: 'Job Search Timeline: Bootcamp Graduation to Offer in Four Months' + startOffset: 974 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=974 + endOffset: 1101 +- name: 'Volunteer Experience: Omdena Project for Practical ML Practice' + startOffset: 1101 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1101 + endOffset: 1192 +- name: 'Role Focus: Choosing Data Engineering Tasks and Automation Work' + startOffset: 1192 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1192 + endOffset: 1285 +- name: 'Reproducibility: Docker for 
Collaborative Scripts and AWS Runs' + startOffset: 1285 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1285 + endOffset: 1377 +- name: 'Application Strategy: Tracking ~130 Applications and Organization' + startOffset: 1377 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1377 + endOffset: 1675 +- name: 'Interview Hurdles: Live Coding Pressure and Take-Home Challenges' + startOffset: 1675 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1675 + endOffset: 1819 +- name: 'Applicant Rights: GDPR and Deleting Recruitment Records' + startOffset: 1819 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1819 + endOffset: 1889 +- name: 'Salary Negotiation: Market Research, Confidence and Tactics' + startOffset: 1889 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1889 + endOffset: 2180 +- name: 'Bootcamp ROI: Learning Python, Docker, Airflow and Networking' + startOffset: 2180 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2180 + endOffset: 2245 +- name: 'Retrospective Tips: Earlier Career Coaching and More Networking' + startOffset: 2245 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2245 + endOffset: 2729 +- name: 'Cloud Platforms: AWS vs GCP Experience and Managing Credits' + startOffset: 2729 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2729 + endOffset: 3015 +- name: 'Capstone Project: Twitter Data Pipeline, Docker Containers and Slack Bot' + startOffset: 3015 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3015 + endOffset: 3102 +- name: 'Portfolio Strategy: Custom Projects to Stand Out to Employers' + startOffset: 3102 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3102 + endOffset: 3214 +- name: 'Data Quality: Detecting Bots, Cleaning Twitter Data and Sentiment Bias' + startOffset: 3214 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3214 + endOffset: 3408 +- name: 'Cohort Diversity: Varied Backgrounds and Cross-disciplinary Strengths' + startOffset: 3408 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3408 
+ endOffset: 3517 +- name: 'Closing Thoughts: Practical Advice for Transitioning to Data Engineering' + startOffset: 3517 + url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3517 + endOffset: 3479 + transcript: - header: Podcast Introduction - header: 'Background: Transition from Neuroscience Research to Industry' @@ -1289,97 +1369,4 @@ transcript: sec: 3597 time: '59:57' who: Alexey -description: Learn data engineering with Docker and Airflow—bootcamp ROI, interview - tips, portfolio tips and salary tactics to land a data engineering role. -dateadded: '2022-05-21' -duration: PT00H57M59S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=0 - endOffset: 118 -- name: 'Background: Transition from Neuroscience Research to Industry' - startOffset: 118 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=118 - endOffset: 161 -- name: 'Lab Automation & Scripting: Igor, C-style Code and Data Collection' - startOffset: 161 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=161 - endOffset: 393 -- name: 'Learning Curve: MATLAB, R and Falling in Love with Programming' - startOffset: 393 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=393 - endOffset: 466 -- name: 'First Industry Role: Business Data Analyst Duties and SQL Reporting' - startOffset: 466 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=466 - endOffset: 713 -- name: 'Company Overview: Music Metadata, Royalties and Data Workflows' - startOffset: 713 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=713 - endOffset: 974 -- name: 'Job Search Timeline: Bootcamp Graduation to Offer in Four Months' - startOffset: 974 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=974 - endOffset: 1101 -- name: 'Volunteer Experience: Omdena Project for Practical ML Practice' - startOffset: 1101 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1101 - endOffset: 1192 -- name: 'Role Focus: Choosing Data Engineering Tasks and Automation Work' - 
startOffset: 1192 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1192 - endOffset: 1285 -- name: 'Reproducibility: Docker for Collaborative Scripts and AWS Runs' - startOffset: 1285 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1285 - endOffset: 1377 -- name: 'Application Strategy: Tracking ~130 Applications and Organization' - startOffset: 1377 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1377 - endOffset: 1675 -- name: 'Interview Hurdles: Live Coding Pressure and Take-Home Challenges' - startOffset: 1675 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1675 - endOffset: 1819 -- name: 'Applicant Rights: GDPR and Deleting Recruitment Records' - startOffset: 1819 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1819 - endOffset: 1889 -- name: 'Salary Negotiation: Market Research, Confidence and Tactics' - startOffset: 1889 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=1889 - endOffset: 2180 -- name: 'Bootcamp ROI: Learning Python, Docker, Airflow and Networking' - startOffset: 2180 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2180 - endOffset: 2245 -- name: 'Retrospective Tips: Earlier Career Coaching and More Networking' - startOffset: 2245 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2245 - endOffset: 2729 -- name: 'Cloud Platforms: AWS vs GCP Experience and Managing Credits' - startOffset: 2729 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=2729 - endOffset: 3015 -- name: 'Capstone Project: Twitter Data Pipeline, Docker Containers and Slack Bot' - startOffset: 3015 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3015 - endOffset: 3102 -- name: 'Portfolio Strategy: Custom Projects to Stand Out to Employers' - startOffset: 3102 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3102 - endOffset: 3214 -- name: 'Data Quality: Detecting Bots, Cleaning Twitter Data and Sentiment Bias' - startOffset: 3214 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3214 - endOffset: 3408 -- name: 'Cohort 
Diversity: Varied Backgrounds and Cross-disciplinary Strengths' - startOffset: 3408 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3408 - endOffset: 3517 -- name: 'Closing Thoughts: Practical Advice for Transitioning to Data Engineering' - startOffset: 3517 - url: https://www.youtube.com/watch?v=0wANfIvum4U&t=3517 - endOffset: 3479 --- diff --git a/_podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).md b/_podcast/get-data-engineering-job-prep-and-interview.md similarity index 96% rename from _podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).md rename to _podcast/get-data-engineering-job-prep-and-interview.md index f40962c6..0acd043f 100644 --- a/_podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).md +++ b/_podcast/get-data-engineering-job-prep-and-interview.md @@ -1,20 +1,141 @@ --- +title: "Data Engineering Job Prep & Interview Guide: Python, SQL, Portfolio & Job Search Tips" +short: "Getting a Data Engineering Job (Summary and Q&A)" +season: 9 episode: 3 guests: - jeffkatz -date: 2025-11-07 +image: images/podcast/get-data-engineering-job-prep-and-interview.jpg ids: anchor: Getting-a-Data-Engineering-Job-Summary-and-QA---Jeff-Katz-e1jljmd youtube: asnt7xlyZXQ -image: images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Getting-a-Data-Engineering-Job-Summary-and-QA---Jeff-Katz-e1jljmd apple: https://podcasts.apple.com/us/podcast/getting-a-data-engineering-job-summary-and-q-a-jeff-katz/id1541710331?i=1000566005592 spotify: https://open.spotify.com/episode/1SaZ0QXAIhcdH1gfaNoN4Z?si=OvvNFdTpSu2MCCDOWdYgJQ youtube: https://www.youtube.com/watch?v=asnt7xlyZXQ -season: 9 -short: Getting a Data Engineering Job (Summary and Q&A) -title: Getting a Data Engineering Job (Summary and Q&A) + +description: "Master data engineering job prep: Python, SQL tips, portfolio strategy, interview formats and job search tactics to land offers faster. 
Quick wins" +intro: "How do you actually get a data engineering job today — and which skills hiring teams care about most? In this episode, Jeff Katz, a Machine Learning Engineer at AppFolio and longtime instructor/founder of Jigsaw Labs and Flatiron School curriculum lead, distills a webinar on hiring demand into practical advice for job seekers. Drawing on applied AI and data engineering experience plus open-source contributions, Jeff walks through the core data engineering skills employers expect: deep Python and SQL, Docker, Airflow, and data warehouse fundamentals.

You’ll hear concrete guidance on portfolio strategy (personal projects and open source), code quality and OOP patterns, the application funnel (LinkedIn → resume → interviews), behavioral and technical interview formats (SQL LeetCode, Python problems, take-home projects), and essential database concepts (views, OLTP vs OLAP). The episode also covers learning resources, transitioning from BI, certification vs skills trade-offs, remote work realities, and how to leverage non-coding experience. Listen to learn a practical roadmap for interviews, portfolio building, and job search tactics to increase your chances of landing a data engineering role." +topics: +- data engineering +- job search +- tools +dateadded: 2022-06-10 +date: 2025-11-07 + +duration: PT00H48M02S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=0 + endOffset: 36 +- name: 'Webinar Recap: Hiring Demand and Skill Gaps' + startOffset: 36 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=36 + endOffset: 80 +- name: 'Core Skills & Tools: Python, SQL, Docker, Airflow, Data Warehouses' + startOffset: 80 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=80 + endOffset: 109 +- name: 'Python & SQL Depth: Project Volume and Emphasis' + startOffset: 109 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=109 + endOffset: 142 +- name: 'Code Quality & OOP: Small Functions, Classes, Tests' + startOffset: 142 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=142 + endOffset: 166 +- name: 'Portfolio Strategy: Personal Projects and Open Source Contributions' + startOffset: 166 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=166 + endOffset: 218 +- name: 'Application Funnel: LinkedIn, Resume, and Interview Stages' + startOffset: 218 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=218 + endOffset: 315 +- name: 'Behavioral Interview Best Practices: Positivity, Structure, Motivation' + startOffset: 315 + url: 
https://www.youtube.com/watch?v=asnt7xlyZXQ&t=315 + endOffset: 466 +- name: 'Technical Interview Formats: SQL LeetCode, Python Problems, Take-Home Projects' + startOffset: 466 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=466 + endOffset: 581 +- name: 'Core Database Concepts: Views, Materialized Views, OLTP vs OLAP' + startOffset: 581 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=581 + endOffset: 684 +- name: 'Learning Resources: Python Books, Flask Mega-Tutorial, SQL Platforms' + startOffset: 684 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=684 + endOffset: 851 +- name: 'BI to Data Engineering Transition: Upskilling Within Your Role' + startOffset: 851 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=851 + endOffset: 953 +- name: 'Job Search Strategy: Apply Broadly and Avoid Self-Filtering' + startOffset: 953 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=953 + endOffset: 1008 +- name: Leveraging Non-Coding Experience and Domain Expertise + startOffset: 1008 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1008 + endOffset: 1197 +- name: 'Role Differentiation: Data Analyst vs Data Engineer' + startOffset: 1197 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1197 + endOffset: 1316 +- name: 'Certifications vs Skills: When Certificates Help and When They Don’t' + startOffset: 1316 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1316 + endOffset: 1393 +- name: 'Master’s Degree Trade-offs: Research Depth vs Applied Learning' + startOffset: 1393 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1393 + endOffset: 1666 +- name: 'Remote Work Reality: Timezones, Legal Constraints, and Standout Candidates' + startOffset: 1666 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1666 + endOffset: 1806 +- name: 'Teaching & Coaching on Resume: Communication and Mentorship Value' + startOffset: 1806 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1806 + endOffset: 1942 +- name: 'OOP Relevance: Patterns for Airflow and 
Maintainable Code' + startOffset: 1942 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1942 + endOffset: 1983 +- name: 'Language Choices: Python Focus; Java/Scala and Spark Considerations' + startOffset: 1983 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1983 + endOffset: 2109 +- name: 'Interview Load: Typical Number and Style of Technical Questions' + startOffset: 2109 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2109 + endOffset: 2269 +- name: 'Cloud Certification Prep: Learning Fundamentals vs Credential Hunting' + startOffset: 2269 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2269 + endOffset: 2389 +- name: 'Commercial Experience Alternatives: Nonprofits, Contract Work, Internships' + startOffset: 2389 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2389 + endOffset: 2611 +- name: 'Mid-Career Switch: Sales Skills as an Asset in Tech Hiring' + startOffset: 2611 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2611 + endOffset: 2776 +- name: 'Solution Engineer Pathway: Pre-/Post-Sales Roles as Transition Options' + startOffset: 2776 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2776 + endOffset: 2846 +- name: Episode Wrap-Up and Further Resources + startOffset: 2846 + url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2846 + endOffset: 2882 + transcript: - header: Podcast Introduction - line: We had an amazing webinar about getting a data engineering job. It was a couple @@ -763,134 +884,6 @@ transcript: sec: 2882 time: '48:02' who: Jeff -intro: 'How do you actually get a data engineering job today — and which skills hiring - teams care about most? In this episode, Jeff Katz, a Machine Learning Engineer at - AppFolio and longtime instructor/founder of Jigsaw Labs and Flatiron School curriculum - lead, distills a webinar on hiring demand into practical advice for job seekers. 
- Drawing on applied AI and data engineering experience plus open-source contributions, - Jeff walks through the core data engineering skills employers expect: deep Python - and SQL, Docker, Airflow, and data warehouse fundamentals.

You’ll hear - concrete guidance on portfolio strategy (personal projects and open source), code - quality and OOP patterns, the application funnel (LinkedIn → resume → interviews), - behavioral and technical interview formats (SQL LeetCode, Python problems, take-home - projects), and essential database concepts (views, OLTP vs OLAP). The episode also - covers learning resources, transitioning from BI, certification vs skills trade-offs, - remote work realities, and how to leverage non-coding experience. Listen to learn - a practical roadmap for interviews, portfolio building, and job search tactics to - increase your chances of landing a data engineering role.' -description: 'Master data engineering job prep: Python, SQL tips, portfolio strategy, - interview formats and job search tactics to land offers faster. quick wins' -dateadded: '2022-06-10' -duration: PT00H48M02S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=0 - endOffset: 36 -- name: 'Webinar Recap: Hiring Demand and Skill Gaps' - startOffset: 36 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=36 - endOffset: 80 -- name: 'Core Skills & Tools: Python, SQL, Docker, Airflow, Data Warehouses' - startOffset: 80 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=80 - endOffset: 109 -- name: 'Python & SQL Depth: Project Volume and Emphasis' - startOffset: 109 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=109 - endOffset: 142 -- name: 'Code Quality & OOP: Small Functions, Classes, Tests' - startOffset: 142 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=142 - endOffset: 166 -- name: 'Portfolio Strategy: Personal Projects and Open Source Contributions' - startOffset: 166 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=166 - endOffset: 218 -- name: 'Application Funnel: LinkedIn, Resume, and Interview Stages' - startOffset: 218 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=218 - endOffset: 315 -- name: 'Behavioral 
Interview Best Practices: Positivity, Structure, Motivation' - startOffset: 315 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=315 - endOffset: 466 -- name: 'Technical Interview Formats: SQL LeetCode, Python Problems, Take-Home Projects' - startOffset: 466 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=466 - endOffset: 581 -- name: 'Core Database Concepts: Views, Materialized Views, OLTP vs OLAP' - startOffset: 581 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=581 - endOffset: 684 -- name: 'Learning Resources: Python Books, Flask Mega-Tutorial, SQL Platforms' - startOffset: 684 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=684 - endOffset: 851 -- name: 'BI to Data Engineering Transition: Upskilling Within Your Role' - startOffset: 851 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=851 - endOffset: 953 -- name: 'Job Search Strategy: Apply Broadly and Avoid Self-Filtering' - startOffset: 953 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=953 - endOffset: 1008 -- name: Leveraging Non-Coding Experience and Domain Expertise - startOffset: 1008 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1008 - endOffset: 1197 -- name: 'Role Differentiation: Data Analyst vs Data Engineer' - startOffset: 1197 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1197 - endOffset: 1316 -- name: 'Certifications vs Skills: When Certificates Help and When They Don’t' - startOffset: 1316 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1316 - endOffset: 1393 -- name: 'Master’s Degree Trade-offs: Research Depth vs Applied Learning' - startOffset: 1393 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1393 - endOffset: 1666 -- name: 'Remote Work Reality: Timezones, Legal Constraints, and Standout Candidates' - startOffset: 1666 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1666 - endOffset: 1806 -- name: 'Teaching & Coaching on Resume: Communication and Mentorship Value' - startOffset: 1806 - url: 
https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1806 - endOffset: 1942 -- name: 'OOP Relevance: Patterns for Airflow and Maintainable Code' - startOffset: 1942 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1942 - endOffset: 1983 -- name: 'Language Choices: Python Focus; Java/Scala and Spark Considerations' - startOffset: 1983 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=1983 - endOffset: 2109 -- name: 'Interview Load: Typical Number and Style of Technical Questions' - startOffset: 2109 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2109 - endOffset: 2269 -- name: 'Cloud Certification Prep: Learning Fundamentals vs Credential Hunting' - startOffset: 2269 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2269 - endOffset: 2389 -- name: 'Commercial Experience Alternatives: Nonprofits, Contract Work, Internships' - startOffset: 2389 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2389 - endOffset: 2611 -- name: 'Mid-Career Switch: Sales Skills as an Asset in Tech Hiring' - startOffset: 2611 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2611 - endOffset: 2776 -- name: 'Solution Engineer Pathway: Pre-/Post-Sales Roles as Transition Options' - startOffset: 2776 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2776 - endOffset: 2846 -- name: Episode Wrap-Up and Further Resources - startOffset: 2846 - url: https://www.youtube.com/watch?v=asnt7xlyZXQ&t=2846 - endOffset: 2882 --- Links: diff --git a/_podcast/s01e04-standing-out-as-a-data-scientist.md b/_podcast/get-data-scientist-job.md similarity index 95% rename from _podcast/s01e04-standing-out-as-a-data-scientist.md rename to _podcast/get-data-scientist-job.md index c26b4570..d925d39b 100644 --- a/_podcast/s01e04-standing-out-as-a-data-scientist.md +++ b/_podcast/get-data-scientist-job.md @@ -1,11 +1,11 @@ --- -title: 'Land Data Scientist Roles: Resumes, Portfolios, Interviews & Recruiter Workflow' -short: Standing out as a Data Scientist -guests: -- lukewhipps -image: 
images/podcast/s01e04-standing-out-as-a-data-scientist.jpg +title: "Land Data Scientist Roles: Resumes, Portfolios, Interviews & Recruiter Workflow" +short: "Standing out as a Data Scientist" season: 1 episode: 4 +guests: +- lukewhipps +image: images/podcast/get-data-scientist-job.jpg ids: youtube: Sb4CJlonB3c anchor: Standing-out-as-a-Data-Scientist---Luke-Whipps-envr7e @@ -14,6 +14,135 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Standing-out-as-a-Data-Scientist---Luke-Whipps-envr7e spotify: https://open.spotify.com/episode/2Yxay9HJmd6dvk34MHJ0K2 apple: https://podcasts.apple.com/us/podcast/standing-out-as-a-data-scientist-luke-whipps/id1541710331?i=1000502844994 + +description: "Master data scientist resumes, portfolios & interviews—insider recruiter workflow, CV tips, portfolio impact, negotiation and outreach to land roles faster" +intro: "How do you actually land a data scientist role — from a resume that passes screening to a portfolio that wins interviews and an offer that closes? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast with 8+ years recruiting experience, walks through the recruiter workflow and practical steps data scientists can use to improve hiring outcomes.

We cover Luke’s six-stage recruitment process (role definition to close), how to define data scientist roles across companies, and recruiter expectations for CV design, information hierarchy, and industry/use-case alignment. Learn how to structure portfolios to link tech stack to concrete projects, craft a clear career narrative that demonstrates business impact, and prepare for interviews and negotiations. Junior candidates will get guidance on choosing an industry and showing purpose; academics learn how to productize research for industry. You’ll also hear tactical advice on tailored applications, LinkedIn outreach, candidate funnel sizes, salary signals, job-title alignment, and acceptable tenure patterns.

Listen to gain actionable tips for resumes, portfolios, interviews, and working effectively with recruiters to increase your chances of landing a data scientist role" +topics: +- data science +- career growth +- job search +dateadded: 2021-02-23 + +duration: PT01H08M47S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=0 + endOffset: 99 +- name: 'Guest Introduction: Luke Whipps, recruiter and podcast host' + startOffset: 99 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=99 + endOffset: 177 +- name: 'Recruiting background: a decade in data, analytics and AI' + startOffset: 177 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=177 + endOffset: 223 +- name: 'Neural AI origin: founding principles and non-transactional recruiting' + startOffset: 223 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=223 + endOffset: 316 +- name: 'Community focus: podcasts, events and value-driven talent work' + startOffset: 316 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=316 + endOffset: 422 +- name: 'Hiring challenges: why data scientist roles vary by company' + startOffset: 422 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=422 + endOffset: 455 +- name: 'Recruitment workflow: six-stage process from definition to close' + startOffset: 455 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=455 + endOffset: 495 +- name: Role definition & market guidance for data science hires + startOffset: 495 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=495 + endOffset: 554 +- name: Shortlist, interview preparation, feedback and offer negotiation + startOffset: 554 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=554 + endOffset: 683 +- name: 'Candidate funnel sizes: longlists, headhunting and volume hiring' + startOffset: 683 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=683 + endOffset: 847 +- name: 'First impressions: CV design, formatting and professional clarity' + startOffset: 847 + 
url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=847 + endOffset: 975 +- name: Industry and use-case alignment on resumes for better matches + startOffset: 975 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=975 + endOffset: 1190 +- name: 'Projects & portfolio: linking tech stack to concrete work' + startOffset: 1190 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1190 + endOffset: 1328 +- name: 'Career narrative: tenure, common themes and progression' + startOffset: 1328 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1328 + endOffset: 1504 +- name: Demonstrating business impact and real world use cases + startOffset: 1504 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1504 + endOffset: 1639 +- name: 'CV structure: clarity, audience fit and information hierarchy' + startOffset: 1639 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1639 + endOffset: 1810 +- name: 'Job-hopping: red flags, ideal tenure and acceptable exceptions' + startOffset: 1810 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1810 + endOffset: 1942 +- name: 'Junior candidates: pick an industry, aim small and show purpose' + startOffset: 1942 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1942 + endOffset: 2274 +- name: 'Tailored applications: research job needs and map your skills' + startOffset: 2274 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2274 + endOffset: 2381 +- name: 'Targeted outreach tactics: emails, LinkedIn and creative approaches' + startOffset: 2381 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2381 + endOffset: 2666 +- name: 'Focus strategy: approach fewer companies and segment your market' + startOffset: 2666 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2666 + endOffset: 2785 +- name: 'Academia → industry: adopt a product mindset and productionize research' + startOffset: 2785 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2785 + endOffset: 3039 +- name: 'Motivation vs money: career focus, progression and 
tradeoffs' + startOffset: 3039 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3039 + endOffset: 3142 +- name: 'Salary signals: asking salary, market alignment and recruiter views' + startOffset: 3142 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3142 + endOffset: 3407 +- name: 'CV formats & length: country differences and the two-page guideline' + startOffset: 3407 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3407 + endOffset: 3531 +- name: 'Job title alignment: adapt titles to industry norms without lying' + startOffset: 3531 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3531 + endOffset: 3615 +- name: 'Switching backgrounds: web development to machine learning skills' + startOffset: 3615 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3615 + endOffset: 3727 +- name: 'Disclosing other interviews: transparency, trust and recruiter differences' + startOffset: 3727 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3727 + endOffset: 4057 +- name: 'Episode summary: purpose-driven candidates and standing out as a data scientist' + startOffset: 4057 + url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=4057 + endOffset: 4127 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Luke Whipps, recruiter and podcast host' @@ -53,7 +182,7 @@ transcript: sec: 177 time: '2:57' who: Luke -- header: 'Neural AI origin: founding principles and non‑transactional recruiting' +- header: 'Neural AI origin: founding principles and non-transactional recruiting' - line: Neural came around from a couple of different perspectives. One of those perspectives was… I was just about to turn 30. I had two choices that I could have made. 
I could either continue working for businesses that I have to become a part of and @@ -77,7 +206,7 @@ transcript: sec: 256 time: '4:16' who: Luke -- header: 'Community focus: podcasts, events and value‑driven talent work' +- header: 'Community focus: podcasts, events and value-driven talent work' - line: For me, as I say Neural was built on the back of wanting to create something different that is value and community driven, rather than just, “Hey, do you want to work on a new business?” So yeah, we started in early 2020. So probably not @@ -120,7 +249,7 @@ transcript: sec: 422 time: '7:02' who: Luke -- header: 'Recruitment workflow: six‑stage process from definition to close' +- header: 'Recruitment workflow: six-stage process from definition to close' - line: One of the big problems – or the big challenges of the work that we do is – that every single company is different. That in itself is a challenge, because you need to first understand the problems and the different challenges that those @@ -267,7 +396,7 @@ transcript: sec: 940 time: '15:40' who: Luke -- header: Industry and use‑case alignment on resumes for better matches +- header: Industry and use-case alignment on resumes for better matches - line: Number one is, I’ll instantly look for the crossover between the business and the role that they're applying for with the company that they're currently working in. The reason I say that is not because it's a name game, or… It's not @@ -487,7 +616,7 @@ transcript: sec: 1803 time: '30:03' who: Luke -- header: 'Job‑hopping: red flags, ideal tenure and acceptable exceptions' +- header: 'Job-hopping: red flags, ideal tenure and acceptable exceptions' - line: We have a question. How often is too often when changing jobs? What is currently the average time that the person stays in a job? What would be a red flag for you? 
@@ -870,7 +999,7 @@ transcript: sec: 3371 time: '56:11' who: Alexey -- header: 'CV formats & length: country differences and the two‑page guideline' +- header: 'CV formats & length: country differences and the two-page guideline' - line: I think the ideal length is two pages. I don't think you can get enough content on one page to make it sing. Three pages is borderline too much. But two pages is ideal. That's the balance between having enough deep content that will give @@ -1017,7 +1146,7 @@ transcript: sec: 4037 time: '1:07:17' who: Alexey -- header: 'Episode summary: purpose‑driven candidates and standing out as a data scientist' +- header: 'Episode summary: purpose-driven candidates and standing out as a data scientist' - line: I thought about this for a while. One thing that really makes candidates stand out to me — it's understanding their purpose, and doubling down on that. Once you do that, everything else becomes easy. If you don't have that, and you're @@ -1078,141 +1207,4 @@ transcript: sec: 4226 time: '1:10:26' who: Alexey -description: Master data scientist resumes, portfolios & interviews—insider recruiter - workflow, CV tips, portfolio impact, negotiation and outreach to land roles faster. -intro: How do you actually land a data scientist role — from a resume that passes - screening to a portfolio that wins interviews and an offer that closes? In this - episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast - with 8+ years recruiting experience, walks through the recruiter workflow and practical - steps data scientists can use to improve hiring outcomes.

We cover Luke’s - six‑stage recruitment process (role definition to close), how to define data scientist - roles across companies, and recruiter expectations for CV design, information hierarchy, - and industry/use‑case alignment. Learn how to structure portfolios to link tech - stack to concrete projects, craft a clear career narrative that demonstrates business - impact, and prepare for interviews and negotiations. Junior candidates will get - guidance on choosing an industry and showing purpose; academics learn how to productize - research for industry. You’ll also hear tactical advice on tailored applications, - LinkedIn outreach, candidate funnel sizes, salary signals, job‑title alignment, - and acceptable tenure patterns.

Listen to gain actionable tips for resumes, - portfolios, interviews, and working effectively with recruiters to increase your - chances of landing a data scientist role. -dateadded: '2021-02-23' -duration: PT01H08M47S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=0 - endOffset: 99 -- name: 'Guest Introduction: Luke Whipps, recruiter and podcast host' - startOffset: 99 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=99 - endOffset: 177 -- name: 'Recruiting background: a decade in data, analytics and AI' - startOffset: 177 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=177 - endOffset: 223 -- name: 'Neural AI origin: founding principles and non‑transactional recruiting' - startOffset: 223 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=223 - endOffset: 316 -- name: 'Community focus: podcasts, events and value‑driven talent work' - startOffset: 316 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=316 - endOffset: 422 -- name: 'Hiring challenges: why data scientist roles vary by company' - startOffset: 422 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=422 - endOffset: 455 -- name: 'Recruitment workflow: six‑stage process from definition to close' - startOffset: 455 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=455 - endOffset: 495 -- name: Role definition & market guidance for data science hires - startOffset: 495 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=495 - endOffset: 554 -- name: Shortlist, interview preparation, feedback and offer negotiation - startOffset: 554 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=554 - endOffset: 683 -- name: 'Candidate funnel sizes: longlists, headhunting and volume hiring' - startOffset: 683 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=683 - endOffset: 847 -- name: 'First impressions: CV design, formatting and professional clarity' - startOffset: 847 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=847 
- endOffset: 975 -- name: Industry and use‑case alignment on resumes for better matches - startOffset: 975 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=975 - endOffset: 1190 -- name: 'Projects & portfolio: linking tech stack to concrete work' - startOffset: 1190 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1190 - endOffset: 1328 -- name: 'Career narrative: tenure, common themes and progression' - startOffset: 1328 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1328 - endOffset: 1504 -- name: Demonstrating business impact and real world use cases - startOffset: 1504 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1504 - endOffset: 1639 -- name: 'CV structure: clarity, audience fit and information hierarchy' - startOffset: 1639 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1639 - endOffset: 1810 -- name: 'Job‑hopping: red flags, ideal tenure and acceptable exceptions' - startOffset: 1810 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1810 - endOffset: 1942 -- name: 'Junior candidates: pick an industry, aim small and show purpose' - startOffset: 1942 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=1942 - endOffset: 2274 -- name: 'Tailored applications: research job needs and map your skills' - startOffset: 2274 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2274 - endOffset: 2381 -- name: 'Targeted outreach tactics: emails, LinkedIn and creative approaches' - startOffset: 2381 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2381 - endOffset: 2666 -- name: 'Focus strategy: approach fewer companies and segment your market' - startOffset: 2666 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2666 - endOffset: 2785 -- name: 'Academia → industry: adopt a product mindset and productionize research' - startOffset: 2785 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=2785 - endOffset: 3039 -- name: 'Motivation vs money: career focus, progression and tradeoffs' - startOffset: 3039 - url: 
https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3039 - endOffset: 3142 -- name: 'Salary signals: asking salary, market alignment and recruiter views' - startOffset: 3142 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3142 - endOffset: 3407 -- name: 'CV formats & length: country differences and the two‑page guideline' - startOffset: 3407 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3407 - endOffset: 3531 -- name: 'Job title alignment: adapt titles to industry norms without lying' - startOffset: 3531 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3531 - endOffset: 3615 -- name: 'Switching backgrounds: web development to machine learning skills' - startOffset: 3615 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3615 - endOffset: 3727 -- name: 'Disclosing other interviews: transparency, trust and recruiter differences' - startOffset: 3727 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=3727 - endOffset: 4057 -- name: 'Episode summary: purpose‑driven candidates and standing out as a data scientist' - startOffset: 4057 - url: https://www.youtube.com/watch?v=Sb4CJlonB3c&t=4057 - endOffset: 4127 --- diff --git a/_podcast/s07e04-career-coaching.md b/_podcast/get-junior-data-job-and-transferable-skills.md similarity index 96% rename from _podcast/s07e04-career-coaching.md rename to _podcast/get-junior-data-job-and-transferable-skills.md index 9c557729..b659c223 100644 --- a/_podcast/s07e04-career-coaching.md +++ b/_podcast/get-junior-data-job-and-transferable-skills.md @@ -1,39 +1,130 @@ --- +title: "Land Junior Data Jobs: CVs, Interviews, Transferable Skills & Overcome Imposter Syndrome" +short: "Career Coaching" +season: 7 episode: 4 guests: - lindsaymcquade -short: Career Coaching -title: 'Land Junior Data Jobs: CVs, Interviews, Transferable Skills & Overcome Imposter - Syndrome' -intro: Struggling to land a junior data job—how do you turn non‑linear experience - into a recruiter‑ready CV, prepare for interviews, and push past imposter 
syndrome? - In this episode, Lindsay McQuade, a transformational coach with 20+ years across - management consulting, higher education and tech and former Senior Career & Development - Coach at SPICED Academy, guides listeners through practical steps for junior data - roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED - training rated 94% “very good/excellent”) to explain CV writing for data roles, - achievement‑based resumes, interview prep and negotiation. Topics include reframing - past experience into evidence, identifying transferable skills for data analyst/scientist/engineer - roles, tailoring applications by industry, the ikigai framework for career focus, - and Berlin’s junior data market trends. We also cover impostor syndrome—its triggers, - objective feedback strategies, and structured learning and T‑shaped skills to build - confidence. Tune in for clear job search strategy (balanced volume and targeted - applications), how to choose a career coach, and practical LinkedIn networking tips - to convert applications into interviews. -description: 'Master landing junior data jobs: craft achievement-based CVs, highlight - transferable skills, ace interviews and beat imposter syndrome with coach tips.' -topics: -- career growth +image: images/podcast/get-junior-data-job-and-transferable-skills.jpg ids: anchor: Career-Coaching---Lindsay-McQuade-e1e8elk youtube: _U8GrYJvmJM -image: images/podcast/s07e04-career-coaching.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Career-Coaching---Lindsay-McQuade-e1e8elk apple: https://podcasts.apple.com/us/podcast/career-coaching-lindsay-mcquade/id1541710331?i=1000550822996 spotify: https://open.spotify.com/episode/3jMRuqU3ZEcSeoizuOU5q1 youtube: https://www.youtube.com/watch?v=_U8GrYJvmJM -season: 7 + +description: "Master landing junior data jobs: craft achievement-based CVs, highlight transferable skills, ace interviews and beat imposter syndrome with coach tips." 
+intro: "Struggling to land a junior data job—how do you turn non-linear experience into a recruiter-ready CV, prepare for interviews, and push past imposter syndrome? In this episode, Lindsay McQuade, a transformational coach with 20+ years across management consulting, higher education and tech and former Senior Career & Development Coach at SPICED Academy, guides listeners through practical steps for junior data roles. Lindsay draws on her work designing programs for hundreds of learners (SPICED training rated 94% “very good/excellent”) to explain CV writing for data roles, achievement-based resumes, interview prep and negotiation. Topics include reframing past experience into evidence, identifying transferable skills for data analyst/scientist/engineer roles, tailoring applications by industry, the ikigai framework for career focus, and Berlin’s junior data market trends. We also cover impostor syndrome—its triggers, objective feedback strategies, and structured learning and T-shaped skills to build confidence. 
Tune in for clear job search strategy (balanced volume and targeted applications), how to choose a career coach, and practical LinkedIn networking tips to convert applications into interviews" +topics: +- career growth +dateadded: 2022-02-12 + +duration: PT00H58M39S + +quotableClips: +- name: Guest Introduction & Career Journey + startOffset: 68 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=68 + endOffset: 302 +- name: Spiced Academy Programs Overview (Full-Stack & Data Science) + startOffset: 302 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=302 + endOffset: 360 +- name: 'Career Coaching Services: CVs, Interview Prep, Negotiation' + startOffset: 360 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=360 + endOffset: 560 +- name: Defining Ideal Job Environment & Career Experiments + startOffset: 560 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=560 + endOffset: 711 +- name: Reframing Past Experience into Recruiter-Friendly Evidence + startOffset: 711 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=711 + endOffset: 782 +- name: Identifying Transferable Skills for Data Roles + startOffset: 782 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=782 + endOffset: 906 +- name: Achievement-Based CV Writing vs Responsibility Lists + startOffset: 906 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=906 + endOffset: 1014 +- name: Coaching Access Model & Typical Student Engagement + startOffset: 1014 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1014 + endOffset: 1078 +- name: 'Career Coach Impact: Belief, Market Navigation, Paperwork' + startOffset: 1078 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1078 + endOffset: 1228 +- name: 'Marketplace Ambiguity: Data Scientist vs Analyst vs Engineer' + startOffset: 1228 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1228 + endOffset: 1428 +- name: 'Job Focus Importance: Tailoring Applications to Industry' + startOffset: 1428 + url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1428 + endOffset: 1539 +- name: Ikigai Framework for Finding Career Focus and Projects + startOffset: 1539 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1539 + endOffset: 1714 +- name: Junior Job Market Trends in Berlin (Analytics vs Engineering) + startOffset: 1714 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1714 + endOffset: 1837 +- name: 'Job Search Strategy: Balanced Tailored Applications & Volume' + startOffset: 1837 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1837 + endOffset: 2011 +- name: 'Finding a Career Coach: Credentials, Specialization, Location' + startOffset: 2011 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2011 + endOffset: 2091 +- name: 'Imposter Syndrome: Origins and Common Triggers' + startOffset: 2091 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2091 + endOffset: 2225 +- name: Objective Feedback vs Distorted Self-Perception + startOffset: 2225 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2225 + endOffset: 2517 +- name: 'Coping Strategies: Accepting Failure and Learning Loops' + startOffset: 2517 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2517 + endOffset: 2695 +- name: 'Resources for Imposter Syndrome: Research, Coaches, Mentors' + startOffset: 2695 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2695 + endOffset: 2828 +- name: 'Building Confidence: Structured Learning Paths & T-Shaped Skills' + startOffset: 2828 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2828 + endOffset: 3070 +- name: 'Managing Expectations: Junior Role Requirements & Progression' + startOffset: 3070 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3070 + endOffset: 3171 +- name: Internships vs Junior Roles and Creating Hidden Opportunities + startOffset: 3171 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3171 + endOffset: 3320 +- name: 'Working with Recruiters: When They Help Juniors & Seniors' + startOffset: 3320 + url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3320 + endOffset: 3510 +- name: 'LinkedIn Networking: Credible Informational Outreach' + startOffset: 3510 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3510 + endOffset: 3564 +- name: Episode Wrap-Up & Final Career Coaching Takeaways + startOffset: 3564 + url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3564 + endOffset: 3519 + transcript: - header: Guest Introduction & Career Journey - line: This week, we'll talk about career coaching. We have a special guest today, @@ -106,7 +197,7 @@ transcript: sec: 106 time: '1:46' who: Lindsay -- header: Spiced Academy Programs Overview (Full‑Stack & Data Science) +- header: Spiced Academy Programs Overview (Full-Stack & Data Science) - line: Interesting. Can you tell us a few words about the school – Spiced Academy? What do you do there? sec: 302 @@ -233,7 +324,7 @@ transcript: sec: 648 time: '10:48' who: Alexey -- header: Reframing Past Experience into Recruiter‑Friendly Evidence +- header: Reframing Past Experience into Recruiter-Friendly Evidence - line: Well, I think the first thing is to have a very honest and open conversation about what they think they failed at. So you give them the space to let all of this come out and then encourage them to think about what their successes were. @@ -291,7 +382,7 @@ transcript: sec: 883 time: '14:43' who: Alexey -- header: Achievement‑Based CV Writing vs Responsibility Lists +- header: Achievement-Based CV Writing vs Responsibility Lists - line: Yeah. I'm sure there will have been some very complex analytical things that they've had to work out. 
But we don't need to know the details of this and they shouldn’t use law terminology that we don't understand – we just want to extract @@ -693,7 +784,7 @@ transcript: sec: 2221 time: '37:01' who: Alexey -- header: Objective Feedback vs Distorted Self‑Perception +- header: Objective Feedback vs Distorted Self-Perception - line: So imagine this person's got someone to do this offset coding challenge. How far are they gonna get with this? Right? I would imagine there's a good chance they're gonna get found out at the technical interview stage. But let's imagine @@ -868,7 +959,7 @@ transcript: sec: 2781 time: '46:21' who: Alexey -- header: 'Building Confidence: Structured Learning Paths & T‑Shaped Skills' +- header: 'Building Confidence: Structured Learning Paths & T-Shaped Skills' - line: Yes, you're right. Yeah, what do you do? I would say this – when we're changing careers, this is a particularly fragile time for the imposter syndrome. As you said, in the workplace it can be even more challenging. Part of the reason for @@ -1107,7 +1198,7 @@ transcript: sec: 3524 time: '58:44' who: Lindsay -- header: Episode Wrap‑Up & Final Career Coaching Takeaways +- header: Episode Wrap-Up & Final Career Coaching Takeaways - line: Okay, yeah. Thanks a lot. Thanks for your time. We should be wrapping up. Thanks a lot for sharing all this advice with us. And thanks, everyone, especially Michael, who needed to wake up at 6am to watch this. 
[laughs] I hope it was worth @@ -1123,109 +1214,6 @@ transcript: sec: 3587 time: '59:47' who: Alexey -dateadded: '2022-02-12' -duration: PT00H58M39S -quotableClips: -- name: Guest Introduction & Career Journey - startOffset: 68 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=68 - endOffset: 302 -- name: Spiced Academy Programs Overview (Full‑Stack & Data Science) - startOffset: 302 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=302 - endOffset: 360 -- name: 'Career Coaching Services: CVs, Interview Prep, Negotiation' - startOffset: 360 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=360 - endOffset: 560 -- name: Defining Ideal Job Environment & Career Experiments - startOffset: 560 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=560 - endOffset: 711 -- name: Reframing Past Experience into Recruiter‑Friendly Evidence - startOffset: 711 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=711 - endOffset: 782 -- name: Identifying Transferable Skills for Data Roles - startOffset: 782 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=782 - endOffset: 906 -- name: Achievement‑Based CV Writing vs Responsibility Lists - startOffset: 906 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=906 - endOffset: 1014 -- name: Coaching Access Model & Typical Student Engagement - startOffset: 1014 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1014 - endOffset: 1078 -- name: 'Career Coach Impact: Belief, Market Navigation, Paperwork' - startOffset: 1078 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1078 - endOffset: 1228 -- name: 'Marketplace Ambiguity: Data Scientist vs Analyst vs Engineer' - startOffset: 1228 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1228 - endOffset: 1428 -- name: 'Job Focus Importance: Tailoring Applications to Industry' - startOffset: 1428 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1428 - endOffset: 1539 -- name: Ikigai Framework for Finding Career Focus and Projects - startOffset: 1539 - url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1539 - endOffset: 1714 -- name: Junior Job Market Trends in Berlin (Analytics vs Engineering) - startOffset: 1714 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1714 - endOffset: 1837 -- name: 'Job Search Strategy: Balanced Tailored Applications & Volume' - startOffset: 1837 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=1837 - endOffset: 2011 -- name: 'Finding a Career Coach: Credentials, Specialization, Location' - startOffset: 2011 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2011 - endOffset: 2091 -- name: 'Imposter Syndrome: Origins and Common Triggers' - startOffset: 2091 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2091 - endOffset: 2225 -- name: Objective Feedback vs Distorted Self‑Perception - startOffset: 2225 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2225 - endOffset: 2517 -- name: 'Coping Strategies: Accepting Failure and Learning Loops' - startOffset: 2517 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2517 - endOffset: 2695 -- name: 'Resources for Imposter Syndrome: Research, Coaches, Mentors' - startOffset: 2695 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2695 - endOffset: 2828 -- name: 'Building Confidence: Structured Learning Paths & T‑Shaped Skills' - startOffset: 2828 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=2828 - endOffset: 3070 -- name: 'Managing Expectations: Junior Role Requirements & Progression' - startOffset: 3070 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3070 - endOffset: 3171 -- name: Internships vs Junior Roles and Creating Hidden Opportunities - startOffset: 3171 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3171 - endOffset: 3320 -- name: 'Working with Recruiters: When They Help Juniors & Seniors' - startOffset: 3320 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3320 - endOffset: 3510 -- name: 'LinkedIn Networking: Credible Informational Outreach' - startOffset: 3510 - url: 
https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3510 - endOffset: 3564 -- name: Episode Wrap‑Up & Final Career Coaching Takeaways - startOffset: 3564 - url: https://www.youtube.com/watch?v=_U8GrYJvmJM&t=3564 - endOffset: 3519 --- Links: diff --git a/_podcast/s11e02-data-science-career-development.md b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md similarity index 97% rename from _podcast/s11e02-data-science-career-development.md rename to _podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md index 63c4d68c..3652709a 100644 --- a/_podcast/s11e02-data-science-career-development.md +++ b/_podcast/hiring-and-managing-data-science-teams-in-b2b-saas.md @@ -1,19 +1,128 @@ --- +title: "How to Hire, Manage, and Grow a Data Science Team in B2B SaaS" +short: "How to Hire, Manage, and Grow a Data Science Team in B2B SaaS" +season: 11 episode: 2 guests: - katiebauer +image: images/podcast/hiring-and-managing-data-science-teams-in-b2b-saas.jpg ids: anchor: Data-Science-Career-Development---Katie-Bauer-e1oq96g youtube: i1NHRroQClQ -image: images/podcast/s11e02-data-science-career-development.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Science-Career-Development---Katie-Bauer-e1oq96g apple: https://podcasts.apple.com/us/podcast/data-science-career-development-katie-bauer/id1541710331?i=1000582680396 spotify: https://open.spotify.com/episode/0sm5qB1Cj4EJlbQ2giLtHR youtube: https://www.youtube.com/watch?v=i1NHRroQClQ -season: 11 -short: Data Science Career Development -title: 'Data Science Career Guide: Hiring, Managing & Growing Teams in B2B SaaS' + +description: "Learn hiring & management tactics for data science in B2B SaaS — hiring, onboarding, mentorship, and career growth strategies to build scalable analytics teams" +intro: "How do you hire, manage, and grow a high-impact data science team inside a B2B SaaS company? 
In this episode, Katie Bauer — Head of Data at GlossGenius and former data leader at Twitter and Reddit — walks through practical career frameworks and team-building strategies for product analysts, analytics engineers, marketing scientists, and data scientists. Katie traces her own trajectory from linguistics to analytics and explains what “craft” looks like in analytics: maintainability, documentation, and peer review. She breaks down IC versus management paths, how to mentor juniors through project-based learning, and when to buy versus build entry-level talent. You’ll hear concrete hiring and interview approaches for managers, how to assess strategy through case studies and trade-offs, and tips to help new hires succeed in their first month via proactive communication and async support channels. For leaders, Katie covers prioritization, raising data literacy, and fostering a data-driven culture. Listen to get actionable guidance on hiring data scientists, onboarding newcomers, developing senior talent, and scaling data teams in B2B SaaS." 
+topics: +- data science +- career development +- career growth +- hiring +- management +- team building +- mentorship +dateadded: 2022-10-15 + +duration: PT00H58M40S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=0 + endOffset: 93 +- name: 'Introduction: Episode focus on data science career development (Katie Bauer)' + startOffset: 93 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=93 + endOffset: 147 +- name: 'Career trajectory: linguistics to data science; Reddit and Twitter experience' + startOffset: 147 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=147 + endOffset: 276 +- name: GlossGenius product and head of data responsibilities (B2B SaaS) + startOffset: 276 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=276 + endOffset: 382 +- name: 'Current hiring needs: product analysts, analytics engineers, marketing scientists' + startOffset: 382 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=382 + endOffset: 428 +- name: 'Data scientist role: broad definition and varied responsibilities' + startOffset: 428 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=428 + endOffset: 513 +- name: 'Data science manager: building teams, matrix orgs, and cross-functional work' + startOffset: 513 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=513 + endOffset: 718 +- name: 'Craft quality: maintainability, documentation, peer review for analytics' + startOffset: 718 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=718 + endOffset: 912 +- name: 'Career framework: junior vs senior and the “terminal” career level' + startOffset: 912 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=912 + endOffset: 1130 +- name: 'Senior growth: abstraction, leadership exposure, and delegation' + startOffset: 1130 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1130 + endOffset: 1554 +- name: 'IC vs management: trying people leadership and the IC–manager pendulum' + startOffset: 1554 + url: 
https://www.youtube.com/watch?v=i1NHRroQClQ&t=1554 + endOffset: 1810 +- name: 'Managing juniors: mentorship, skills training, and project-based learning' + startOffset: 1810 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1810 + endOffset: 2056 +- name: 'Stakeholder conversations: talking to PMs and senior leaders (prep & questions)' + startOffset: 2056 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2056 + endOffset: 2342 +- name: 'Junior development: practice, exposure, and avoiding early specialization' + startOffset: 2342 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2342 + endOffset: 2412 +- name: 'Hiring juniors: build vs buy, long-term org benefits, and succession' + startOffset: 2412 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2412 + endOffset: 2679 +- name: 'Hiring managers: evaluation criteria for data science manager roles' + startOffset: 2679 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2679 + endOffset: 2841 +- name: 'Strategy assessment: case studies, trade-offs, and measurement in interviews' + startOffset: 2841 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2841 + endOffset: 3021 +- name: 'Entry-level hiring tips: standing out, outreach, and interview preparation' + startOffset: 3021 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3021 + endOffset: 3163 +- name: 'Onboarding first month: proactive communication and asking for help' + startOffset: 3163 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3163 + endOffset: 3251 +- name: 'Support mechanisms: regular check-ins, rubber-duck channels, async help' + startOffset: 3251 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3251 + endOffset: 3380 +- name: 'Head of data challenges: prioritization, data literacy, and culture building' + startOffset: 3380 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3380 + endOffset: 3549 +- name: 'Closing advice: careers as direction and guiding team growth' + startOffset: 3549 + url: 
https://www.youtube.com/watch?v=i1NHRroQClQ&t=3549 + endOffset: 3613 +- name: Episode wrap and contact information + startOffset: 3613 + url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3613 + endOffset: 3520 + transcript: - header: Podcast Introduction - header: 'Introduction: Episode focus on data science career development (Katie Bauer)' @@ -1044,7 +1153,7 @@ transcript: sec: 3189 time: '53:09' who: Katie -- header: 'Support mechanisms: regular check-ins, rubber‑duck channels, async help' +- header: 'Support mechanisms: regular check-ins, rubber-duck channels, async help' - line: One of the concerns I heard from juniors is that they are afraid of interrupting other people (seniors). The seniors are busy, “This is a very senior person, they have a lot of stuff to work on.” They don't feel that they should be interrupting @@ -1199,118 +1308,6 @@ transcript: sec: 3613 time: '1:00:13' who: Alexey -description: Learn hiring & management tactics for data science in B2B SaaS — hiring, - onboarding, mentorship, and career growth strategies to build scalable analytics - teams. -intro: 'How do you hire, manage, and grow a high-impact data science team inside a - B2B SaaS company? In this episode, Katie Bauer — Head of Data at GlossGenius and - former data leader at Twitter and Reddit — walks through practical career frameworks - and team-building strategies for product analysts, analytics engineers, marketing - scientists, and data scientists. Katie traces her own trajectory from linguistics - to analytics and explains what “craft” looks like in analytics: maintainability, - documentation, and peer review. She breaks down IC versus management paths, how - to mentor juniors through project-based learning, and when to buy versus build entry-level - talent. 
You’ll hear concrete hiring and interview approaches for managers, how to - assess strategy through case studies and trade-offs, and tips to help new hires - succeed in their first month via proactive communication and async support channels. - For leaders, Katie covers prioritization, raising data literacy, and fostering a - data-driven culture. Listen to get actionable guidance on hiring data scientists, - onboarding newcomers, developing senior talent, and scaling data teams in B2B SaaS.' -dateadded: '2022-10-15' -duration: PT00H58M40S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=0 - endOffset: 93 -- name: 'Introduction: Episode focus on data science career development (Katie Bauer)' - startOffset: 93 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=93 - endOffset: 147 -- name: 'Career trajectory: linguistics to data science; Reddit and Twitter experience' - startOffset: 147 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=147 - endOffset: 276 -- name: GlossGenius product and head of data responsibilities (B2B SaaS) - startOffset: 276 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=276 - endOffset: 382 -- name: 'Current hiring needs: product analysts, analytics engineers, marketing scientists' - startOffset: 382 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=382 - endOffset: 428 -- name: 'Data scientist role: broad definition and varied responsibilities' - startOffset: 428 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=428 - endOffset: 513 -- name: 'Data science manager: building teams, matrix orgs, and cross-functional work' - startOffset: 513 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=513 - endOffset: 718 -- name: 'Craft quality: maintainability, documentation, peer review for analytics' - startOffset: 718 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=718 - endOffset: 912 -- name: 'Career framework: junior vs senior and the “terminal” career level' 
- startOffset: 912 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=912 - endOffset: 1130 -- name: 'Senior growth: abstraction, leadership exposure, and delegation' - startOffset: 1130 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1130 - endOffset: 1554 -- name: 'IC vs management: trying people leadership and the IC–manager pendulum' - startOffset: 1554 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1554 - endOffset: 1810 -- name: 'Managing juniors: mentorship, skills training, and project-based learning' - startOffset: 1810 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=1810 - endOffset: 2056 -- name: 'Stakeholder conversations: talking to PMs and senior leaders (prep & questions)' - startOffset: 2056 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2056 - endOffset: 2342 -- name: 'Junior development: practice, exposure, and avoiding early specialization' - startOffset: 2342 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2342 - endOffset: 2412 -- name: 'Hiring juniors: build vs buy, long-term org benefits, and succession' - startOffset: 2412 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2412 - endOffset: 2679 -- name: 'Hiring managers: evaluation criteria for data science manager roles' - startOffset: 2679 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2679 - endOffset: 2841 -- name: 'Strategy assessment: case studies, trade-offs, and measurement in interviews' - startOffset: 2841 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=2841 - endOffset: 3021 -- name: 'Entry-level hiring tips: standing out, outreach, and interview preparation' - startOffset: 3021 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3021 - endOffset: 3163 -- name: 'Onboarding first month: proactive communication and asking for help' - startOffset: 3163 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3163 - endOffset: 3251 -- name: 'Support mechanisms: regular check-ins, rubber‑duck channels, async help' - startOffset: 3251 - url: 
https://www.youtube.com/watch?v=i1NHRroQClQ&t=3251 - endOffset: 3380 -- name: 'Head of data challenges: prioritization, data literacy, and culture building' - startOffset: 3380 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3380 - endOffset: 3549 -- name: 'Closing advice: careers as direction and guiding team growth' - startOffset: 3549 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3549 - endOffset: 3613 -- name: Episode wrap and contact information - startOffset: 3613 - url: https://www.youtube.com/watch?v=i1NHRroQClQ&t=3613 - endOffset: 3520 --- Links: diff --git a/_podcast/s07e02-recruiting-data-professionals.md b/_podcast/hiring-data-scientists-and-analysts.md similarity index 97% rename from _podcast/s07e02-recruiting-data-professionals.md rename to _podcast/hiring-data-scientists-and-analysts.md index 8e0b299d..cc3ebe89 100644 --- a/_podcast/s07e02-recruiting-data-professionals.md +++ b/_podcast/hiring-data-scientists-and-analysts.md @@ -1,12 +1,11 @@ --- -title: 'Hiring Data Scientists & Analysts: Talent Pipelines, Job Specs, CV Screening - & Salary Tips' -short: Recruiting Data Professionals -guests: -- alicjanotowska -image: images/podcast/s07e02-recruiting-data-professionals.jpg +title: "Hiring Data Scientists & Analysts: Talent Pipelines, Job Specs, CV Screening & Salary Tips" +short: "Recruiting Data Professionals" season: 7 episode: 2 +guests: +- alicjanotowska +image: images/podcast/hiring-data-scientists-and-analysts.jpg ids: youtube: WSMDXsjKYx4 anchor: Recruiting-Data-Professionals---Alicja-Notowska-e1dj2qn @@ -15,6 +14,129 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Recruiting-Data-Professionals---Alicja-Notowska-e1dj2qn spotify: https://open.spotify.com/episode/4LFZX7IfpdYkQ6si4ed0OR apple: https://podcasts.apple.com/us/podcast/recruiting-data-professionals-alicja-notowska/id1541710331?i=1000549307220 + +description: "Master hiring for data scientist & data analyst roles: craft job descriptions, build talent pipelines, 
screen CVs, negotiate salaries and land top hires." +topics: +- data science +- data analytics +- job search +- career transition +- hiring +intro: "How do you consistently find and hire the right data scientists and analysts in a competitive market? In this episode, Alicja Notowska — a talent acquisition specialist with 10+ years recruiting at Google, Zalando and now with embedded agency WeAreKeen — breaks down practical recruiting tactics for hiring data scientists and data analysts.

We cover the full interview funnel and end-to-end recruiting responsibilities, crafting job specs that emphasise problems over perks, and using inclusive language plus AI tools to attract diverse candidates. Alicja explains sourcing channels (LinkedIn, GitHub, conferences, academia), building talent pipelines and 360° recruitment, and keyword strategies for CV screening. You’ll hear guidance on education signals (BSc/MSc/PhD), CV best practices (clear responsibilities, dates, avoid buzzwords), recruiter screening interviews, and evaluating portfolio projects and online course work.

The episode also tackles salary conversations — bands, transparency and negotiation — managing hiring manager expectations, pathways for career changers, and offer etiquette. Listen to gain actionable tactics for job specs, CV screening, sourcing and salary negotiation when hiring data talent." +dateadded: 2022-01-29 + +duration: PT01H04M55S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=0 + endOffset: 125 +- name: Guest Background & Recruiting Experience + startOffset: 125 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=125 + endOffset: 186 +- name: 'Current Role: Embedded Talent Agency & Client Lead' + startOffset: 186 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=186 + endOffset: 284 +- name: Typical Interview Funnel for Data Roles + startOffset: 284 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=284 + endOffset: 380 +- name: End-to-End Recruiting Responsibilities + startOffset: 380 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=380 + endOffset: 429 +- name: Collaborating with Hiring Managers & Crafting Job Specs + startOffset: 429 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=429 + endOffset: 542 +- name: 'Sourcing Channels: LinkedIn, GitHub, Conferences & Academia' + startOffset: 542 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=542 + endOffset: 694 +- name: Building Talent Pipelines & 360° Recruitment + startOffset: 694 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=694 + endOffset: 837 +- name: Managing Hiring Expectations & Market Reality + startOffset: 837 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=837 + endOffset: 1038 +- name: Using Talent Market Data to Negotiate Requirements + startOffset: 1038 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1038 + endOffset: 1108 +- name: 'Job Description Focus: Problems Over Perks' + startOffset: 1108 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1108 + endOffset: 1204 +- name: 
Inclusive JD Language & AI Tools for Attraction + startOffset: 1204 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1204 + endOffset: 1292 +- name: 'Screening Profiles: Experience, Education & Responsibilities' + startOffset: 1292 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1292 + endOffset: 1556 +- name: Keyword Strategy for Data Scientist Searches + startOffset: 1556 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1556 + endOffset: 1630 +- name: 'Education Signals: Bachelor’s, Master’s & PhD Expectations' + startOffset: 1630 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1630 + endOffset: 1721 +- name: 'CV Best Practices: Responsibilities, Dates & Clarity' + startOffset: 1721 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1721 + endOffset: 1960 +- name: Avoiding Buzzwords & Making CVs Interview-Ready + startOffset: 1960 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1960 + endOffset: 2168 +- name: 'Recruiter Screening Interviews: Behavioral & Motivation Checks' + startOffset: 2168 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2168 + endOffset: 2433 +- name: 'Salary Conversations: Bands, Transparency & Negotiation' + startOffset: 2433 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2433 + endOffset: 2705 +- name: Handling High Salary Requests & Market Research + startOffset: 2705 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2705 + endOffset: 2856 +- name: 'Pathways for Career Changers: Gaining Practical Experience' + startOffset: 2856 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2856 + endOffset: 3053 +- name: 'Cover Letters vs CVs: When They Matter' + startOffset: 3053 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3053 + endOffset: 3249 +- name: 'Data Analyst Hiring: Title Ambiguity & Similar Processes' + startOffset: 3249 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3249 + endOffset: 3570 +- name: Portfolio Projects & Online Courses on Your CV + startOffset: 3570 + url: 
https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3570 + endOffset: 3697 +- name: 'Making a Strong Impression: Clear Explanations & Examples' + startOffset: 3697 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3697 + endOffset: 3709 +- name: 'Offer Etiquette: Communication, Commitments & Withdrawals' + startOffset: 3709 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3709 + endOffset: 4004 +- name: Where to Find Alicja & Episode Wrap-up + startOffset: 4004 + url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=4004 + endOffset: 3895 + transcript: - header: Podcast Introduction - header: Guest Background & Recruiting Experience @@ -1216,132 +1338,4 @@ transcript: sec: 4020 time: '1:07:00' who: Alicja -description: 'Master hiring for data scientist & data analyst roles: craft job descriptions, - build talent pipelines, screen CVs, negotiate salaries and land top hires.' -intro: How do you consistently find and hire the right data scientists and analysts - in a competitive market? In this episode, Alicja Notowska — a talent acquisition - specialist with 10+ years recruiting at Google, Zalando and now with embedded agency - WeAreKeen — breaks down practical recruiting tactics for hiring data scientists - and data analysts.

We cover the full interview funnel and end-to-end recruiting - responsibilities, crafting job specs that emphasise problems over perks, and using - inclusive language plus AI tools to attract diverse candidates. Alicja explains - sourcing channels (LinkedIn, GitHub, conferences, academia), building talent pipelines - and 360° recruitment, and keyword strategies for CV screening. You’ll hear guidance - on education signals (BSc/MSc/PhD), CV best practices (clear responsibilities, dates, - avoid buzzwords), recruiter screening interviews, and evaluating portfolio projects - and online course work.

The episode also tackles salary conversations — - bands, transparency and negotiation — managing hiring manager expectations, pathways - for career changers, and offer etiquette. Listen to gain actionable tactics for - job specs, CV screening, sourcing and salary negotiation when hiring data talent. -dateadded: '2022-01-29' -duration: PT01H04M55S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=0 - endOffset: 125 -- name: Guest Background & Recruiting Experience - startOffset: 125 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=125 - endOffset: 186 -- name: 'Current Role: Embedded Talent Agency & Client Lead' - startOffset: 186 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=186 - endOffset: 284 -- name: Typical Interview Funnel for Data Roles - startOffset: 284 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=284 - endOffset: 380 -- name: End-to-End Recruiting Responsibilities - startOffset: 380 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=380 - endOffset: 429 -- name: Collaborating with Hiring Managers & Crafting Job Specs - startOffset: 429 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=429 - endOffset: 542 -- name: 'Sourcing Channels: LinkedIn, GitHub, Conferences & Academia' - startOffset: 542 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=542 - endOffset: 694 -- name: Building Talent Pipelines & 360° Recruitment - startOffset: 694 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=694 - endOffset: 837 -- name: Managing Hiring Expectations & Market Reality - startOffset: 837 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=837 - endOffset: 1038 -- name: Using Talent Market Data to Negotiate Requirements - startOffset: 1038 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1038 - endOffset: 1108 -- name: 'Job Description Focus: Problems Over Perks' - startOffset: 1108 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1108 - endOffset: 1204 -- name: 
Inclusive JD Language & AI Tools for Attraction - startOffset: 1204 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1204 - endOffset: 1292 -- name: 'Screening Profiles: Experience, Education & Responsibilities' - startOffset: 1292 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1292 - endOffset: 1556 -- name: Keyword Strategy for Data Scientist Searches - startOffset: 1556 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1556 - endOffset: 1630 -- name: 'Education Signals: Bachelor’s, Master’s & PhD Expectations' - startOffset: 1630 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1630 - endOffset: 1721 -- name: 'CV Best Practices: Responsibilities, Dates & Clarity' - startOffset: 1721 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1721 - endOffset: 1960 -- name: Avoiding Buzzwords & Making CVs Interview-Ready - startOffset: 1960 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=1960 - endOffset: 2168 -- name: 'Recruiter Screening Interviews: Behavioral & Motivation Checks' - startOffset: 2168 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2168 - endOffset: 2433 -- name: 'Salary Conversations: Bands, Transparency & Negotiation' - startOffset: 2433 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2433 - endOffset: 2705 -- name: Handling High Salary Requests & Market Research - startOffset: 2705 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2705 - endOffset: 2856 -- name: 'Pathways for Career Changers: Gaining Practical Experience' - startOffset: 2856 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=2856 - endOffset: 3053 -- name: 'Cover Letters vs CVs: When They Matter' - startOffset: 3053 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3053 - endOffset: 3249 -- name: 'Data Analyst Hiring: Title Ambiguity & Similar Processes' - startOffset: 3249 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3249 - endOffset: 3570 -- name: Portfolio Projects & Online Courses on Your CV - startOffset: 3570 - url: 
https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3570 - endOffset: 3697 -- name: 'Making a Strong Impression: Clear Explanations & Examples' - startOffset: 3697 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3697 - endOffset: 3709 -- name: 'Offer Etiquette: Communication, Commitments & Withdrawals' - startOffset: 3709 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=3709 - endOffset: 4004 -- name: Where to Find Alicja & Episode Wrap-up - startOffset: 4004 - url: https://www.youtube.com/watch?v=WSMDXsjKYx4&t=4004 - endOffset: 3895 --- diff --git a/_podcast/s08e06-recruiting-data-engineers.md b/_podcast/hiring-for-data-engineering-jobs-in-europe.md similarity index 96% rename from _podcast/s08e06-recruiting-data-engineers.md rename to _podcast/hiring-for-data-engineering-jobs-in-europe.md index 00a4c2ee..d0729702 100644 --- a/_podcast/s08e06-recruiting-data-engineers.md +++ b/_podcast/hiring-for-data-engineering-jobs-in-europe.md @@ -1,44 +1,145 @@ --- +title: "Hiring Data Engineers in Europe: Nicolas Rassam on Interviews, Skills & Career Switches" +short: "Recruiting Data Engineers" +season: 8 episode: 6 guests: - nicolasrassam -date: 2025-11-07 -intro: How do you hire data engineers in Europe today — and what should candidates - and hiring managers actually focus on during interviews? In this episode, Nicolas - Rassam, a Senior Talent Acquisition Partner at Helsing with 10+ years scaling AI - and engineering teams at Onfido and Criteo, walks through the practical realities - of hiring data engineers across Europe's competitive, borderless market.

- We cover why data engineering matters now, differences in European hiring footprints, - and the rising demand for modern tooling. Nicolas breaks down common hiring challenges - — title ambiguity, experience mismatches, and recruiter technical literacy — and - explains how to evaluate transferable experience from software and BI roles. You'll - get concrete guidance on level expectations (junior → senior), typical interview - processes and assessments, resume essentials (SQL, Python, problem solving, outcomes), - cloud fundamentals, when infrastructure/DevOps skills matter, portfolio/GitHub storytelling, - and strategies for career switchers (internships, targeted projects). The episode - also addresses hiring without degrees, industry fit for regulated data, and how - targeted applications beat spray-and-pray. Listen to learn what to prepare for interviews, - how to position projects, and what hiring teams really look for when recruiting - data engineering talent in Europe. +image: images/podcast/hiring-for-data-engineering-jobs-in-europe.jpg ids: anchor: Recruiting-Data-Engineers---Nicolas-Rassam-e1hnkl1 youtube: hylxiu4VGTo -image: images/podcast/s08e06-recruiting-data-engineers.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Recruiting-Data-Engineers---Nicolas-Rassam-e1hnkl1 apple: https://podcasts.apple.com/us/podcast/recruiting-data-engineers-nicolas-rassam/id1541710331?i=1000559128813 spotify: https://open.spotify.com/episode/5ldkzYiHFvJCKoEyfAlvDs?si=WFJzcZ7fRCi1dzwapNGfzA youtube: https://www.youtube.com/watch?v=hylxiu4VGTo -season: 8 -short: Recruiting Data Engineers -title: 'Hiring Data Engineers in Europe: Nicolas Rassam on Interviews, Skills & Career - Switches' -description: 'Learn hiring strategies for data engineering in Europe: interview prep, - resume tips (SQL/Python), career-switch paths and cloud fundamentals to win roles.' 
+ +description: "Learn hiring strategies for data engineering in Europe: interview prep, resume tips (SQL/Python), career-switch paths and cloud fundamentals to win roles." +intro: "How do you hire data engineers in Europe today — and what should candidates and hiring managers actually focus on during interviews? In this episode, Nicolas Rassam, a Senior Talent Acquisition Partner at Helsing with 10+ years scaling AI and engineering teams at Onfido and Criteo, walks through the practical realities of hiring data engineers across Europe's competitive, borderless market.

We cover why data engineering matters now, differences in European hiring footprints, and the rising demand for modern tooling. Nicolas breaks down common hiring challenges — title ambiguity, experience mismatches, and recruiter technical literacy — and explains how to evaluate transferable experience from software and BI roles. You'll get concrete guidance on level expectations (junior → senior), typical interview processes and assessments, resume essentials (SQL, Python, problem solving, outcomes), cloud fundamentals, when infrastructure/DevOps skills matter, portfolio/GitHub storytelling, and strategies for career switchers (internships, targeted projects). The episode also addresses hiring without degrees, industry fit for regulated data, and how targeted applications beat spray-and-pray. Listen to learn what to prepare for interviews, how to position projects, and what hiring teams really look for when recruiting data engineering talent in Europe." topics: - data engineering -- career switch +- career transition - career growth +dateadded: 2022-04-30 +date: 2025-11-07 + +duration: PT01H01M05S + +quotableClips: +- name: Episode Opening & Guest Welcome + startOffset: 0 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=0 + endOffset: 75 +- name: Guest Background and Career Path + startOffset: 75 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=75 + endOffset: 192 +- name: Onfido Role & European Hiring Footprint + startOffset: 192 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=192 + endOffset: 220 +- name: 'Roles Recruited: Data, ML & Research Spectrum' + startOffset: 220 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=220 + endOffset: 273 +- name: European Tech Market Differences + startOffset: 273 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=273 + endOffset: 375 +- name: Borderless Recruitment and Competition Dynamics + startOffset: 375 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=375 + endOffset: 419 +- name: 'Episode Focus: Why 
Data Engineering Matters Now' + startOffset: 419 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=419 + endOffset: 468 +- name: Tech vs Business Balance and Training Gaps + startOffset: 468 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=468 + endOffset: 666 +- name: Data Science Misconceptions and Data Quality Dependence + startOffset: 666 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=666 + endOffset: 794 +- name: Rising Demand for Data Engineering and Modern Tooling + startOffset: 794 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=794 + endOffset: 960 +- name: 'Recruiter Technical Literacy: Big-Picture Knowledge' + startOffset: 960 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=960 + endOffset: 1127 +- name: 'Hiring Challenges: Titles, Experience Mismatch, Demand' + startOffset: 1127 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1127 + endOffset: 1257 +- name: Evaluating Transferable Experience from Software/BI Roles + startOffset: 1257 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1257 + endOffset: 1375 +- name: 'Expectations by Level: Junior → Senior Responsibilities' + startOffset: 1375 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1375 + endOffset: 1598 +- name: Typical Interview Process and Level-Based Assessments + startOffset: 1598 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1598 + endOffset: 1839 +- name: 'Career Switchers: Internships, Projects, and Focused Skills' + startOffset: 1839 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1839 + endOffset: 1876 +- name: 'Resume Essentials: SQL, Python, Problems & Outcomes' + startOffset: 1876 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1876 + endOffset: 2107 +- name: 'Transition Strategy: Team Structure and Role Selection' + startOffset: 2107 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2107 + endOffset: 2381 +- name: 'Cloud Fundamentals: Tool-Agnostic Conceptual Knowledge' + startOffset: 2381 + url: 
https://www.youtube.com/watch?v=hylxiu4VGTo&t=2381 + endOffset: 2510 +- name: 'Infrastructure & DevOps Skills: When They Matter' + startOffset: 2510 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2510 + endOffset: 2675 +- name: 'Interview Prep: Research Company and Explain Projects Clearly' + startOffset: 2675 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2675 + endOffset: 2893 +- name: Targeted Applications vs. Spray-and-Pray Approach + startOffset: 2893 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2893 + endOffset: 3045 +- name: 'Hiring Without Degrees: Skills, Projects, Continuous Learning' + startOffset: 3045 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3045 + endOffset: 3265 +- name: 'Standout Project Examples: First Pipelines & Privacy Work' + startOffset: 3265 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3265 + endOffset: 3353 +- name: 'Portfolio & GitHub: Shareable Work and Storytelling' + startOffset: 3353 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3353 + endOffset: 3485 +- name: 'Industry Fit: Domain Knowledge for Regulated Data' + startOffset: 3485 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3485 + endOffset: 3659 +- name: 'Follow-up Resources: Webinars and Further Reading' + startOffset: 3659 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3659 + endOffset: 3698 +- name: Episode Close and Final Tips + startOffset: 3698 + url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3698 + endOffset: 3712 + transcript: - header: Episode Opening & Guest Welcome - line: This week, we'll talk about recruiting data engineers. 
We have a special guest @@ -150,8 +251,8 @@ transcript: sec: 419 time: '6:59' who: Alexey -- line: We're talking about hiring data engineers, but I also wanted to ask you - – you are recruiting for a wide range of positions ML engineers, data scientists, +- line: We're talking about hiring data engineers, but I also wanted to ask you – + you are recruiting for a wide range of positions ML engineers, data scientists, data analysts, data engineers – in your opinion, what is the main difference between hiring data scientists and data engineers? sec: 419 @@ -862,121 +963,6 @@ transcript: sec: 3712 time: '1:01:52' who: Nicolas -dateadded: '2022-04-30' -duration: PT01H01M05S -quotableClips: -- name: Episode Opening & Guest Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=0 - endOffset: 75 -- name: Guest Background and Career Path - startOffset: 75 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=75 - endOffset: 192 -- name: Onfido Role & European Hiring Footprint - startOffset: 192 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=192 - endOffset: 220 -- name: 'Roles Recruited: Data, ML & Research Spectrum' - startOffset: 220 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=220 - endOffset: 273 -- name: European Tech Market Differences - startOffset: 273 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=273 - endOffset: 375 -- name: Borderless Recruitment and Competition Dynamics - startOffset: 375 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=375 - endOffset: 419 -- name: 'Episode Focus: Why Data Engineering Matters Now' - startOffset: 419 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=419 - endOffset: 468 -- name: Tech vs Business Balance and Training Gaps - startOffset: 468 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=468 - endOffset: 666 -- name: Data Science Misconceptions and Data Quality Dependence - startOffset: 666 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=666 - endOffset: 794 -- name: 
Rising Demand for Data Engineering and Modern Tooling - startOffset: 794 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=794 - endOffset: 960 -- name: 'Recruiter Technical Literacy: Big-Picture Knowledge' - startOffset: 960 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=960 - endOffset: 1127 -- name: 'Hiring Challenges: Titles, Experience Mismatch, Demand' - startOffset: 1127 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1127 - endOffset: 1257 -- name: Evaluating Transferable Experience from Software/BI Roles - startOffset: 1257 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1257 - endOffset: 1375 -- name: 'Expectations by Level: Junior → Senior Responsibilities' - startOffset: 1375 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1375 - endOffset: 1598 -- name: Typical Interview Process and Level-Based Assessments - startOffset: 1598 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1598 - endOffset: 1839 -- name: 'Career Switchers: Internships, Projects, and Focused Skills' - startOffset: 1839 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1839 - endOffset: 1876 -- name: 'Resume Essentials: SQL, Python, Problems & Outcomes' - startOffset: 1876 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=1876 - endOffset: 2107 -- name: 'Transition Strategy: Team Structure and Role Selection' - startOffset: 2107 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2107 - endOffset: 2381 -- name: 'Cloud Fundamentals: Tool-Agnostic Conceptual Knowledge' - startOffset: 2381 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2381 - endOffset: 2510 -- name: 'Infrastructure & DevOps Skills: When They Matter' - startOffset: 2510 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2510 - endOffset: 2675 -- name: 'Interview Prep: Research Company and Explain Projects Clearly' - startOffset: 2675 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2675 - endOffset: 2893 -- name: Targeted Applications vs. 
Spray-and-Pray Approach - startOffset: 2893 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=2893 - endOffset: 3045 -- name: 'Hiring Without Degrees: Skills, Projects, Continuous Learning' - startOffset: 3045 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3045 - endOffset: 3265 -- name: 'Standout Project Examples: First Pipelines & Privacy Work' - startOffset: 3265 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3265 - endOffset: 3353 -- name: 'Portfolio & GitHub: Shareable Work and Storytelling' - startOffset: 3353 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3353 - endOffset: 3485 -- name: 'Industry Fit: Domain Knowledge for Regulated Data' - startOffset: 3485 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3485 - endOffset: 3659 -- name: 'Follow-up Resources: Webinars and Further Reading' - startOffset: 3659 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3659 - endOffset: 3698 -- name: Episode Close and Final Tips - startOffset: 3698 - url: https://www.youtube.com/watch?v=hylxiu4VGTo&t=3698 - endOffset: 3712 --- Links: diff --git a/_podcast/s09e09-hiring-data-science-talent.md b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md similarity index 97% rename from _podcast/s09e09-hiring-data-science-talent.md rename to _podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md index 79e75209..5a6e6598 100644 --- a/_podcast/s09e09-hiring-data-science-talent.md +++ b/_podcast/hiring-for-data-science-jobs-interview-questions-skills.md.md @@ -1,20 +1,117 @@ --- +title: "How to Hire Data Scientists: Interview Questions, MLOps, AutoML Limits & Inclusive Hiring" +short: "Hiring Data Science Talent" +season: 9 episode: 9 guests: - olgaivina +image: images/podcast/hiring-for-data-science-jobs-interview-questions-skills.md.jpg ids: anchor: Hiring-Data-Science-Talent---Olga-Ivina-e1l4aku youtube: Af9t9r2b0z0 -image: images/podcast/s09e09-hiring-data-science-talent.jpg links: anchor: 
https://anchor.fm/datatalksclub/episodes/Hiring-Data-Science-Talent---Olga-Ivina-e1l4aku apple: https://podcasts.apple.com/us/podcast/hiring-data-science-talent-olga-ivina/id1541710331?i=1000570846380 spotify: https://open.spotify.com/episode/7ddvA9zNTip5Bt6EYnMNty?si=4fee84a6ad43465d youtube: https://www.youtube.com/watch?v=Af9t9r2b0z0 -season: 9 -short: Hiring Data Science Talent -title: 'How to Hire Data Scientists: Interview Questions, MLOps, AutoML Limits & Inclusive - Hiring' + +description: "Learn to hire data scientists: interview questions, MLOps insights and inclusive hiring tactics to assess technical depth, AutoML limits and build better teams." +intro: "How do you hire the right data scientists today—balancing algorithmic depth, MLOps skills, and inclusive hiring practices? In this episode, Olga Ivina, Delivery Data Science Director at Microsoft and former Deloitte consultant with a Ph.D. and 16+ years in AI, walks through practical strategies for recruiting strong data science talent.

Olga draws on her journey from applied mathematics and air pollution research to leading delivery teams to explain core hiring criteria: technical excellence, growth mindset, communication, and humility. We cover concrete interview questions and diagnostic problems that reveal algorithmic understanding and assumptions, how to structure coding and analytical tasks, and ways to assess role fit between mathematical expertise and engineering skills. The conversation also addresses the rise of MLOps, realistic limits of AutoML and the human-in-the-loop, career path trade-offs, and interviewing candidates with employment gaps.

If you’re hiring data scientists or building interview processes, this episode delivers actionable frameworks, sample diagnostic questions, and inclusive hiring tips—language to avoid in job posts and strategies to attract diverse candidates—so you can evaluate both technical depth and practical delivery capability." +topics: +- data science +- career growth +- hiring +- MLOps +dateadded: 2022-07-22 + +duration: PT00H58M12S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=0 + endOffset: 104 +- name: 'Career Beginnings: Applied mathematics, forecasting, and consulting' + startOffset: 104 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=104 + endOffset: 385 +- name: 'PhD Research: Air pollution modeling and conformal prediction' + startOffset: 385 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=385 + endOffset: 510 +- name: 'Current Role: Leading delivery data science teams and startup support' + startOffset: 510 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=510 + endOffset: 638 +- name: 'Evolution of Data Science: Skill changes and rise of MLOps' + startOffset: 638 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=638 + endOffset: 889 +- name: 'Core Hiring Criteria: Technical excellence and growth mindset' + startOffset: 889 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=889 + endOffset: 945 +- name: 'Technical Depth: Demonstrating algorithmic understanding and assumptions' + startOffset: 945 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=945 + endOffset: 1083 +- name: 'Attitude & Motivation: Assessing passion, humility, and communication' + startOffset: 1083 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1083 + endOffset: 1216 +- name: 'Podcasting as Learning: Conversations that shape career perspectives' + startOffset: 1216 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1216 + endOffset: 1381 +- name: 'Staying Current: Sources for data science and 
engineering updates' + startOffset: 1381 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1381 + endOffset: 1521 +- name: 'Technical Interviews: Coding, analytical tasks, and objective criteria' + startOffset: 1521 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1521 + endOffset: 1712 +- name: 'Diagnostic Questions: Sample problems that reveal depth of knowledge' + startOffset: 1712 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1712 + endOffset: 1875 +- name: 'Foundational Skills: Descriptive statistics and recommended reading' + startOffset: 1875 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1875 + endOffset: 1952 +- name: 'Role Fit: Hiring for mathematical expertise versus engineering skills' + startOffset: 1952 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1952 + endOffset: 2264 +- name: 'AutoML & Automation: Limits of AutoML and the human-in-the-loop' + startOffset: 2264 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2264 + endOffset: 2529 +- name: 'Career Paths: Individual contributor vs management trade-offs' + startOffset: 2529 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2529 + endOffset: 2737 +- name: 'Career Transition: From data analyst to data scientist' + startOffset: 2737 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2737 + endOffset: 2826 +- name: 'Diversity Hiring: Strategies to attract female data science talent' + startOffset: 2826 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2826 + endOffset: 3233 +- name: 'Inclusive Job Posts: Language, requirements, and avoiding discouraging wording' + startOffset: 3233 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3233 + endOffset: 3391 +- name: 'Employment Gaps: Evaluating candidates with long CV breaks' + startOffset: 3391 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3391 + endOffset: 3553 +- name: Episode Wrap-up & Resources + startOffset: 3553 + url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3553 + endOffset: 3492 + transcript: - 
header: Episode Introduction - header: 'Career Beginnings: Applied mathematics, forecasting, and consulting' @@ -1269,112 +1366,6 @@ transcript: sec: 3596 time: '59:56' who: Olga -description: 'Learn to hire data scientists: interview questions, MLOps insights and - inclusive hiring tactics to assess technical depth, AutoML limits and build better - teams.' -intro: 'How do you hire the right data scientists today—balancing algorithmic depth, - MLOps skills, and inclusive hiring practices? In this episode, Olga Ivina, Delivery - Data Science Director at Microsoft and former Deloitte consultant with a Ph.D. and - 16+ years in AI, walks through practical strategies for recruiting strong data science - talent.

Olga draws on her journey from applied mathematics and air pollution - research to leading delivery teams to explain core hiring criteria: technical excellence, - growth mindset, communication, and humility. We cover concrete interview questions - and diagnostic problems that reveal algorithmic understanding and assumptions, how - to structure coding and analytical tasks, and ways to assess role fit between mathematical - expertise and engineering skills. The conversation also addresses the rise of MLOps, - realistic limits of AutoML and the human-in-the-loop, career path trade-offs, and - interviewing candidates with employment gaps.

If you’re hiring data scientists - or building interview processes, this episode delivers actionable frameworks, sample - diagnostic questions, and inclusive hiring tips—language to avoid in job posts and - strategies to attract diverse candidates—so you can evaluate both technical depth - and practical delivery capability.' -dateadded: '2022-07-22' -duration: PT00H58M12S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=0 - endOffset: 104 -- name: 'Career Beginnings: Applied mathematics, forecasting, and consulting' - startOffset: 104 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=104 - endOffset: 385 -- name: 'PhD Research: Air pollution modeling and conformal prediction' - startOffset: 385 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=385 - endOffset: 510 -- name: 'Current Role: Leading delivery data science teams and startup support' - startOffset: 510 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=510 - endOffset: 638 -- name: 'Evolution of Data Science: Skill changes and rise of MLOps' - startOffset: 638 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=638 - endOffset: 889 -- name: 'Core Hiring Criteria: Technical excellence and growth mindset' - startOffset: 889 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=889 - endOffset: 945 -- name: 'Technical Depth: Demonstrating algorithmic understanding and assumptions' - startOffset: 945 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=945 - endOffset: 1083 -- name: 'Attitude & Motivation: Assessing passion, humility, and communication' - startOffset: 1083 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1083 - endOffset: 1216 -- name: 'Podcasting as Learning: Conversations that shape career perspectives' - startOffset: 1216 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1216 - endOffset: 1381 -- name: 'Staying Current: Sources for data science and engineering updates' - startOffset: 1381 - url: 
https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1381 - endOffset: 1521 -- name: 'Technical Interviews: Coding, analytical tasks, and objective criteria' - startOffset: 1521 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1521 - endOffset: 1712 -- name: 'Diagnostic Questions: Sample problems that reveal depth of knowledge' - startOffset: 1712 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1712 - endOffset: 1875 -- name: 'Foundational Skills: Descriptive statistics and recommended reading' - startOffset: 1875 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1875 - endOffset: 1952 -- name: 'Role Fit: Hiring for mathematical expertise versus engineering skills' - startOffset: 1952 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=1952 - endOffset: 2264 -- name: 'AutoML & Automation: Limits of AutoML and the human-in-the-loop' - startOffset: 2264 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2264 - endOffset: 2529 -- name: 'Career Paths: Individual contributor vs management trade-offs' - startOffset: 2529 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2529 - endOffset: 2737 -- name: 'Career Transition: From data analyst to data scientist' - startOffset: 2737 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2737 - endOffset: 2826 -- name: 'Diversity Hiring: Strategies to attract female data science talent' - startOffset: 2826 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=2826 - endOffset: 3233 -- name: 'Inclusive Job Posts: Language, requirements, and avoiding discouraging wording' - startOffset: 3233 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3233 - endOffset: 3391 -- name: 'Employment Gaps: Evaluating candidates with long CV breaks' - startOffset: 3391 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3391 - endOffset: 3553 -- name: Episode Wrap-up & Resources - startOffset: 3553 - url: https://www.youtube.com/watch?v=Af9t9r2b0z0&t=3553 - endOffset: 3492 --- Links: diff --git 
a/_podcast/s09e05-data-scientists-at-work.md b/_podcast/how-to-break-into-data-science.md similarity index 96% rename from _podcast/s09e05-data-scientists-at-work.md rename to _podcast/how-to-break-into-data-science.md index f0f60d69..f9a36de9 100644 --- a/_podcast/s09e05-data-scientists-at-work.md +++ b/_podcast/how-to-break-into-data-science.md @@ -1,20 +1,124 @@ --- +title: "Data Science Career Playbook: Job Hunt, Portfolios, DALL·E 2 & Overcoming FOMO" +short: "Data Scientists at Work" +season: 9 episode: 5 guests: - misraturp +image: images/podcast/how-to-break-into-data-science.jpg ids: anchor: Data-Scientists-at-Work---Msra-Turp-e1k7pbn youtube: oUycqtMoYr8 -image: images/podcast/s09e05-data-scientists-at-work.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Data-Scientists-at-Work---Msra-Turp-e1k7pbn apple: https://podcasts.apple.com/us/podcast/data-scientists-at-work-m%C4%B1sra-turp/id1541710331?i=1000567625873 spotify: https://open.spotify.com/episode/1RSUsWDOBDD4sNDruEbnEY youtube: https://www.youtube.com/watch?v=oUycqtMoYr8 -season: 9 -short: Data Scientists at Work -title: 'Data Science Career Playbook: Job Hunt, Portfolios, DALL·E 2 & Overcoming - FOMO' + +description: "Master data science job hunt and portfolio tactics: actionable projects, recruiter tips, DALL·E 2 basics and FOMO coping strategies to land interviews faster." +intro: "How do you actually break into data science, build a portfolio that gets interviews, and stay sane while every new AI model vies for your attention? In this episode Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI (founder of “So you want to be a data scientist?”) — walks through a practical career playbook for job hunting, portfolio building, and coping with FOMO and imposter syndrome.

We cover Mısra’s career path from big data engineering to developer advocacy, what a data scientist’s day-to-day looks like, and the typical deliverables hiring managers expect (models, pipelines, reports, presentations). She explains role variants (consultant, in-house, freelance), tradeoffs between generalist and specialist tracks, and when a master’s or PhD matters. You’ll get concrete job-hunt tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why real-world datasets (like NYC Open Data) matter. The episode also includes a clear, high-level overview of DALL·E 2 and diffusion models, plus strategies for staying current (conferences vs social media) and knowing when a new framework is “good enough.”

Listen to learn actionable steps to refine your portfolio, present data science value to stakeholders, and manage FOMO while advancing your career." +topics: +- data science +- career growth +- job search +dateadded: 2022-06-25 + +duration: PT01H04M12S + +quotableClips: +- name: Episode Introduction + startOffset: 67 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=67 + endOffset: 117 +- name: 'Misra Career Path: From Big Data Engineering to Content Creator' + startOffset: 117 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=117 + endOffset: 251 +- name: Transition to Developer Advocate and Content Work + startOffset: 251 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=251 + endOffset: 389 +- name: 'Data Scientist Day-to-Day: Explaining the Role to Non-Tech Audiences' + startOffset: 389 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=389 + endOffset: 541 +- name: 'Deliverables: Trained Models, Pipelines, Reports, and Presentations' + startOffset: 541 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=541 + endOffset: 658 +- name: 'Role Variants: Consultant, In-House, and Freelance Responsibilities' + startOffset: 658 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=658 + endOffset: 849 +- name: Unrealistic Expectations of Data Scientists in Industry + startOffset: 849 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=849 + endOffset: 943 +- name: 'Keeping Current with AI: Managing FOMO (Fear of Missing Out)' + startOffset: 943 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=943 + endOffset: 1221 +- name: 'DALL·E 2 Overview: Text-to-Image Capabilities' + startOffset: 1221 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1221 + endOffset: 1301 +- name: 'Diffusion Models: High-Level Explanation' + startOffset: 1301 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1301 + endOffset: 1659 +- name: 'Staying Updated: Value of Industry Conferences over Social Media' + startOffset: 1659 + url: 
https://www.youtube.com/watch?v=oUycqtMoYr8&t=1659 + endOffset: 1811 +- name: 'Major Challenge: Communicating Data Science Value to Stakeholders' + startOffset: 1811 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1811 + endOffset: 2131 +- name: 'FOMO and Imposter Syndrome: Causes and Coping Strategies' + startOffset: 2131 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2131 + endOffset: 2412 +- name: 'Learning a New Framework: Knowing When It''s "Good Enough"' + startOffset: 2412 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2412 + endOffset: 2567 +- name: 'Preferred Setup: Advantages of In-House Data Science Roles' + startOffset: 2567 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2567 + endOffset: 2853 +- name: 'Career Tradeoffs: Generalist Versus Specialist Paths' + startOffset: 2853 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2853 + endOffset: 3032 +- name: 'Breaking In: Job-Hunting Strategies for Entry-Level Data Scientists' + startOffset: 3032 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3032 + endOffset: 3271 +- name: 'Catching Recruiter Attention: Research, Questions, and Relevant Projects' + startOffset: 3271 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3271 + endOffset: 3429 +- name: 'Portfolio Projects: What Hiring Managers Really Look For' + startOffset: 3429 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3429 + endOffset: 3494 +- name: 'Real-World Datasets: Using NYC Open Data and Dirty Data Examples' + startOffset: 3494 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3494 + endOffset: 3702 +- name: 'Degrees vs Experience: When a Master''s or PhD Matters' + startOffset: 3702 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3702 + endOffset: 3868 +- name: Where to Find Misra Online and Recommended Resources + startOffset: 3868 + url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3868 + endOffset: 3903 +- name: Episode Closing and Further Links + startOffset: 3903 + url: 
https://www.youtube.com/watch?v=oUycqtMoYr8&t=3903 + endOffset: 3852 + transcript: - header: Episode Introduction - line: Hi, everyone. This week, we'll talk about the work of data scientists and @@ -133,7 +237,7 @@ transcript: sec: 375 time: '6:15' who: Misra -- header: 'Data Scientist Day‑to‑Day: Explaining the Role to Non‑Tech Audiences' +- header: 'Data Scientist Day-to-Day: Explaining the Role to Non-Tech Audiences' - line: Well, coming back to our main topic – data scientists at work. Maybe there are some parallels to data developer advocates, but you were a data scientist quite recently. Coming back to this topic, imagine you're a data scientist, not @@ -226,7 +330,7 @@ transcript: sec: 541 time: '9:01' who: Misra -- header: 'Role Variants: Consultant, In‑House, and Freelance Responsibilities' +- header: 'Role Variants: Consultant, In-House, and Freelance Responsibilities' - line: Was this something that you did at IBM, or is this something that maybe consultants tend to do more often? Like creating presentations and reports? sec: 658 @@ -411,7 +515,7 @@ transcript: sec: 1139 time: '18:59' who: Misra -- header: 'DALL·E 2 Overview: Text‑to‑Image Capabilities' +- header: 'DALL·E 2 Overview: Text-to-Image Capabilities' - line: How does DALL·E work? Maybe you can tell us in a few sentences for those who have the fear of missing out. Because I do. I see these awesome pictures. I think the way it works, as a black box, you give it some prompt like a piece of text, @@ -437,7 +541,7 @@ transcript: sec: 1241 time: '20:41' who: Misra -- header: 'Diffusion Models: High‑Level Explanation' +- header: 'Diffusion Models: High-Level Explanation' - line: Yeah. Well, like it cannot imagine [audio cuts out] how it works. Like all these formulas that are there – they're just scary. I cannot imagine what it looks like for things like DALL·E like must be insane. 
@@ -789,7 +893,7 @@ transcript: sec: 2301 time: '38:21' who: Misra -- header: 'Learning a New Framework: Knowing When It''s "Good Enough"' +- header: 'Learning a New Framework: Knowing When It''s "Good Enough"' - line: Let's say there is a new framework. You think this framework is useful so decide to pick it up and learn it a little bit better. We don't want to learn it perfectly – we know that this is not going to be a great way of spending our @@ -837,7 +941,7 @@ transcript: sec: 2563 time: '42:43' who: Misra -- header: 'Preferred Setup: Advantages of In‑House Data Science Roles' +- header: 'Preferred Setup: Advantages of In-House Data Science Roles' - line: Okay, I see that we have quite a lot of questions. I also prepared questions for you, but I think it's better to go through the questions. The first question is about the types of data scientists and this is something we talked about at @@ -1006,7 +1110,7 @@ transcript: sec: 3029 time: '50:29' who: Misra -- header: 'Breaking In: Job‑Hunting Strategies for Entry‑Level Data Scientists' +- header: 'Breaking In: Job-Hunting Strategies for Entry-Level Data Scientists' - line: Okay. For newer data scientists – from courses like yours or boot camps – how would you suggest they break into the oversaturated market for entry-level data scientists? @@ -1151,7 +1255,7 @@ transcript: sec: 3429 time: '57:09' who: Alexey -- header: 'Real‑World Datasets: Using NYC Open Data and Dirty Data Examples' +- header: 'Real-World Datasets: Using NYC Open Data and Dirty Data Examples' - line: Yeah, I think that's a good point. Obviously, people are doing the same projects. But I think with these projects, what you're trying to show is not how great of a model that you're building. 
The model you build might suck and that's fine, @@ -1301,119 +1405,6 @@ transcript: sec: 3919 time: '1:05:19' who: Misra -description: 'Master data science job hunt and portfolio tactics: actionable projects, - recruiter tips, DALL·E 2 basics and FOMO coping strategies to land interviews faster.' -intro: How do you actually break into data science, build a portfolio that gets interviews, - and stay sane while every new AI model vies for your attention? In this episode - Mısra Turp — data scientist, content creator, and developer advocate at AssemblyAI - (founder of “So you want to be a data scientist?”) — walks through a practical career - playbook for job hunting, portfolio building, and coping with FOMO and imposter - syndrome.

We cover Mısra’s career path from big data engineering to developer - advocacy, what a data scientist’s day‑to‑day looks like, and the typical deliverables - hiring managers expect (models, pipelines, reports, presentations). She explains - role variants (consultant, in‑house, freelance), tradeoffs between generalist and - specialist tracks, and when a master’s or PhD matters. You’ll get concrete job‑hunt - tactics—how to catch a recruiter’s eye, which portfolio projects resonate, and why - real‑world datasets (like NYC Open Data) matter. The episode also includes a clear, - high‑level overview of DALL·E 2 and diffusion models, plus strategies for staying - current (conferences vs social media) and knowing when a new framework is “good - enough.”

Listen to learn actionable steps to refine your portfolio, present - data science value to stakeholders, and manage FOMO while advancing your career. -dateadded: '2022-06-25' -duration: PT01H04M12S -quotableClips: -- name: Episode Introduction - startOffset: 67 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=67 - endOffset: 117 -- name: 'Misra Career Path: From Big Data Engineering to Content Creator' - startOffset: 117 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=117 - endOffset: 251 -- name: Transition to Developer Advocate and Content Work - startOffset: 251 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=251 - endOffset: 389 -- name: 'Data Scientist Day‑to‑Day: Explaining the Role to Non‑Tech Audiences' - startOffset: 389 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=389 - endOffset: 541 -- name: 'Deliverables: Trained Models, Pipelines, Reports, and Presentations' - startOffset: 541 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=541 - endOffset: 658 -- name: 'Role Variants: Consultant, In‑House, and Freelance Responsibilities' - startOffset: 658 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=658 - endOffset: 849 -- name: Unrealistic Expectations of Data Scientists in Industry - startOffset: 849 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=849 - endOffset: 943 -- name: 'Keeping Current with AI: Managing FOMA (Fear of Missing Out)' - startOffset: 943 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=943 - endOffset: 1221 -- name: 'DALL·E 2 Overview: Text‑to‑Image Capabilities' - startOffset: 1221 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1221 - endOffset: 1301 -- name: 'Diffusion Models: High‑Level Explanation' - startOffset: 1301 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1301 - endOffset: 1659 -- name: 'Staying Updated: Value of Industry Conferences over Social Media' - startOffset: 1659 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1659 - endOffset: 1811 -- name: 'Major Challenge: 
Communicating Data Science Value to Stakeholders' - startOffset: 1811 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=1811 - endOffset: 2131 -- name: 'FOMA and Imposter Syndrome: Causes and Coping Strategies' - startOffset: 2131 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2131 - endOffset: 2412 -- name: 'Learning a New Framework: Knowing When It''s "Good Enough"' - startOffset: 2412 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2412 - endOffset: 2567 -- name: 'Preferred Setup: Advantages of In‑House Data Science Roles' - startOffset: 2567 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2567 - endOffset: 2853 -- name: 'Career Tradeoffs: Generalist Versus Specialist Paths' - startOffset: 2853 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=2853 - endOffset: 3032 -- name: 'Breaking In: Job‑Hunting Strategies for Entry‑Level Data Scientists' - startOffset: 3032 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3032 - endOffset: 3271 -- name: 'Catching Recruiter Attention: Research, Questions, and Relevant Projects' - startOffset: 3271 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3271 - endOffset: 3429 -- name: 'Portfolio Projects: What Hiring Managers Really Look For' - startOffset: 3429 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3429 - endOffset: 3494 -- name: 'Real‑World Datasets: Using NYC Open Data and Dirty Data Examples' - startOffset: 3494 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3494 - endOffset: 3702 -- name: 'Degrees vs Experience: When a Master''s or PhD Matters' - startOffset: 3702 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3702 - endOffset: 3868 -- name: Where to Find Misra Online and Recommended Resources - startOffset: 3868 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3868 - endOffset: 3903 -- name: Episode Closing and Further Links - startOffset: 3903 - url: https://www.youtube.com/watch?v=oUycqtMoYr8&t=3903 - endOffset: 3852 --- Links: diff --git 
a/_podcast/s12e07-navigating-career-changes-in-machine-learning.md b/_podcast/how-to-grow-your-ml-engineering-career.md similarity index 96% rename from _podcast/s12e07-navigating-career-changes-in-machine-learning.md rename to _podcast/how-to-grow-your-ml-engineering-career.md index 40f58e58..ffc43d49 100644 --- a/_podcast/s12e07-navigating-career-changes-in-machine-learning.md +++ b/_podcast/how-to-grow-your-ml-engineering-career.md @@ -1,20 +1,126 @@ --- +title: "How to Grow Your ML Engineering Career: Platform Work, LLM Workflows & Debugging Skills" +short: "How to Grow Your ML Engineering Career" +season: 12 episode: 7 guests: - krzysztofszafanek +image: images/podcast/how-to-grow-your-ml-engineering-career.jpg ids: anchor: Navigating-Career-Changes-in-Machine-Learning---Chris-Szafranek-e1ucvn2 youtube: cUxZBXQgZaU -image: images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Navigating-Career-Changes-in-Machine-Learning---Chris-Szafranek-e1ucvn2 apple: https://podcasts.apple.com/us/podcast/navigating-career-changes-in-machine-learning-chris/id1541710331?i=1000597921745 spotify: https://open.spotify.com/episode/1mDlJi7vfLeJgIZStQ4G90?si=Spd04VwmSh2zZCgZzLIPbA youtube: https://www.youtube.com/watch?v=cUxZBXQgZaU -season: 12 -short: Navigating Career Changes in Machine Learning -title: 'From Web and Game Dev to ML Platforms and LLMs: Career Transitions, Prompt - Engineering and Debugging' + +description: "Discover career transitions into ML, prompt engineering and LLMs—practical debugging tips, transferable skills, hiring insights, and real platform lessons" +intro: "How do you move from web and game development into building machine learning platforms and working with LLMs—and what practical skills carry over? 
In this episode Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, gaming and online retail, and currently an ML Platform engineer and internal consultant at Zalando, answers that question through concrete examples and career lessons.

We trace Krzysztof’s path from HTML5, Objective-C, Swift and Unity to Python, ML platform work (the zflow library and pipeline architecture), and hands-on experiments with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions between stacks and roles, platform consulting—training, onboarding and user support—prompt engineering tips, debugging strategies (rubber ducking, divide-and-conquer), and a real Postgres optimization troubleshooting case. He also discusses transferable skills like SQL, Git and shell, T-shaped expertise, hiring dynamics, and how to get unstuck with ChatGPT and problem decomposition.

Listen to gain practical guidance on ML platforms, prompt engineering, debugging techniques, and career strategy for transitioning into ML and LLM work—plus actionable resources and prioritization tactics you can apply immediately" +topics: +- machine learning +- career transitions +- LLMs +- hiring +- career strategy +dateadded: 2023-02-04 + +duration: PT00H59M42S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=0 + endOffset: 132 +- name: 'Career Overview: Web, Game Development, and Python' + startOffset: 132 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=132 + endOffset: 384 +- name: 'Mobile & Game Development: HTML5, Objective-C, Swift, and Unity' + startOffset: 384 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=384 + endOffset: 425 +- name: 'Career Transitions: Adapting Between Stacks and Roles' + startOffset: 425 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=425 + endOffset: 606 +- name: Tech Radar & Language Freedom at Zalando + startOffset: 606 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=606 + endOffset: 805 +- name: 'Machine Learning Platform: zflow Library and Pipeline Architecture' + startOffset: 805 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=805 + endOffset: 959 +- name: 'Platform Consulting: Training, Onboarding, and User Support' + startOffset: 959 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=959 + endOffset: 1068 +- name: 'From Engineer to Consultant: Reduced Hands-on Coding' + startOffset: 1068 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1068 + endOffset: 1106 +- name: 'Sabbatical Focus: Learning, Break, and Exploration' + startOffset: 1106 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1106 + endOffset: 1112 +- name: 'Sabbatical Projects: Diffusion Models, ChatGPT Experiments, and Modal Labs' + startOffset: 1112 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1112 + endOffset: 1321 +- name: 'Large Language Models: 
Coding Assistance, Architecture Sparring, and Caveats' + startOffset: 1321 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1321 + endOffset: 1606 +- name: 'Prompt Engineering: Practical Tips and People to Follow' + startOffset: 1606 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1606 + endOffset: 1740 +- name: 'Transferable Skills: SQL, Git, Shell, and the Lindy Effect' + startOffset: 1740 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1740 + endOffset: 2014 +- name: 'Troubleshooting Example: Postgres Optimization and Performance Gains' + startOffset: 2014 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2014 + endOffset: 2123 +- name: 'T-Shaped Expertise: Depth, Breadth, and Career Strategy' + startOffset: 2123 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2123 + endOffset: 2257 +- name: 'Debugging as a Strength: Rubber Duck, Divide-and-Conquer, and Mentoring' + startOffset: 2257 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2257 + endOffset: 2692 +- name: 'Learning Motivation: Curiosity, Pomodoro, and Inspirational Content' + startOffset: 2692 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2692 + endOffset: 2917 +- name: 'Getting Unstuck: ChatGPT, Tutorials, and Problem Decomposition' + startOffset: 2917 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2917 + endOffset: 3022 +- name: 'Hiring Dynamics: Job Descriptions and Recruiting Generalists' + startOffset: 3022 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3022 + endOffset: 3263 +- name: 'Job Market Trends: Specialists vs Generalists in Machine Learning' + startOffset: 3263 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3263 + endOffset: 3382 +- name: 'Prioritization Techniques: To-Do Lists, Deadlines, and Focus' + startOffset: 3382 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3382 + endOffset: 3491 +- name: 'Learning Resources: Books, Documentation, YouTube, and Practical Learning' + startOffset: 3491 + url: 
https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3491 + endOffset: 3632 +- name: Episode Wrap-up and Key Takeaways + startOffset: 3632 + url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3632 + endOffset: 3582 + transcript: - header: Podcast Introduction - line: This week, we'll talk about quite a few things – building a machine learning @@ -166,7 +272,7 @@ transcript: sec: 374 time: '6:14' who: Alexey -- header: 'Mobile & Game Development: HTML5, Objective‑C, Swift, and Unity' +- header: 'Mobile & Game Development: HTML5, Objective-C, Swift, and Unity' - line: I didn't use anything myself. When I was there, it was a very exciting time because Wooga was trying different technology. We started with HTML5, which was a big thing at the time, around 2012. We built and shipped again, but we decided @@ -394,7 +500,7 @@ transcript: sec: 1057 time: '17:37' who: Alexey -- header: 'From Engineer to Consultant: Reduced Hands‑on Coding' +- header: 'From Engineer to Consultant: Reduced Hands-on Coding' - line: Not so much, I must say. I think this is a bit of a disadvantage that I don't code, especially when it comes to more complex problems – I can't solve them anymore as a consultant, compared to my previous role as a software engineer. I still @@ -748,7 +854,7 @@ transcript: sec: 2114 time: '35:14' who: Alexey -- header: 'T‑Shaped Expertise: Depth, Breadth, and Career Strategy' +- header: 'T-Shaped Expertise: Depth, Breadth, and Career Strategy' - line: I was also thinking about how it affects recruiting and finding a job. Very often I saw that companies look for experts. Sometimes it could be a bit scary, like you see a job ad and you see, “We want five years of experience with large @@ -789,7 +895,7 @@ transcript: sec: 2252 time: '37:32' who: Alexey -- header: 'Debugging as a Strength: Rubber Duck, Divide‑and‑Conquer, and Mentoring' +- header: 'Debugging as a Strength: Rubber Duck, Divide-and-Conquer, and Mentoring' - line: I’m not sure which letter it would be, Maybe an M. 
For example, I got pretty good at using Git. Very often, I help my colleagues to resolve problems with history, with rebasing, and things like that. I really also like Shell scripting and learning @@ -1121,7 +1227,7 @@ transcript: sec: 3375 time: '56:15' who: Alexey -- header: 'Prioritization Techniques: To‑Do Lists, Deadlines, and Focus' +- header: 'Prioritization Techniques: To-Do Lists, Deadlines, and Focus' - line: What I do is – I'm a huge fan of to-do lists. If you just write down everything that is on your mind – just try to capture it. And then you can prioritize. Then you can also just drop some things off the list. @@ -1208,7 +1314,7 @@ transcript: sec: 3624 time: '1:00:24' who: Alexey -- header: Episode Wrap‑up and Key Takeaways +- header: Episode Wrap-up and Key Takeaways - line: Sure. sec: 3691 time: '1:01:31' @@ -1224,119 +1330,6 @@ transcript: sec: 3652 time: '1:00:52' who: Chris -description: Discover career transitions into ML, prompt engineering and LLMs—practical - debugging tips, transferable skills, hiring insights, and real platform lessons. -intro: How do you move from web and game development into building machine learning - platforms and working with LLMs—and what practical skills carry over? In this episode - Krzysztof Szafanek, a seasoned engineer with 17 years across pharma, geo services, - gaming and online retail, and currently an ML Platform engineer and internal consultant - at Zalando, answers that question through concrete examples and career lessons. -

We trace Krzysztof’s path from HTML5, Objective‑C, Swift and Unity to Python, - ML platform work (the zflow library and pipeline architecture), and hands‑on experiments - with diffusion models, ChatGPT and Modal Labs. Key topics include career transitions - between stacks and roles, platform consulting—training, onboarding and user support—prompt - engineering tips, debugging strategies (rubber ducking, divide‑and‑conquer), and - a real Postgres optimization troubleshooting case. He also discusses transferable - skills like SQL, Git and shell, T‑shaped expertise, hiring dynamics, and how to - get unstuck with ChatGPT and problem decomposition.

Listen to gain practical - guidance on ML platforms, prompt engineering, debugging techniques, and career strategy - for transitioning into ML and LLM work—plus actionable resources and prioritization - tactics you can apply immediately. -dateadded: '2023-02-04' -duration: PT00H59M42S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=0 - endOffset: 132 -- name: 'Career Overview: Web, Game Development, and Python' - startOffset: 132 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=132 - endOffset: 384 -- name: 'Mobile & Game Development: HTML5, Objective‑C, Swift, and Unity' - startOffset: 384 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=384 - endOffset: 425 -- name: 'Career Transitions: Adapting Between Stacks and Roles' - startOffset: 425 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=425 - endOffset: 606 -- name: Tech Radar & Language Freedom at Zalando - startOffset: 606 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=606 - endOffset: 805 -- name: 'Machine Learning Platform: zflow Library and Pipeline Architecture' - startOffset: 805 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=805 - endOffset: 959 -- name: 'Platform Consulting: Training, Onboarding, and User Support' - startOffset: 959 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=959 - endOffset: 1068 -- name: 'From Engineer to Consultant: Reduced Hands‑on Coding' - startOffset: 1068 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1068 - endOffset: 1106 -- name: 'Sabbatical Focus: Learning, Break, and Exploration' - startOffset: 1106 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1106 - endOffset: 1112 -- name: 'Sabbatical Projects: Diffusion Models, ChatGPT Experiments, and Modal Labs' - startOffset: 1112 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1112 - endOffset: 1321 -- name: 'Large Language Models: Coding Assistance, Architecture Sparring, and Caveats' - startOffset: 1321 - url: 
https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1321 - endOffset: 1606 -- name: 'Prompt Engineering: Practical Tips and People to Follow' - startOffset: 1606 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1606 - endOffset: 1740 -- name: 'Transferable Skills: SQL, Git, Shell, and the Lindy Effect' - startOffset: 1740 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=1740 - endOffset: 2014 -- name: 'Troubleshooting Example: Postgres Optimization and Performance Gains' - startOffset: 2014 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2014 - endOffset: 2123 -- name: 'T‑Shaped Expertise: Depth, Breadth, and Career Strategy' - startOffset: 2123 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2123 - endOffset: 2257 -- name: 'Debugging as a Strength: Rubber Duck, Divide‑and‑Conquer, and Mentoring' - startOffset: 2257 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2257 - endOffset: 2692 -- name: 'Learning Motivation: Curiosity, Pomodoro, and Inspirational Content' - startOffset: 2692 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2692 - endOffset: 2917 -- name: 'Getting Unstuck: ChatGPT, Tutorials, and Problem Decomposition' - startOffset: 2917 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=2917 - endOffset: 3022 -- name: 'Hiring Dynamics: Job Descriptions and Recruiting Generalists' - startOffset: 3022 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3022 - endOffset: 3263 -- name: 'Job Market Trends: Specialists vs Generalists in Machine Learning' - startOffset: 3263 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3263 - endOffset: 3382 -- name: 'Prioritization Techniques: To‑Do Lists, Deadlines, and Focus' - startOffset: 3382 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3382 - endOffset: 3491 -- name: 'Learning Resources: Books, Documentation, YouTube, and Practical Learning' - startOffset: 3491 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3491 - endOffset: 3632 -- name: Episode Wrap‑up and Key Takeaways - 
startOffset: 3632 - url: https://www.youtube.com/watch?v=cUxZBXQgZaU&t=3632 - endOffset: 3582 --- Links: diff --git a/_podcast/s08e02-hacking-your-data-career.md b/_podcast/how-to-stand-out-in-data-science.md similarity index 98% rename from _podcast/s08e02-hacking-your-data-career.md rename to _podcast/how-to-stand-out-in-data-science.md index 157d2ece..58d56e51 100644 --- a/_podcast/s08e02-hacking-your-data-career.md +++ b/_podcast/how-to-stand-out-in-data-science.md @@ -1,35 +1,136 @@ --- +title: "Data Science Career Playbook: Build Unique IoT Portfolios, Explainable AI, OSINT & LinkedIn Growth" +short: "Hacking Your Data Career" +season: 8 episode: 2 guests: - marijnmarkus -intro: In this episode, Marijn Markus—AI Lead and Managing Data Scientist at Capgemini—shares - how to stand out in data science by combining curiosity, courage, and creativity. - From his unconventional background in sociology and criminology, Marijn explains - how diverse teams outperform homogeneous ones, why proactive problem-solving matters, - and how to challenge hierarchy with data-driven insights.

You'll learn - how to build unique portfolio projects (like time series modeling from a coffee - machine), apply OSINT concepts to modern analytics, and grow your visibility through - a thoughtful LinkedIn strategy. -date: 2025-11-07 -topics: -- data science -- career growth +image: images/podcast/how-to-stand-out-in-data-science.jpg ids: anchor: Hacking-Your-Data-Career---Marijn-Markus-e1gijep youtube: RhSg8ill1So -image: images/podcast/s08e02-hacking-your-data-career.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Hacking-Your-Data-Career---Marijn-Markus-e1gijep apple: https://podcasts.apple.com/us/podcast/hacking-your-data-career-marijn-markus/id1541710331?i=1000555977653 spotify: https://open.spotify.com/episode/6oJsS0vhvAQasLNv3IklQ6 youtube: https://www.youtube.com/watch?v=RhSg8ill1So -season: 8 -short: Hacking Your Data Career -title: 'Data Science Career Playbook: Build Unique IoT Portfolios, Explainable AI, - OSINT & LinkedIn Growth' -description: 'Discover proven strategies to stand out in data science: build unique - portfolio projects, master proactive task selection, and grow visibility with expert - LinkedIn tactics.' + +description: "Discover proven strategies to stand out in data science: build unique portfolio projects, master proactive task selection, and grow visibility with expert LinkedIn tactics." +intro: "In this episode, Marijn Markus—AI Lead and Managing Data Scientist at Capgemini—shares how to stand out in data science by combining curiosity, courage, and creativity. From his unconventional background in sociology and criminology, Marijn explains how diverse teams outperform homogeneous ones, why proactive problem-solving matters, and how to challenge hierarchy with data-driven insights.

You'll learn how to build unique portfolio projects (like time series modeling from a coffee machine), apply OSINT concepts to modern analytics, and grow your visibility through a thoughtful LinkedIn strategy" +topics: +- data science +- career growth +dateadded: 2022-04-01 +date: 2025-11-07 + +duration: PT01H02M16S + +quotableClips: +- name: Episode Introduction & Guest Welcome + startOffset: 0 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=0 + endOffset: 70 +- name: 'From Sociology to Data Science: Election Models, Social Media & Crime Research' + startOffset: 70 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=70 + endOffset: 242 +- name: Diverse Backgrounds as a Competitive Advantage in Data Science + startOffset: 242 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=242 + endOffset: 409 +- name: 'Hiring Pitfalls: Keyword-Driven Recruitment and Role Mismatch' + startOffset: 409 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=409 + endOffset: 462 +- name: 'Curriculum Myth: Rejecting Perfection—Double Down on Unique Strengths' + startOffset: 462 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=462 + endOffset: 511 +- name: 'Core Pillars: Statistics, Programming, and Domain Knowledge' + startOffset: 511 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=511 + endOffset: 676 +- name: 'Qualitative Methods & Interviews: Turning Social Science into Value' + startOffset: 676 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=676 + endOffset: 725 +- name: 'Proactive Task Ownership: Choosing High-Impact Assignments' + startOffset: 725 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=725 + endOffset: 1029 +- name: 'Learning on the Job: Growing into Management and Product Roles' + startOffset: 1029 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1029 + endOffset: 1152 +- name: 'Explainable AI & Risky Insights: Communicating Sensitive Findings' + startOffset: 1152 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1152 + endOffset: 1405 
+- name: 'Constructive Pushback: Advising Seniors and Challenging Hierarchies' + startOffset: 1405 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1405 + endOffset: 1703 +- name: 'Stretch Assignments: Bite Off More to Discover Your Limits' + startOffset: 1703 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1703 + endOffset: 1847 +- name: 'Home Automation Demo: Home Assistant on Raspberry Pi' + startOffset: 1847 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1847 + endOffset: 1878 +- name: 'IoT for Plants: Sensors, Bluetooth, Zigbee and Practical Monitoring' + startOffset: 1878 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1878 + endOffset: 2071 +- name: 'Data Pipelines for Home Projects: Storage, Thresholds, and Alerts' + startOffset: 2071 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2071 + endOffset: 2181 +- name: 'Coffee Machine Time Series: Turning Laziness into a Portfolio Project' + startOffset: 2181 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2181 + endOffset: 2269 +- name: 'Portfolio Strategy: Build Unique Projects Instead of Only Doing Kaggle' + startOffset: 2269 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2269 + endOffset: 2425 +- name: 'NGO Impact Work: Predicting & Optimizing Smallholder Farmer Yields' + startOffset: 2425 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2425 + endOffset: 2588 +- name: 'Team Composition: Using Domain Experts and Cross-Disciplinary Skills' + startOffset: 2588 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2588 + endOffset: 2759 +- name: 'OSINT Explained: Social Media, GPS Intel and Modern Information Warfare' + startOffset: 2759 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2759 + endOffset: 3053 +- name: 'OSINT Applications: Mapping Reports, Evidence Gathering & Task Forces' + startOffset: 3053 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3053 + endOffset: 3151 +- name: 'Global Ripple Effects: Ukraine, Grain Shortages and Food Security' + 
startOffset: 3151 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3151 + endOffset: 3214 +- name: 'Soft Skills & Differentiation: Communication, Presence and Niche Expertise' + startOffset: 3214 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3214 + endOffset: 3450 +- name: 'LinkedIn Growth Strategy: Timing, Content Mix, Hashtags and Comments' + startOffset: 3450 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3450 + endOffset: 3744 +- name: 'Personal Branding Examples: Memes, Authenticity and Content Types' + startOffset: 3744 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3744 + endOffset: 3777 +- name: Episode Wrap-Up & Where to Find Marijn + startOffset: 3777 + url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3777 + endOffset: 3736 + transcript: - header: Episode Introduction & Guest Welcome - header: 'From Sociology to Data Science: Election Models, Social Media & Crime Research' @@ -1221,111 +1322,4 @@ transcript: sec: 3806 time: '1:03:26' who: Marijn -dateadded: '2022-04-01' -duration: PT01H02M16S -quotableClips: -- name: Episode Introduction & Guest Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=0 - endOffset: 70 -- name: 'From Sociology to Data Science: Election Models, Social Media & Crime Research' - startOffset: 70 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=70 - endOffset: 242 -- name: Diverse Backgrounds as a Competitive Advantage in Data Science - startOffset: 242 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=242 - endOffset: 409 -- name: 'Hiring Pitfalls: Keyword-Driven Recruitment and Role Mismatch' - startOffset: 409 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=409 - endOffset: 462 -- name: 'Curriculum Myth: Rejecting Perfection—Double Down on Unique Strengths' - startOffset: 462 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=462 - endOffset: 511 -- name: 'Core Pillars: Statistics, Programming, and Domain Knowledge' - startOffset: 511 - url: 
https://www.youtube.com/watch?v=RhSg8ill1So&t=511 - endOffset: 676 -- name: 'Qualitative Methods & Interviews: Turning Social Science into Value' - startOffset: 676 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=676 - endOffset: 725 -- name: 'Proactive Task Ownership: Choosing High-Impact Assignments' - startOffset: 725 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=725 - endOffset: 1029 -- name: 'Learning on the Job: Growing into Management and Product Roles' - startOffset: 1029 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1029 - endOffset: 1152 -- name: 'Explainable AI & Risky Insights: Communicating Sensitive Findings' - startOffset: 1152 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1152 - endOffset: 1405 -- name: 'Constructive Pushback: Advising Seniors and Challenging Hierarchies' - startOffset: 1405 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1405 - endOffset: 1703 -- name: 'Stretch Assignments: Bite Off More to Discover Your Limits' - startOffset: 1703 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1703 - endOffset: 1847 -- name: 'Home Automation Demo: Home Assistant on Raspberry Pi' - startOffset: 1847 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1847 - endOffset: 1878 -- name: 'IoT for Plants: Sensors, Bluetooth, Zigbee and Practical Monitoring' - startOffset: 1878 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=1878 - endOffset: 2071 -- name: 'Data Pipelines for Home Projects: Storage, Thresholds, and Alerts' - startOffset: 2071 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2071 - endOffset: 2181 -- name: 'Coffee Machine Time Series: Turning Laziness into a Portfolio Project' - startOffset: 2181 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2181 - endOffset: 2269 -- name: 'Portfolio Strategy: Build Unique Projects Instead of Only Doing Kaggle' - startOffset: 2269 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2269 - endOffset: 2425 -- name: 'NGO Impact Work: Predicting & 
Optimizing Smallholder Farmer Yields' - startOffset: 2425 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2425 - endOffset: 2588 -- name: 'Team Composition: Using Domain Experts and Cross-Disciplinary Skills' - startOffset: 2588 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2588 - endOffset: 2759 -- name: 'OSINT Explained: Social Media, GPS Intel and Modern Information Warfare' - startOffset: 2759 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=2759 - endOffset: 3053 -- name: 'OSINT Applications: Mapping Reports, Evidence Gathering & Task Forces' - startOffset: 3053 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3053 - endOffset: 3151 -- name: 'Global Ripple Effects: Ukraine, Grain Shortages and Food Security' - startOffset: 3151 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3151 - endOffset: 3214 -- name: 'Soft Skills & Differentiation: Communication, Presence and Niche Expertise' - startOffset: 3214 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3214 - endOffset: 3450 -- name: 'LinkedIn Growth Strategy: Timing, Content Mix, Hashtags and Comments' - startOffset: 3450 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3450 - endOffset: 3744 -- name: 'Personal Branding Examples: Memes, Authenticity and Content Types' - startOffset: 3744 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3744 - endOffset: 3777 -- name: Episode Wrap-Up & Where to Find Marijn - startOffset: 3777 - url: https://www.youtube.com/watch?v=RhSg8ill1So&t=3777 - endOffset: 3736 --- diff --git a/_podcast/s08e07-from-roasting-coffee-to-backend-development.md b/_podcast/how-to-switch-to-ml-tech-without-experience.md similarity index 96% rename from _podcast/s08e07-from-roasting-coffee-to-backend-development.md rename to _podcast/how-to-switch-to-ml-tech-without-experience.md index 38469619..f8f04b5b 100644 --- a/_podcast/s08e07-from-roasting-coffee-to-backend-development.md +++ b/_podcast/how-to-switch-to-ml-tech-without-experience.md @@ -1,41 +1,129 @@ --- 
+title: "How to Switch to Tech: Community Meetups, Open Source Fellowships & Landing an Ecosia Internship" +short: "From Roasting Coffee to Backend Development" +season: 8 episode: 7 guests: - jessicagreene -intro: How do you switch to tech from a completely different career and actually land - an internship at a mission-driven company? In this episode, Jessica Greene — Senior - Machine Learning Engineer at Ecosia and co-organizer of PyLadies Berlin — walks - through her journey from film and coffee roasting to machine learning, sharing concrete - steps for a career change to tech. We cover the learning path Jessica used (Codecademy, - Andrew Ng, FreeCodeCamp), funding and study time via Germany’s Bildungsgutschein, - and hands-on experience through an open source fellowship (Rails Girls Summer of - Code) and pair programming. You’ll hear how community meetups, PyLadies, and networking - translated into an Ecosia internship, what interviewers notice (inquisitiveness, - creating roles), and practical tips for building system skills (terminal, dual-boot - Linux), overcoming imposter syndrome, and getting started with public speaking and - event organizing. If you’re considering a switch to tech, this episode offers realistic - guidance on open source fellowships, meetups, study resources, and interview strategies - to help you build skills, confidence, and professional connections. 
+image: images/podcast/how-to-switch-to-ml-tech-without-experience.jpg ids: anchor: From-Roasting-Coffee-to-Backend-Development---Jessica-Greene-e1i1ten/a-a7s65oj youtube: BKqmNdxsBko -image: images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Roasting-Coffee-to-Backend-Development---Jessica-Greene-e1i1ten/a-a7s65oj apple: https://podcasts.apple.com/us/podcast/from-roasting-coffee-to-backend-development-jessica/id1541710331?i=1000559856138 spotify: https://open.spotify.com/episode/3AnUc03nLbIYS6ichWIrRE?si=momJMlwdTpKFkI0FYQilag youtube: https://www.youtube.com/watch?v=BKqmNdxsBko -season: 8 -short: From Roasting Coffee to Backend Development -title: 'How to Switch to Tech: Community Meetups, Open Source Fellowships & Landing - an Ecosia Internship' -description: 'Discover practical career switch tips: meetups, open source fellowship - & landing an Ecosia internship—networking, study paths, funding, mentorship to get - hired.' + +description: "Discover practical career switch tips: meetups, open source fellowship & landing an Ecosia internship—networking, study paths, funding, mentorship to get hired." +intro: "How do you switch to tech from a completely different career and actually land an internship at a mission-driven company? In this episode, Jessica Greene — Senior Machine Learning Engineer at Ecosia and co-organizer of PyLadies Berlin — walks through her journey from film and coffee roasting to machine learning, sharing concrete steps for a career change to tech. We cover the learning path Jessica used (Codecademy, Andrew Ng, FreeCodeCamp), funding and study time via Germany’s Bildungsgutschein, and hands-on experience through an open source fellowship (Rails Girls Summer of Code) and pair programming. 
You’ll hear how community meetups, PyLadies, and networking translated into an Ecosia internship, what interviewers notice (inquisitiveness, creating roles), and practical tips for building system skills (terminal, dual-boot Linux), overcoming imposter syndrome, and getting started with public speaking and event organizing. If you’re considering a switch to tech, this episode offers realistic guidance on open source fellowships, meetups, study resources, and interview strategies to help you build skills, confidence, and professional connections" topics: - career switch -- data science +- machine learning +- job search - career growth +dateadded: 2022-05-07 + +duration: PT00H59M32S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=0 + endOffset: 84 +- name: 'Career Origin: From Film & Coffee Roasting to Tech' + startOffset: 84 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=84 + endOffset: 161 +- name: Community Support & Early Conference Exposure (PyLadies, meetups) + startOffset: 161 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=161 + endOffset: 391 +- name: 'Learning Path: Codecademy, Andrew Ng Course & FreeCodeCamp' + startOffset: 391 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=391 + endOffset: 506 +- name: 'Funding Support: German Bildungsgutschein & Structured Study Time' + startOffset: 506 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=506 + endOffset: 713 +- name: 'Open Source Fellowship: Rails Girls Summer of Code & Pair Programming' + startOffset: 713 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=713 + endOffset: 923 +- name: 'Meetups to Internship: Networking That Led to Ecosia' + startOffset: 923 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=923 + endOffset: 993 +- name: 'Interview Impressions: Inquisitiveness & Creating an Internship Role' + startOffset: 993 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=993 + endOffset: 1168 +- name: 
'Career Switch Timing: Age, Concerns & Perceptions' + startOffset: 1168 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1168 + endOffset: 1342 +- name: 'Staying Motivated: Community, Mastermind Groups & Meetups' + startOffset: 1342 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1342 + endOffset: 1550 +- name: 'Community Organizing: Event Management, Soft Skills & Leadership' + startOffset: 1550 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1550 + endOffset: 1661 +- name: 'Overcoming Imposter Syndrome: Jupyter Notebook Setup & Peer Support' + startOffset: 1661 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1661 + endOffset: 1932 +- name: 'System Skills: Terminal, Dual-Boot Linux & Hands-on Troubleshooting' + startOffset: 1932 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1932 + endOffset: 2054 +- name: 'Community Events: Workshops, Study Groups & Remote Formats' + startOffset: 2054 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2054 + endOffset: 2172 +- name: 'Open Source Hack Evenings: Mentorship with scikit-learn & Gene.ai' + startOffset: 2172 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2172 + endOffset: 2283 +- name: 'Hybrid Events & Outreach: Remote Reach vs. 
In-Person Help' + startOffset: 2283 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2283 + endOffset: 2451 +- name: 'Organizing Benefits: Networking, Company Access & Management Skills' + startOffset: 2451 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2451 + endOffset: 2574 +- name: 'Public Speaking: Start Small, Dry Runs & Crafting a Personal Edge' + startOffset: 2574 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2574 + endOffset: 2811 +- name: 'Handling Q&A: Graceful "I Don''t Know" & Turning Questions into Learning' + startOffset: 2811 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2811 + endOffset: 3032 +- name: 'Speaking ROI: Visibility, Networking & Career Opportunities' + startOffset: 3032 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3032 + endOffset: 3336 +- name: 'Ecosia Overview: Green Search Engine, Tree-Planting Mission & Backend (Go)' + startOffset: 3336 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3336 + endOffset: 3490 +- name: 'Diversity Challenges: Gender, Privilege & Inclusion in Tech' + startOffset: 3490 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3490 + endOffset: 3593 +- name: 'Connect with Jessica: Twitter, GitHub & PyLadies Slack' + startOffset: 3593 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3593 + endOffset: 3628 +- name: Episode Wrap-Up and Closing Remarks + startOffset: 3628 + url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3628 + endOffset: 3572 + transcript: - header: Podcast Introduction - header: 'Career Origin: From Film & Coffee Roasting to Tech' @@ -601,7 +689,7 @@ transcript: sec: 1929 time: '32:09' who: Alexey -- header: 'System Skills: Terminal, Dual‑Boot Linux & Hands‑on Troubleshooting' +- header: 'System Skills: Terminal, Dual-Boot Linux & Hands-on Troubleshooting' - line: Yeah, I did some courses on this, for sure. I think the only thing with doing it in a browser-based tool is you're not actually on your machine. 
I remember during the Rails Girls Summer of Code, I wanted to dual-loop my machine into Linux, @@ -681,7 +769,7 @@ transcript: sec: 2169 time: '36:09' who: Alexey -- header: 'Open Source Hack Evenings: Mentorship with scikit‑learn & Gene.ai' +- header: 'Open Source Hack Evenings: Mentorship with scikit-learn & Gene.ai' - line: Yeah [chuckles]. Well, actually, we ran one in January and I think we had at least three folks from that event then go on to speak at PyCon.de, which is really awesome. Right now, we're trying to experiment a lot more with being much @@ -714,7 +802,7 @@ transcript: sec: 2267 time: '37:47' who: Alexey -- header: 'Hybrid Events & Outreach: Remote Reach vs. In‑Person Help' +- header: 'Hybrid Events & Outreach: Remote Reach vs. In-Person Help' - line: I mean, yeah, that's true. [cross-talk] It's interesting. I think there's different overhead with online. But what we would love to do is hybrid. We found being remote allowed us to reach more people, because we could also do collaborations @@ -1038,7 +1126,7 @@ transcript: sec: 3329 time: '55:29' who: Jessica -- header: 'Ecosia Overview: Green Search Engine, Tree‑Planting Mission & Backend (Go)' +- header: 'Ecosia Overview: Green Search Engine, Tree-Planting Mission & Backend (Go)' - line: I see that we don't have a lot of time left and I really wanted to talk to you about the company where you work – Ecosia. I hope I'm pronouncing it correctly. I know that you're doing some amazing stuff there. So can you tell us more about @@ -1121,7 +1209,7 @@ transcript: sec: 3606 time: '1:00:06' who: Jessica -- header: Episode Wrap‑Up and Closing Remarks +- header: Episode Wrap-Up and Closing Remarks - line: Okay, thanks. Thanks a lot for joining us today, for sharing your story, for telling us about how you did that. And thanks, everyone, for joining us today as well, for asking questions, for being here today. I think that's all for today. 
@@ -1141,105 +1229,6 @@ transcript: sec: 3656 time: '1:00:56' who: Jessica -dateadded: '2022-05-07' -duration: PT00H59M32S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=0 - endOffset: 84 -- name: 'Career Origin: From Film & Coffee Roasting to Tech' - startOffset: 84 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=84 - endOffset: 161 -- name: Community Support & Early Conference Exposure (PyLadies, meetups) - startOffset: 161 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=161 - endOffset: 391 -- name: 'Learning Path: Codecademy, Andrew Ng Course & FreeCodeCamp' - startOffset: 391 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=391 - endOffset: 506 -- name: 'Funding Support: German Bildungsgutschein & Structured Study Time' - startOffset: 506 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=506 - endOffset: 713 -- name: 'Open Source Fellowship: Rails Girls Summer of Code & Pair Programming' - startOffset: 713 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=713 - endOffset: 923 -- name: 'Meetups to Internship: Networking That Led to Ecosia' - startOffset: 923 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=923 - endOffset: 993 -- name: 'Interview Impressions: Inquisitiveness & Creating an Internship Role' - startOffset: 993 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=993 - endOffset: 1168 -- name: 'Career Switch Timing: Age, Concerns & Perceptions' - startOffset: 1168 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1168 - endOffset: 1342 -- name: 'Staying Motivated: Community, Mastermind Groups & Meetups' - startOffset: 1342 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1342 - endOffset: 1550 -- name: 'Community Organizing: Event Management, Soft Skills & Leadership' - startOffset: 1550 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1550 - endOffset: 1661 -- name: 'Overcoming Imposter Syndrome: Jupyter Notebook Setup & Peer Support' - 
startOffset: 1661 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1661 - endOffset: 1932 -- name: 'System Skills: Terminal, Dual‑Boot Linux & Hands‑on Troubleshooting' - startOffset: 1932 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=1932 - endOffset: 2054 -- name: 'Community Events: Workshops, Study Groups & Remote Formats' - startOffset: 2054 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2054 - endOffset: 2172 -- name: 'Open Source Hack Evenings: Mentorship with scikit‑learn & Gene.ai' - startOffset: 2172 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2172 - endOffset: 2283 -- name: 'Hybrid Events & Outreach: Remote Reach vs. In‑Person Help' - startOffset: 2283 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2283 - endOffset: 2451 -- name: 'Organizing Benefits: Networking, Company Access & Management Skills' - startOffset: 2451 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2451 - endOffset: 2574 -- name: 'Public Speaking: Start Small, Dry Runs & Crafting a Personal Edge' - startOffset: 2574 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2574 - endOffset: 2811 -- name: 'Handling Q&A: Graceful "I Don''t Know" & Turning Questions into Learning' - startOffset: 2811 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=2811 - endOffset: 3032 -- name: 'Speaking ROI: Visibility, Networking & Career Opportunities' - startOffset: 3032 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3032 - endOffset: 3336 -- name: 'Ecosia Overview: Green Search Engine, Tree‑Planting Mission & Backend (Go)' - startOffset: 3336 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3336 - endOffset: 3490 -- name: 'Diversity Challenges: Gender, Privilege & Inclusion in Tech' - startOffset: 3490 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3490 - endOffset: 3593 -- name: 'Connect with Jessica: Twitter, GitHub & PyLadies Slack' - startOffset: 3593 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3593 - endOffset: 3628 -- name: Episode 
Wrap‑Up and Closing Remarks - startOffset: 3628 - url: https://www.youtube.com/watch?v=BKqmNdxsBko&t=3628 - endOffset: 3572 --- Links: diff --git a/_podcast/s11e01-from-testing-phones-to-managing-nlp-projects.md b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md similarity index 97% rename from _podcast/s11e01-from-testing-phones-to-managing-nlp-projects.md rename to _podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md index cf28ff39..a3f7fd7e 100644 --- a/_podcast/s11e01-from-testing-phones-to-managing-nlp-projects.md +++ b/_podcast/how-to-transition-into-ml-and-data-engineering-from-qa.md @@ -1,20 +1,126 @@ --- +title: "Transition from QA to Machine Learning & Data Engineering: Projects, Cloud & Interview Prep" +short: "From Testing Phones to Managing NLP Projects" +season: 11 episode: 1 guests: - alvaronavaspeire +image: images/podcast/how-to-transition-into-ml-and-data-engineering-from-qa.jpg ids: anchor: From-Testing-Phones-to-Managing-NLP-Projects---Alvaro-Navas-Peire-e1oj7n8 youtube: -xumbiXOlA8 -image: images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Testing-Phones-to-Managing-NLP-Projects---Alvaro-Navas-Peire-e1oj7n8 apple: https://podcasts.apple.com/us/podcast/from-testing-phones-to-managing-nlp-projects-alvaro/id1541710331?i=1000581943071 spotify: https://open.spotify.com/episode/1LMg70fGthIR2jF4JdmFkb?si=BmEfOtfgSEOpKvp5ENRA2g youtube: https://www.youtube.com/watch?v=-xumbiXOlA8 -season: 11 -short: From Testing Phones to Managing NLP Projects -title: 'Transition from QA to Machine Learning & Data Engineering: Projects, Cloud - & Interview Prep' + +description: "Master the transition to machine learning & data engineering: build cloud-deployed projects, sharpen interview prep, and revamp your CV to land offers." 
+intro: "How do you move from a QA role into machine learning and data engineering—what projects, cloud skills, and interview prep actually make a difference? In this episode Alvaro Navas Peire walks through his journey from testing Android phones and QA checklists to quitting the industry, taking a gap year, and retraining in machine learning and data engineering. With an informatics engineering background and hands-on experience from postgraduate courses, Neuromatch, and DataTalks’ ML & DE Zoomcamps, Alvaro explains the structured learning path he followed and the portfolio projects (EDA, vegetable image classification, NLP) that proved useful for hiring teams.

We cover practical topics: cloud deployment on Google Cloud, AWS and Azure; using cloud credits and Databricks; how to present projects without underselling them; technical note-taking and GitHub visibility; and role-play for interview soft skills. Alvaro also contrasts research-heavy ML with tooling-focused data engineering and shares CV, portfolio, and negotiation tips. Tune in if you’re planning a transition to machine learning or data engineering and need concrete guidance on projects, cloud experience, and interview preparation." +topics: +- QA +- machine learning +- data engineering +- career transition +- job search +dateadded: 2022-10-07 + +duration: PT01H01M24S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=0 + endOffset: 75 +- name: Early Life & Informatics Engineering; phone industry beginnings + startOffset: 75 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=75 + endOffset: 221 +- name: 'Phone prototyping and field testing: QA checklists, CTS & RF testing' + startOffset: 221 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=221 + endOffset: 515 +- name: 'Career pivot: quitting QA, gap year, and discovering machine learning' + startOffset: 515 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=515 + endOffset: 812 +- name: 'Structured learning path: postgraduate course, Neuromatch Academy, ML & Data + Engineering Zoomcamps' + startOffset: 812 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=812 + endOffset: 1077 +- name: 'Job search strategy: improving soft skills, hiring a coach, and CV redesign' + startOffset: 1077 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1077 + endOffset: 1358 +- name: 'Interview soft skills: role-play, confidence building, and behavioral prep' + startOffset: 1358 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1358 + endOffset: 1497 +- name: 'Zoomcamp projects: speed-dating EDA and vegetable image-classification' + startOffset: 1497 
+ url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1497 + endOffset: 1636 +- name: 'Project deployment experience: Google Cloud, AWS exercises, and cloud credits' + startOffset: 1636 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1636 + endOffset: 1732 +- name: 'Presenting projects objectively: avoid underselling and focus on facts' + startOffset: 1732 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1732 + endOffset: 1898 +- name: 'Interview formats encountered: take-home tasks, time-series exercise, and + NLP-focused hiring' + startOffset: 1898 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1898 + endOffset: 2041 +- name: 'Cloud familiarity in interviews: Google Cloud, Azure, AWS—what mattered' + startOffset: 2041 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2041 + endOffset: 2102 +- name: 'Creating technical notes: long-form Markdown, GitHub gists, and screenshots' + startOffset: 2102 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2102 + endOffset: 2238 +- name: 'Note-taking workflow: video pause-write method, indexes, and VS Code' + startOffset: 2238 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2238 + endOffset: 2613 +- name: 'Role of a coach: negotiation practice, interview framing, and communication' + startOffset: 2613 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2613 + endOffset: 2859 +- name: 'Skill distinction: math-heavy research ML vs. 
tooling-focused data engineering' + startOffset: 2859 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2859 + endOffset: 2972 +- name: 'Technical interview prep: tailor study to role, projects, and hands-on exercises' + startOffset: 2972 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2972 + endOffset: 3113 +- name: 'Typical workday as an ML project manager: planning, Teams, and task coordination' + startOffset: 3113 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3113 + endOffset: 3252 +- name: 'Production tech stack: Azure, Databricks, AutoKeras, Azure Data Factory, + and SQL' + startOffset: 3252 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3252 + endOffset: 3371 +- name: 'Transition advice: programming background, math, and transferable skills + for ML careers' + startOffset: 3371 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3371 + endOffset: 3626 +- name: 'CV and portfolio tips: visual résumé, GitHub visibility, and sample CV link' + startOffset: 3626 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3626 + endOffset: 3731 +- name: 'Closing remarks & links: CV, GitHub, and LinkedIn resources' + startOffset: 3731 + url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3731 + endOffset: 3684 + transcript: - header: Podcast Introduction - header: Early Life & Informatics Engineering; phone industry beginnings @@ -1269,118 +1375,6 @@ transcript: sec: 3759 time: '1:02:39' who: Alexey -description: 'Master the transition to machine learning & data engineering: build - cloud-deployed projects, sharpen interview prep, and revamp your CV to land offers.' -intro: 'How do you move from a QA role into machine learning and data engineering—what - projects, cloud skills, and interview prep actually make a difference? In this episode - Alvaro Navas Peire walks through his journey from testing Android phones and QA - checklists to quitting the industry, taking a gap year, and retraining in machine - learning and data engineering. 
With an informatics engineering background and hands-on - experience from postgraduate courses, Neuromatch, and DataTalks’ ML & DE Zoomcamps, - Alvaro explains the structured learning path he followed and the portfolio projects - (EDA, vegetable image classification, NLP) that proved useful for hiring teams. -

We cover practical topics: cloud deployment on Google Cloud, AWS and Azure; - using cloud credits and Databricks; how to present projects without underselling - them; technical note-taking and GitHub visibility; and role-play for interview soft - skills. Alvaro also contrasts research-heavy ML with tooling-focused data engineering - and shares CV, portfolio, and negotiation tips. Tune in if you’re planning a transition - to machine learning or data engineering and need concrete guidance on projects, - cloud experience, and interview preparation.' -dateadded: '2022-10-07' -duration: PT01H01M24S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=0 - endOffset: 75 -- name: Early Life & Informatics Engineering; phone industry beginnings - startOffset: 75 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=75 - endOffset: 221 -- name: 'Phone prototyping and field testing: QA checklists, CTS & RF testing' - startOffset: 221 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=221 - endOffset: 515 -- name: 'Career pivot: quitting QA, gap year, and discovering machine learning' - startOffset: 515 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=515 - endOffset: 812 -- name: 'Structured learning path: postgraduate course, Neuromatch Academy, ML & Data - Engineering Zoomcamps' - startOffset: 812 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=812 - endOffset: 1077 -- name: 'Job search strategy: improving soft skills, hiring a coach, and CV redesign' - startOffset: 1077 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1077 - endOffset: 1358 -- name: 'Interview soft skills: role-play, confidence building, and behavioral prep' - startOffset: 1358 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1358 - endOffset: 1497 -- name: 'Zoomcamp projects: speed-dating EDA and vegetable image-classification' - startOffset: 1497 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1497 - endOffset: 1636 -- 
name: 'Project deployment experience: Google Cloud, AWS exercises, and cloud credits' - startOffset: 1636 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1636 - endOffset: 1732 -- name: 'Presenting projects objectively: avoid underselling and focus on facts' - startOffset: 1732 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1732 - endOffset: 1898 -- name: 'Interview formats encountered: take-home tasks, time-series exercise, and - NLP-focused hiring' - startOffset: 1898 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=1898 - endOffset: 2041 -- name: 'Cloud familiarity in interviews: Google Cloud, Azure, AWS—what mattered' - startOffset: 2041 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2041 - endOffset: 2102 -- name: 'Creating technical notes: long-form Markdown, GitHub gists, and screenshots' - startOffset: 2102 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2102 - endOffset: 2238 -- name: 'Note-taking workflow: video pause-write method, indexes, and VS Code' - startOffset: 2238 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2238 - endOffset: 2613 -- name: 'Role of a coach: negotiation practice, interview framing, and communication' - startOffset: 2613 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2613 - endOffset: 2859 -- name: 'Skill distinction: math-heavy research ML vs. 
tooling-focused data engineering' - startOffset: 2859 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2859 - endOffset: 2972 -- name: 'Technical interview prep: tailor study to role, projects, and hands-on exercises' - startOffset: 2972 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=2972 - endOffset: 3113 -- name: 'Typical workday as an ML project manager: planning, Teams, and task coordination' - startOffset: 3113 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3113 - endOffset: 3252 -- name: 'Production tech stack: Azure, Databricks, AutoKeras, Azure Data Factory, - and SQL' - startOffset: 3252 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3252 - endOffset: 3371 -- name: 'Transition advice: programming background, math, and transferable skills - for ML careers' - startOffset: 3371 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3371 - endOffset: 3626 -- name: 'CV and portfolio tips: visual résumé, GitHub visibility, and sample CV link' - startOffset: 3626 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3626 - endOffset: 3731 -- name: 'Closing remarks & links: CV, GitHub, and LinkedIn resources' - startOffset: 3731 - url: https://www.youtube.com/watch?v=-xumbiXOlA8&t=3731 - endOffset: 3684 --- Links: diff --git a/_podcast/s09e06-developer-advocacy-engineer-for-open-source.md b/_podcast/hugging-face-contributions-and-nlp-portfolio.md similarity index 97% rename from _podcast/s09e06-developer-advocacy-engineer-for-open-source.md rename to _podcast/hugging-face-contributions-and-nlp-portfolio.md index f5b142ae..be08054b 100644 --- a/_podcast/s09e06-developer-advocacy-engineer-for-open-source.md +++ b/_podcast/hugging-face-contributions-and-nlp-portfolio.md @@ -1,20 +1,137 @@ --- +title: "Contribute to Hugging Face & Build an NLP Portfolio: Open Source, Datasets, Spaces" +short: "Developer Advocacy Engineer for Open-Source" +season: 9 episode: 6 guests: - mervenoyan +image: 
images/podcast/hugging-face-contributions-and-nlp-portfolio.jpg ids: anchor: Developer-Advocacy-Engineer-for-Open-Source---Merve-Noyan-e1kcm3u youtube: SnEYvF-Ztb8 -image: images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Developer-Advocacy-Engineer-for-Open-Source---Merve-Noyan-e1kcm3u apple: https://podcasts.apple.com/us/podcast/developer-advocacy-engineer-for-open-source-merve-noyan/id1541710331?i=1000568463048 spotify: https://open.spotify.com/episode/5k60LWIwnMpvaIbTaryRv4?si=liHqmXVYT-uB1PO4uB65OQ youtube: https://www.youtube.com/watch?v=SnEYvF-Ztb8 -season: 9 -short: Developer Advocacy Engineer for Open-Source -title: 'Contribute to Hugging Face & Build an NLP Portfolio: Open Source, Datasets, - Spaces' + +description: "Build an NLP portfolio on Hugging Face: contribute to open source, publish datasets, deploy Spaces demos, gain PR skills and boost hiring odds." +intro: "How do you go from beginner projects to contributing to Hugging Face and building an visible NLP portfolio? In this episode, Merve Noyan — Google Developer Expert in Machine Learning, grad student in Data Science, and NLP-focused ML engineer — walks through practical steps for contributing to open source, datasets, and Hugging Face Spaces.

We cover Merve’s transition into NLP, finding open source via contribution sprints and good-first issues, and the nuts-and-bolts of datasets work: canonical datasets, scripts, and CI. Learn how the Hub, TensorFlow & Keras integrations, and model reproducibility features support a reproducible workflow and model registry concepts. Merve explains creating demo apps with Streamlit or Gradio on Spaces, using the Community tab and forums, and how workshops and sprints build confidence.

You’ll also get concrete advice on starting contributions while working full-time, non-code contributions, structured programs like Google Summer of Code and Hacktoberfest, handling PR feedback, and what hiring managers look for on GitHub. Tune in to walk away with actionable steps to contribute to Hugging Face, publish datasets and demos, and build an NLP portfolio recruiters can evaluate." +topics: +- machine learning +- NLP +- open-source +dateadded: 2022-07-02 + +duration: PT00H58M05S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=0 + endOffset: 85 +- name: Guest Welcome & Episode Overview + startOffset: 85 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=85 + endOffset: 122 +- name: 'Early Career: Industrial Engineering to NLP' + startOffset: 122 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=122 + endOffset: 252 +- name: 'Transition to NLP: First Projects & Sentiment Analysis' + startOffset: 252 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=252 + endOffset: 390 +- name: 'Open Source Discovery: Finding Hugging Face & Contribution Sprints' + startOffset: 390 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=390 + endOffset: 493 +- name: 'Datasets Work: Canonical Datasets, Scripts, and CI Learning' + startOffset: 493 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=493 + endOffset: 631 +- name: 'Contributor Onboarding: Sprints, Good-First Issues, and Confidence Building' + startOffset: 631 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=631 + endOffset: 693 +- name: 'Contributing as a Side Project: Motivation and Timing' + startOffset: 693 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=693 + endOffset: 766 +- name: 'Hugging Face Projects: Tasks, Hub, TensorFlow & Keras Integration' + startOffset: 766 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=766 + endOffset: 942 +- name: 'Model Reproducibility: Hub Features and Model Registry Concepts' + 
startOffset: 942 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=942 + endOffset: 1057 +- name: 'Spaces & Community Tab: Demos with Streamlit/Gradio and Community Collaboration' + startOffset: 1057 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1057 + endOffset: 1111 +- name: 'Developer Experience: Forum Support, Workshops, and Keras Sprints' + startOffset: 1111 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1111 + endOffset: 1288 +- name: 'Role Balance: Engineering vs. Advocacy Time Split' + startOffset: 1288 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1288 + endOffset: 1406 +- name: 'Hiring Signals: Evaluating Open Source Experience on GitHub' + startOffset: 1406 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1406 + endOffset: 1509 +- name: 'Getting Started with Open Source: Sprints, Documentation, and Non-Code Contributions' + startOffset: 1509 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1509 + endOffset: 1643 +- name: 'Structured Programs: Google Summer of Code and Hacktoberfest' + startOffset: 1643 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1643 + endOffset: 1766 +- name: 'Learning from PRs: Contributing to scikit-learn and Code Quality' + startOffset: 1766 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1766 + endOffset: 1821 +- name: 'Hiring Expectations: Working with Large Codebases and PR Workflows' + startOffset: 1821 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1821 + endOffset: 2003 +- name: 'Handling PR Rejections: Discussions, Design Decisions, and Unit Tests' + startOffset: 2003 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2003 + endOffset: 2282 +- name: 'NLP Learning Resources: Courses, spaCy, Keras Examples, and Transfer Learning' + startOffset: 2282 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2282 + endOffset: 2581 +- name: 'Beginner NLP Projects: Sentiment Analysis and Classification Tasks' + startOffset: 2581 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2581 
+ endOffset: 3072 +- name: 'Portfolio Advice: Deploying Demos with Streamlit, Gradio, and Hugging Face + Spaces' + startOffset: 3072 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3072 + endOffset: 3349 +- name: 'Content Creation: Twitch Streaming and Podcast Plans' + startOffset: 3349 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3349 + endOffset: 3462 +- name: 'Contact & Community: Slack, Twitter, and DataTalks.club Outreach' + startOffset: 3462 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3462 + endOffset: 3494 +- name: 'Personal Anecdote: Mario Kart at Hugging Face' + startOffset: 3494 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3494 + endOffset: 3551 +- name: Episode Outro & Next Steps + startOffset: 3551 + url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3551 + endOffset: 3485 + transcript: - header: Podcast Introduction - header: Guest Welcome & Episode Overview @@ -1099,132 +1216,6 @@ transcript: sec: 3570 time: '59:30' who: Alexey -description: 'Build an NLP portfolio on Hugging Face: contribute to open source, publish - datasets, deploy Spaces demos, gain PR skills and boost hiring odds.' -intro: 'How do you go from beginner projects to contributing to Hugging Face and building - an visible NLP portfolio? In this episode, Merve Noyan — Google Developer Expert - in Machine Learning, grad student in Data Science, and NLP-focused ML engineer — - walks through practical steps for contributing to open source, datasets, and Hugging - Face Spaces.

We cover Merve’s transition into NLP, finding open source - via contribution sprints and good-first issues, and the nuts-and-bolts of datasets - work: canonical datasets, scripts, and CI. Learn how the Hub, TensorFlow & Keras - integrations, and model reproducibility features support a reproducible workflow - and model registry concepts. Merve explains creating demo apps with Streamlit or - Gradio on Spaces, using the Community tab and forums, and how workshops and sprints - build confidence.

You’ll also get concrete advice on starting contributions - while working full-time, non-code contributions, structured programs like Google - Summer of Code and Hacktoberfest, handling PR feedback, and what hiring managers - look for on GitHub. Tune in to walk away with actionable steps to contribute to - Hugging Face, publish datasets and demos, and build an NLP portfolio recruiters - can evaluate.' -dateadded: '2022-07-02' -duration: PT00H58M05S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=0 - endOffset: 85 -- name: Guest Welcome & Episode Overview - startOffset: 85 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=85 - endOffset: 122 -- name: 'Early Career: Industrial Engineering to NLP' - startOffset: 122 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=122 - endOffset: 252 -- name: 'Transition to NLP: First Projects & Sentiment Analysis' - startOffset: 252 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=252 - endOffset: 390 -- name: 'Open Source Discovery: Finding Hugging Face & Contribution Sprints' - startOffset: 390 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=390 - endOffset: 493 -- name: 'Datasets Work: Canonical Datasets, Scripts, and CI Learning' - startOffset: 493 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=493 - endOffset: 631 -- name: 'Contributor Onboarding: Sprints, Good-First Issues, and Confidence Building' - startOffset: 631 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=631 - endOffset: 693 -- name: 'Contributing as a Side Project: Motivation and Timing' - startOffset: 693 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=693 - endOffset: 766 -- name: 'Hugging Face Projects: Tasks, Hub, TensorFlow & Keras Integration' - startOffset: 766 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=766 - endOffset: 942 -- name: 'Model Reproducibility: Hub Features and Model Registry Concepts' - startOffset: 942 - url: 
https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=942 - endOffset: 1057 -- name: 'Spaces & Community Tab: Demos with Streamlit/Gradio and Community Collaboration' - startOffset: 1057 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1057 - endOffset: 1111 -- name: 'Developer Experience: Forum Support, Workshops, and Keras Sprints' - startOffset: 1111 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1111 - endOffset: 1288 -- name: 'Role Balance: Engineering vs. Advocacy Time Split' - startOffset: 1288 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1288 - endOffset: 1406 -- name: 'Hiring Signals: Evaluating Open Source Experience on GitHub' - startOffset: 1406 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1406 - endOffset: 1509 -- name: 'Getting Started with Open Source: Sprints, Documentation, and Non-Code Contributions' - startOffset: 1509 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1509 - endOffset: 1643 -- name: 'Structured Programs: Google Summer of Code and Hacktoberfest' - startOffset: 1643 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1643 - endOffset: 1766 -- name: 'Learning from PRs: Contributing to scikit-learn and Code Quality' - startOffset: 1766 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1766 - endOffset: 1821 -- name: 'Hiring Expectations: Working with Large Codebases and PR Workflows' - startOffset: 1821 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=1821 - endOffset: 2003 -- name: 'Handling PR Rejections: Discussions, Design Decisions, and Unit Tests' - startOffset: 2003 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2003 - endOffset: 2282 -- name: 'NLP Learning Resources: Courses, spaCy, Keras Examples, and Transfer Learning' - startOffset: 2282 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2282 - endOffset: 2581 -- name: 'Beginner NLP Projects: Sentiment Analysis and Classification Tasks' - startOffset: 2581 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=2581 - endOffset: 3072 -- 
name: 'Portfolio Advice: Deploying Demos with Streamlit, Gradio, and Hugging Face - Spaces' - startOffset: 3072 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3072 - endOffset: 3349 -- name: 'Content Creation: Twitch Streaming and Podcast Plans' - startOffset: 3349 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3349 - endOffset: 3462 -- name: 'Contact & Community: Slack, Twitter, and DataTalks.club Outreach' - startOffset: 3462 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3462 - endOffset: 3494 -- name: 'Personal Anecdote: Mario Kart at Hugging Face' - startOffset: 3494 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3494 - endOffset: 3551 -- name: Episode Outro & Next Steps - startOffset: 3551 - url: https://www.youtube.com/watch?v=SnEYvF-Ztb8&t=3551 - endOffset: 3485 --- Links: diff --git a/_podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.md b/_podcast/human-centered-ai-automatic-speech-recognition.md similarity index 90% rename from _podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.md rename to _podcast/human-centered-ai-automatic-speech-recognition.md index dd67f6b8..2e2315c1 100644 --- a/_podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.md +++ b/_podcast/human-centered-ai-automatic-speech-recognition.md @@ -1,21 +1,144 @@ --- +title: "Human-Centered Speech Recognition: ASR for Disordered Speech and Accents" +short: "Human-Centered AI for Disordered Speech Recognition" +season: 19 episode: 2 guests: - katarzynaforemniak +image: images/podcast/human-centered-ai-automatic-speech-recognition.jpg ids: - anchor: atatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 + anchor: datatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 youtube: yTZ4cddD7DU -image: images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg links: anchor: 
https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Human-Centered-AI-for-Disordered-Speech-Recognition---Katarzyna-Foremniak-e2p8360 apple: https://podcasts.apple.com/us/podcast/human-centered-ai-for-disordered-speech-recognition/id1541710331?i=1000671805368 spotify: https://open.spotify.com/show/0pck8zuiXdI0OrCg86DAPy?si=ac857db69d484277 youtube: https://www.youtube.com/watch?v=yTZ4cddD7DU -season: 19 -short: Human-Centered AI for Disordered Speech Recognition -title: 'Human-Centered ASR for Disordered Speech: Data, Multimodal Cues & Personalization' +description: "Discover ASR solutions for disordered speech and accents—boost recognition accuracy, reduce bias, and design accessible human-centered models now." +topics: +- AI +- NLP +- LLMs +- machine learning +- data governance +intro: "How can automatic speech recognition (ASR) better serve people with disordered speech and diverse accents? In this episode Katarzyna Foremniak, a computational linguist with over 10 years in NLP who has built language models for Audi and Porsche and teaches at the University of Warsaw, examines human-centered ASR for atypical and accented speech. We trace her move from linguistics to computational approaches and cover core phonetics and morpho-syntax concepts that matter for speech recognition.

Key topics include distinctions between accents and speech disorders, limitations of standard ASR datasets, strategies for disordered speech recognition such as specialized datasets, data augmentation and synthetic variations, multimodal ASR with lip-reading, and transfer learning for fine-tuning with limited data. We also discuss data collection challenges (GDPR, clinical data), bilingualism effects, stammering and fluency, pronunciation issues like Polish consonant clusters, and practical workflows including Amazon Transcribe plus LLM post-processing. Deployment tradeoffs—model size, on-device setups, automotive voice use cases—and assistive applications round out the conversation.

Listeners interested in speech recognition, disordered speech, accents, and ethical data practices will gain practical technical strategies and a clearer view of research and deployment priorities." +dateadded: 2024-10-10 +duration: PT00H57M19S +quotableClips: +- name: 'Episode Introduction: Human-Centered AI for Disordered Speech' + startOffset: 0 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=0 + endOffset: 486 +- name: Guest Introduction & Career Highlights (Katarzyna Foremniak) + startOffset: 486 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=486 + endOffset: 546 +- name: 'From Linguistics to Computational Linguistics: Transition & Skills' + startOffset: 546 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=546 + endOffset: 802 +- name: 'Linguistics Meets Computer Science: Data-driven Approaches' + startOffset: 802 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=802 + endOffset: 925 +- name: 'Phonetics & Morpho-syntax Explained: Core Concepts for ASR' + startOffset: 925 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=925 + endOffset: 1233 +- name: 'Phonetics and Speech Disorders: Articulation, Fluency, Voice Quality' + startOffset: 1233 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1233 + endOffset: 1399 +- name: 'Accents vs Speech Disorders: Variation, Identity, and Comprehension' + startOffset: 1399 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1399 + endOffset: 1481 +- name: 'ASR Progress: Modern Models (Whisper) and Improved Accent Handling' + startOffset: 1481 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1481 + endOffset: 1651 +- name: 'ASR Fundamentals: Standard Speech Datasets and Reference Speech' + startOffset: 1651 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1651 + endOffset: 1824 +- name: 'ASR Limitations with Atypical Speech: Training/Deployment Gaps' + startOffset: 1824 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1824 + endOffset: 1853 +- name: 'Strategies for Disordered Speech 
Recognition: Specialized Datasets & Adaptation' + startOffset: 1853 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1853 + endOffset: 2227 +- name: 'Data Augmentation for Disordered Speech: Synthetic Variations' + startOffset: 2227 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2227 + endOffset: 2253 +- name: 'Multimodal ASR: Integrating Lip-reading and Visual Cues' + startOffset: 2253 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2253 + endOffset: 2417 +- name: 'Transfer Learning for ASR: Fine-tuning with Limited Data' + startOffset: 2417 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2417 + endOffset: 2470 +- name: 'Data Collection Challenges: GDPR, Clinical Data, Language Coverage' + startOffset: 2470 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2470 + endOffset: 2538 +- name: 'Language & Dialect Effects: Bilingualism and Disorder Variability' + startOffset: 2538 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2538 + endOffset: 2671 +- name: 'Stammering & Fluency Issues: Characteristics and Recognition Needs' + startOffset: 2671 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2671 + endOffset: 2716 +- name: 'Pronunciation Challenges: Polish Consonant Clusters and Phonetics' + startOffset: 2716 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2716 + endOffset: 2777 +- name: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post-processing' + startOffset: 2777 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2777 + endOffset: 2848 +- name: 'Contextual Language Models in ASR: Meaning Preservation vs WER' + startOffset: 2848 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2848 + endOffset: 3087 +- name: 'Utterance Analysis in ASR: Phonemes, Words, and Contextual Prediction' + startOffset: 3087 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3087 + endOffset: 3245 +- name: 'Personalized ASR: User Adaptation, Fine-tuning, and On-device Setup' + startOffset: 3245 + url: 
https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3245 + endOffset: 3480 +- name: 'Assistive Applications: Communication Tools for People with Disorders' + startOffset: 3480 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3480 + endOffset: 3602 +- name: 'Model Size & Deployment Constraints: Mobile and Edge Considerations' + startOffset: 3602 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3602 + endOffset: 3713 +- name: 'In-Car Voice Recognition: Automotive Use Cases and Limitations' + startOffset: 3713 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3713 + endOffset: 3807 +- name: 'Notable Failure Examples: Elevator/Car Voice Recognition Humor' + startOffset: 3807 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3807 + endOffset: 3853 +- name: 'Closing Reflections: Human-Centered AI Priorities & Further Reading' + startOffset: 3853 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3853 + endOffset: 3925 +- name: Episode Sign-off and Guest Thanks + startOffset: 3925 + url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3925 + endOffset: 3439 transcript: -- header: 'Episode Introduction: Human‑Centered AI for Disordered Speech' +- header: 'Episode Introduction: Human-Centered AI for Disordered Speech' - header: Guest Introduction & Career Highlights (Katarzyna Foremniak) - line: This week, we'll talk about human-centered AI for disordered speech recognition. We have a special guest today—Katarzyna Foremniak is a computational linguist @@ -109,7 +232,7 @@ transcript: sec: 745 time: '12:25' who: Katarzyna -- header: 'Linguistics Meets Computer Science: Data‑driven Approaches' +- header: 'Linguistics Meets Computer Science: Data-driven Approaches' - line: You’ve touched on how important the use of data is. Is it safe to say that computational linguistics merges linguistics and computer science? 
sec: 802 @@ -146,7 +269,7 @@ transcript: sec: 904 time: '15:04' who: Katarzyna -- header: 'Phonetics & Morpho‑syntax Explained: Core Concepts for ASR' +- header: 'Phonetics & Morpho-syntax Explained: Core Concepts for ASR' - line: In your biography, summarized by GPT, it mentions that you specialize in phonetics, morpho-syntax, and sentiment analysis. I’m familiar with sentiment analysis, but could you explain what phonetics and morpho-syntax are? @@ -380,13 +503,13 @@ transcript: sec: 2251 time: '37:31' who: Alexey -- header: 'Multimodal ASR: Integrating Lip‑reading and Visual Cues' +- header: 'Multimodal ASR: Integrating Lip-reading and Visual Cues' - line: Another strategy is using multimodal outputs. While we learn from audio, adding visual data—such as lip reading or gesture recognition— sec: 2253 time: '37:33' who: Katarzyna -- header: 'Transfer Learning for ASR: Fine‑tuning with Limited Data' +- header: 'Transfer Learning for ASR: Fine-tuning with Limited Data' - line: Yeah, not yet, of course. But I've worked with images, and in a typical situation, you have an ImageNet neural network trained on ImageNet. Then you have your own data, which could be tractors or anything else not included in ImageNet. You might @@ -478,7 +601,7 @@ transcript: sec: 2751 time: '45:51' who: Katarzyna -- header: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post‑processing' +- header: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post-processing' - line: By the way, I use automatic speech recognition for podcast episodes after recording. I utilize Amazon Transcribe, which is supposed to recognize English. sec: 2777 @@ -550,7 +673,7 @@ transcript: sec: 3151 time: '52:31' who: Alexey -- header: 'Personalized ASR: User Adaptation, Fine‑tuning, and On‑device Setup' +- header: 'Personalized ASR: User Adaptation, Fine-tuning, and On-device Setup' - line: I guess with personalization, the way it works is I first need to train it as a user. 
It asks me, "Hey, can you pronounce this sentence?" I record myself saying the sentence, and then it asks me to pronounce something else. I do this @@ -711,7 +834,7 @@ transcript: sec: 3708 time: '1:01:48' who: Alexey -- header: 'In‑Car Voice Recognition: Automotive Use Cases and Limitations' +- header: 'In-Car Voice Recognition: Automotive Use Cases and Limitations' - line: And it's parking, and it's parking! Everything you need and what is planned by the producers and car designers includes opening the windows, air conditioning, seat heating, steering wheel heating, radio, calling, etc. That’s also an interesting @@ -753,7 +876,7 @@ transcript: sec: 3850 time: '1:04:10' who: Katarzyna -- header: 'Closing Reflections: Human‑Centered AI Priorities & Further Reading' +- header: 'Closing Reflections: Human-Centered AI Priorities & Further Reading' - line: I think we covered only three questions out of—I don’t know how many we prepared, but it was... sec: 3853 @@ -782,7 +905,7 @@ transcript: sec: 3913 time: '1:05:13' who: Alexey -- header: Episode Sign‑off and Guest Thanks +- header: Episode Sign-off and Guest Thanks - line: Thank you. Thank you for the invitation, and really congratulations on the great series of podcasts, but also for the fantastic platform that you created. I feel really impressed, and as I said at the beginning, I feel honored to be @@ -790,142 +913,20 @@ transcript: sec: 3925 time: '1:05:25' who: Katarzyna -description: 'Learn ASR strategies for disordered speech: data, multimodal cues and - personalization to build robust assistive voice systems and on-device speech tools.' -intro: 'How can automatic speech recognition (ASR) systems reliably understand disordered - and atypical speech without compromising user identity or privacy? 
In this episode - Katarzyna Foremniak, a computational linguist with 10+ years in NLP who developed - language models for Audi and Porsche and teaches at the University of Warsaw, tackles - that question through a human‑centered lens.

We explore core phonetics - and morpho‑syntax concepts that matter for disordered speech, distinctions between - accents and disorders, and practical limits of modern models (e.g., Whisper) when - faced with atypical articulation, stammering, and voice quality variation. Katarzyna - walks through data‑driven strategies: specialized datasets, data augmentation, transfer - learning and fine‑tuning with limited data, plus multimodal ASR approaches that - integrate lip‑reading and visual cues. The conversation also covers data collection - challenges (GDPR, clinical data, language and dialect coverage), personalization - and on‑device adaptation, and assistive and automotive use cases with deployment - constraints.

If you work on speech recognition, accessibility, or multilingual - NLP, this episode offers concrete technical strategies and ethical considerations - for building personalized, multimodal ASR systems that better serve people with - speech disorders.' -dateadded: '2024-10-10' -duration: PT00H57M19S -quotableClips: -- name: 'Episode Introduction: Human‑Centered AI for Disordered Speech' - startOffset: 0 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=0 - endOffset: 486 -- name: Guest Introduction & Career Highlights (Katarzyna Foremniak) - startOffset: 486 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=486 - endOffset: 546 -- name: 'From Linguistics to Computational Linguistics: Transition & Skills' - startOffset: 546 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=546 - endOffset: 802 -- name: 'Linguistics Meets Computer Science: Data‑driven Approaches' - startOffset: 802 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=802 - endOffset: 925 -- name: 'Phonetics & Morpho‑syntax Explained: Core Concepts for ASR' - startOffset: 925 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=925 - endOffset: 1233 -- name: 'Phonetics and Speech Disorders: Articulation, Fluency, Voice Quality' - startOffset: 1233 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1233 - endOffset: 1399 -- name: 'Accents vs Speech Disorders: Variation, Identity, and Comprehension' - startOffset: 1399 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1399 - endOffset: 1481 -- name: 'ASR Progress: Modern Models (Whisper) and Improved Accent Handling' - startOffset: 1481 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1481 - endOffset: 1651 -- name: 'ASR Fundamentals: Standard Speech Datasets and Reference Speech' - startOffset: 1651 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1651 - endOffset: 1824 -- name: 'ASR Limitations with Atypical Speech: Training/Deployment Gaps' - startOffset: 1824 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1824 - 
endOffset: 1853 -- name: 'Strategies for Disordered Speech Recognition: Specialized Datasets & Adaptation' - startOffset: 1853 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=1853 - endOffset: 2227 -- name: 'Data Augmentation for Disordered Speech: Synthetic Variations' - startOffset: 2227 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2227 - endOffset: 2253 -- name: 'Multimodal ASR: Integrating Lip‑reading and Visual Cues' - startOffset: 2253 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2253 - endOffset: 2417 -- name: 'Transfer Learning for ASR: Fine‑tuning with Limited Data' - startOffset: 2417 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2417 - endOffset: 2470 -- name: 'Data Collection Challenges: GDPR, Clinical Data, Language Coverage' - startOffset: 2470 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2470 - endOffset: 2538 -- name: 'Language & Dialect Effects: Bilingualism and Disorder Variability' - startOffset: 2538 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2538 - endOffset: 2671 -- name: 'Stammering & Fluency Issues: Characteristics and Recognition Needs' - startOffset: 2671 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2671 - endOffset: 2716 -- name: 'Pronunciation Challenges: Polish Consonant Clusters and Phonetics' - startOffset: 2716 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2716 - endOffset: 2777 -- name: 'Practical Transcription Workflow: Amazon Transcribe + LLM Post‑processing' - startOffset: 2777 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2777 - endOffset: 2848 -- name: 'Contextual Language Models in ASR: Meaning Preservation vs WER' - startOffset: 2848 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=2848 - endOffset: 3087 -- name: 'Utterance Analysis in ASR: Phonemes, Words, and Contextual Prediction' - startOffset: 3087 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3087 - endOffset: 3245 -- name: 'Personalized ASR: User Adaptation, Fine‑tuning, and On‑device Setup' 
- startOffset: 3245 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3245 - endOffset: 3480 -- name: 'Assistive Applications: Communication Tools for People with Disorders' - startOffset: 3480 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3480 - endOffset: 3602 -- name: 'Model Size & Deployment Constraints: Mobile and Edge Considerations' - startOffset: 3602 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3602 - endOffset: 3713 -- name: 'In‑Car Voice Recognition: Automotive Use Cases and Limitations' - startOffset: 3713 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3713 - endOffset: 3807 -- name: 'Notable Failure Examples: Elevator/Car Voice Recognition Humor' - startOffset: 3807 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3807 - endOffset: 3853 -- name: 'Closing Reflections: Human‑Centered AI Priorities & Further Reading' - startOffset: 3853 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3853 - endOffset: 3925 -- name: Episode Sign‑off and Guest Thanks - startOffset: 3925 - url: https://www.youtube.com/watch?v=yTZ4cddD7DU&t=3925 - endOffset: 3439 ---- +context: 'Context: The episode surveys how linguistics and computational methods intersect + to address limitations of mainstream ASR for people with disordered, accented, or + atypical speech — covering phonetics and morpho-syntax foundations, distinctions + between accent and disorder, modern ASR advances and failure modes, data collection + and GDPR constraints, targeted datasets and augmentation, multimodal and transfer + approaches, personalization and on-device deployment, and the ethical/assistive + implications. 
+ Core: Build ASR systems that are human-centered and linguistically informed—prioritizing + inclusive data practices, phonetics-aware modeling, adaptive techniques (augmentation, + transfer learning, multimodal cues, personalization), and ethical deployment—so + speech technology recognizes and respects the communicative diversity and needs + of people with disordered or atypical speech.' +--- Links: * [Eleven elevator](https://www.youtube.com/live/NMS2VnDveP8){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s04e06-humans-in-the-loop.md b/_podcast/human-centered-mlops-and-model-monitoring.md similarity index 95% rename from _podcast/s04e06-humans-in-the-loop.md rename to _podcast/human-centered-mlops-and-model-monitoring.md index 96095426..2e5ee422 100644 --- a/_podcast/s04e06-humans-in-the-loop.md +++ b/_podcast/human-centered-mlops-and-model-monitoring.md @@ -1,12 +1,11 @@ --- -title: 'Master Human-Centered MLOps: Stakeholder Buy-In, Monitoring, Debugging & Incident - Response' -short: Humans in the Loop -guests: -- linaweichbrodt -image: images/podcast/s04e06-humans-in-the-loop.jpg +title: "Master Human-Centered MLOps: Stakeholder Buy-In, Monitoring, Debugging & Incident Response" +short: "Humans in the Loop" season: 4 episode: 6 +guests: +- linaweichbrodt +image: images/podcast/human-centered-mlops-and-model-monitoring.jpg ids: youtube: o50j_Ndx2Hg anchor: Humans-in-the-Loop---Lina-Weichbrodt-e14npgp @@ -15,6 +14,134 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Humans-in-the-Loop---Lina-Weichbrodt-e14npgp spotify: https://open.spotify.com/episode/23VxmAEkKUs1kjaludRQAR apple: https://podcasts.apple.com/us/podcast/humans-in-the-loop-lina-weichbrodt/id1541710331?i=1000530535704 + +description: "Master human-centered MLOps: actionable stakeholder buy-in tactics, model monitoring and incident response playbooks to debug and ship reliable ML." 
+intro: "How do you make MLOps human-centered so stakeholders actually trust models and teams can monitor, debug, and respond to incidents? In this episode, Lina Weichbrodt — a generalist machine learning developer who prototypes data-driven products end-to-end (design, implementation, A/B tests, operations) — walks through practical MLOps strategies that prioritize people as much as pipelines.

We cover a project intake checklist (business case, KPIs, alternatives), how to evaluate whether AI is needed, and scoping problems so outcomes are visible in the UI. Lina explains stakeholder engagement techniques (pairing, availability, converting fears into mitigations), demos vs reporting for buy-in, and building trust through domain understanding and data issue support. You’ll get concrete guidance on incident preparedness and ML incident response: service levels, impact assessment, post-mortems, Five Whys root-cause debugging, and turning findings into tickets. We also dive into model monitoring and detection (live test sets, small A/B tests, feature drift, data monitoring), observability practices, explainability vs debugging, and a credit-scoring case study to illustrate prioritization. Listen to learn repeatable, human-centered tactics for stakeholder buy-in, model monitoring, ML debugging, and incident response." +topics: +- MLOps +- machine learning +- production +- tools +- communication +dateadded: 2021-08-01 + +duration: PT00H58M19S + +quotableClips: +- name: 'Episode Introduction: Humans in the Loop — MLOps & human-centered ML' + startOffset: 0 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=0 + endOffset: 209 +- name: 'Guest Career Path: Lina Weichbrodt — business to ML engineering' + startOffset: 209 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=209 + endOffset: 290 +- name: 'Project Intake Checklist: business case, KPIs, and alternative solutions' + startOffset: 290 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=290 + endOffset: 583 +- name: 'Evaluate AI Necessity: quantify alternatives before modeling' + startOffset: 583 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=583 + endOffset: 626 +- name: 'Problem Scoping: make business problems specific and UI-visible' + startOffset: 626 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=626 + endOffset: 742 +- name: 'Stakeholder Engagement: pairing, availability, and buy-in' + 
startOffset: 742 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=742 + endOffset: 827 +- name: 'Communicating Across Teams: translating technical and business language' + startOffset: 827 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=827 + endOffset: 907 +- name: 'Trust Building: domain understanding and helping with data issues' + startOffset: 907 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=907 + endOffset: 1109 +- name: 'Addressing Concerns: convert stakeholder fears into mitigations and metrics' + startOffset: 1109 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1109 + endOffset: 1356 +- name: 'Demos vs Reporting: what stakeholders need to believe the solution works' + startOffset: 1356 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1356 + endOffset: 1474 +- name: 'Incident Preparedness: service levels and impact assessment with stakeholders' + startOffset: 1474 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1474 + endOffset: 1634 +- name: 'ML Incident Response: post-mortems and ML-specific recovery steps' + startOffset: 1634 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1634 + endOffset: 1763 +- name: Live Test Sets & Small A/B Tests for model monitoring and detection + startOffset: 1763 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1763 + endOffset: 1931 +- name: 'Root-Cause Debugging: applying Five Whys to ML product issues' + startOffset: 1931 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1931 + endOffset: 2201 +- name: 'User Feedback Channels: internal bug reports and product QA processes' + startOffset: 2201 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2201 + endOffset: 2232 +- name: 'Case Study: credit scoring surprises and interpreting feature importance' + startOffset: 2232 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2232 + endOffset: 2300 +- name: 'Prioritizing Bugs: investigating widespread user complaints' + startOffset: 2300 + url: 
https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2300 + endOffset: 2366 +- name: 'Post-Mortem Evidence: facts, blameless analysis, and investigation steps' + startOffset: 2366 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2366 + endOffset: 2523 +- name: 'Action Items: turning post-mortems into tickets and process changes' + startOffset: 2523 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2523 + endOffset: 2651 +- name: 'Explainability vs Debugging: when to use Explainable AI tools' + startOffset: 2651 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2651 + endOffset: 2788 +- name: 'Data Monitoring: input distribution, unit changes, and feature drift' + startOffset: 2788 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2788 + endOffset: 2840 +- name: 'Project Evaluation Tools: AI Canvas and online checklists' + startOffset: 2840 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2840 + endOffset: 2968 +- name: 'Observability Practices: logging features, feature stores, and reproducibility' + startOffset: 2968 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2968 + endOffset: 3030 +- name: 'End-User Research: mystery shopping and direct user testing' + startOffset: 3030 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3030 + endOffset: 3159 +- name: 'Idea Sourcing: proposing ML projects vs refining stakeholder problems' + startOffset: 3159 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3159 + endOffset: 3289 +- name: 'Data Literacy: educating teams and community building inside companies' + startOffset: 3289 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3289 + endOffset: 3388 +- name: 'People Skills & Tactical Hacks: convincing stakeholders and improving data + quality' + startOffset: 3388 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3388 + endOffset: 3566 +- name: 'Wrap-Up & Contact: where to find Lina and episode closing' + startOffset: 3566 + url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3566 + endOffset: 3499 
+ transcript: - header: 'Episode Introduction: Humans in the Loop — MLOps & human-centered ML' - line: Today, we will talk about the human aspect in ML Ops. We have a special guest @@ -213,7 +340,7 @@ transcript: sec: 729 time: '12:09' who: Lina -- header: 'Stakeholder Engagement: pairing, availability, and buy‑in' +- header: 'Stakeholder Engagement: pairing, availability, and buy-in' - line: This is before you even start doing anything, right? You have an idea about something cool, you sit down, and you spend some time in front of a Google document or Word document, or maybe just a notepad. You try to write everything down, you @@ -469,7 +596,7 @@ transcript: sec: 1615 time: '26:55' who: Lina -- header: 'ML Incident Response: post‑mortems and ML‑specific recovery steps' +- header: 'ML Incident Response: post-mortems and ML-specific recovery steps' - line: Let's say we agreed with everyone on this, and we say, “Okay, the system should be responsive within one hour. If something happens for 10 minutes, nothing bad happens, but it would come back in one hour.” So you will define all these service @@ -538,7 +665,7 @@ transcript: sec: 1928 time: '32:08' who: Alexey -- header: 'Root‑Cause Debugging: applying Five Whys to ML product issues' +- header: 'Root-Cause Debugging: applying Five Whys to ML product issues' - line: I thought that he must have. So let's use the post mortem format to debug this Okay. It's the ‘last seen’ box – some of my colleagues spend some time debugging the problem, not noticing it's not recommendation box. First thing, apply the @@ -667,7 +794,7 @@ transcript: sec: 2354 time: '39:14' who: Lina -- header: 'Post‑Mortem Evidence: facts, blameless analysis, and investigation steps' +- header: 'Post-Mortem Evidence: facts, blameless analysis, and investigation steps' - line: I wanted to ask you a bit about this ‘post mortem’ format. We also have a question in chat. What does the format look like? 
I think one thing that you mentioned is that you need to ask the “five why's” – you don't jump to conclusions immediately. @@ -704,7 +831,7 @@ transcript: sec: 2508 time: '41:48' who: Alexey -- header: 'Action Items: turning post‑mortems into tickets and process changes' +- header: 'Action Items: turning post-mortems into tickets and process changes' - line: First you get the facts. If it's a backend service, it's likely “The service was down from that time to that time.” As in our women’s bag example, it might be a screenshot, or it might be return values. We put all the factual information @@ -870,7 +997,7 @@ transcript: sec: 3019 time: '50:19' who: Alexey -- header: 'End‑User Research: mystery shopping and direct user testing' +- header: 'End-User Research: mystery shopping and direct user testing' - line: It depends on what project I'm working on. I do talk to end users in some cases. I also do mystery shopping. Mystery shopping is basically when you go through the process yourself. I was optimizing the credit process application in my current @@ -1058,7 +1185,7 @@ transcript: sec: 3557 time: '59:17' who: Alexey -- header: 'Wrap‑Up & Contact: where to find Lina and episode closing' +- header: 'Wrap-Up & Contact: where to find Lina and episode closing' - line: Thank you for having me. And if anyone wants to connect more – I'm hanging out in the MLOps channel sometimes. Also on LinkedIn. Or if anyone wants to write a blog post together or just generally share? Yeah. Look me up. @@ -1087,138 +1214,4 @@ transcript: sec: 3621 time: '1:00:21' who: Lina -description: 'Master human-centered MLOps: actionable stakeholder buy-in tactics, - model monitoring and incident response playbooks to debug and ship reliable ML.' -intro: 'How do you make MLOps human-centered so stakeholders actually trust models - and teams can monitor, debug, and respond to incidents? 
In this episode, Lina Weichbrodt - — a generalist machine learning developer who prototypes data-driven products end-to-end - (design, implementation, A/B tests, operations) — walks through practical MLOps - strategies that prioritize people as much as pipelines.

We cover a project - intake checklist (business case, KPIs, alternatives), how to evaluate whether AI - is needed, and scoping problems so outcomes are visible in the UI. Lina explains - stakeholder engagement techniques (pairing, availability, converting fears into - mitigations), demos vs reporting for buy-in, and building trust through domain understanding - and data issue support. You’ll get concrete guidance on incident preparedness and - ML incident response: service levels, impact assessment, post-mortems, Five Whys - root-cause debugging, and turning findings into tickets. We also dive into model - monitoring and detection (live test sets, small A/B tests, feature drift, data monitoring), - observability practices, explainability vs debugging, and a credit-scoring case - study to illustrate prioritization. Listen to learn repeatable, human-centered tactics - for stakeholder buy-in, model monitoring, ML debugging, and incident response.' -dateadded: '2021-08-01' -duration: PT00H58M19S -quotableClips: -- name: 'Episode Introduction: Humans in the Loop — MLOps & human-centered ML' - startOffset: 0 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=0 - endOffset: 209 -- name: 'Guest Career Path: Lina Weichbrodt — business to ML engineering' - startOffset: 209 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=209 - endOffset: 290 -- name: 'Project Intake Checklist: business case, KPIs, and alternative solutions' - startOffset: 290 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=290 - endOffset: 583 -- name: 'Evaluate AI Necessity: quantify alternatives before modeling' - startOffset: 583 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=583 - endOffset: 626 -- name: 'Problem Scoping: make business problems specific and UI-visible' - startOffset: 626 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=626 - endOffset: 742 -- name: 'Stakeholder Engagement: pairing, availability, and buy‑in' - startOffset: 742 - url: 
https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=742 - endOffset: 827 -- name: 'Communicating Across Teams: translating technical and business language' - startOffset: 827 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=827 - endOffset: 907 -- name: 'Trust Building: domain understanding and helping with data issues' - startOffset: 907 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=907 - endOffset: 1109 -- name: 'Addressing Concerns: convert stakeholder fears into mitigations and metrics' - startOffset: 1109 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1109 - endOffset: 1356 -- name: 'Demos vs Reporting: what stakeholders need to believe the solution works' - startOffset: 1356 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1356 - endOffset: 1474 -- name: 'Incident Preparedness: service levels and impact assessment with stakeholders' - startOffset: 1474 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1474 - endOffset: 1634 -- name: 'ML Incident Response: post‑mortems and ML‑specific recovery steps' - startOffset: 1634 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1634 - endOffset: 1763 -- name: Live Test Sets & Small A/B Tests for model monitoring and detection - startOffset: 1763 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1763 - endOffset: 1931 -- name: 'Root‑Cause Debugging: applying Five Whys to ML product issues' - startOffset: 1931 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=1931 - endOffset: 2201 -- name: 'User Feedback Channels: internal bug reports and product QA processes' - startOffset: 2201 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2201 - endOffset: 2232 -- name: 'Case Study: credit scoring surprises and interpreting feature importance' - startOffset: 2232 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2232 - endOffset: 2300 -- name: 'Prioritizing Bugs: investigating widespread user complaints' - startOffset: 2300 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2300 - endOffset: 
2366 -- name: 'Post‑Mortem Evidence: facts, blameless analysis, and investigation steps' - startOffset: 2366 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2366 - endOffset: 2523 -- name: 'Action Items: turning post‑mortems into tickets and process changes' - startOffset: 2523 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2523 - endOffset: 2651 -- name: 'Explainability vs Debugging: when to use Explainable AI tools' - startOffset: 2651 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2651 - endOffset: 2788 -- name: 'Data Monitoring: input distribution, unit changes, and feature drift' - startOffset: 2788 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2788 - endOffset: 2840 -- name: 'Project Evaluation Tools: AI Canvas and online checklists' - startOffset: 2840 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2840 - endOffset: 2968 -- name: 'Observability Practices: logging features, feature stores, and reproducibility' - startOffset: 2968 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=2968 - endOffset: 3030 -- name: 'End‑User Research: mystery shopping and direct user testing' - startOffset: 3030 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3030 - endOffset: 3159 -- name: 'Idea Sourcing: proposing ML projects vs refining stakeholder problems' - startOffset: 3159 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3159 - endOffset: 3289 -- name: 'Data Literacy: educating teams and community building inside companies' - startOffset: 3289 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3289 - endOffset: 3388 -- name: 'People Skills & Tactical Hacks: convincing stakeholders and improving data - quality' - startOffset: 3388 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3388 - endOffset: 3566 -- name: 'Wrap‑Up & Contact: where to find Lina and episode closing' - startOffset: 3566 - url: https://www.youtube.com/watch?v=o50j_Ndx2Hg&t=3566 - endOffset: 3499 --- diff --git 
a/_podcast/s13e08-navigating-industrial-data-challenges.md b/_podcast/industrial-data-small-data-production-machine-learning.md similarity index 97% rename from _podcast/s13e08-navigating-industrial-data-challenges.md rename to _podcast/industrial-data-small-data-production-machine-learning.md index 558ad6ae..88515ff5 100644 --- a/_podcast/s13e08-navigating-industrial-data-challenges.md +++ b/_podcast/industrial-data-small-data-production-machine-learning.md @@ -1,20 +1,152 @@ --- +title: "Master Industrial Data: Synthetic Tabular Data, Small-Data Modeling, Sensors & MLOps" +short: "Navigating Industrial Data Challenges" +season: 13 episode: 8 guests: - rosonaeldred +image: images/podcast/industrial-data-small-data-production-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/Navigating-Industrial-Data-Challenges---Rosona-Eldred-e225aam youtube: rwuud5wr3J4 -image: images/podcast/s13e08-navigating-industrial-data-challenges.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Navigating-Industrial-Data-Challenges---Rosona-Eldred-e225aam apple: https://podcasts.apple.com/us/podcast/navigating-industrial-data-challenges-rosona-eldred/id1541710331?i=1000608992445 spotify: https://open.spotify.com/episode/1o6rtfFydBVoc0ER5ZUiRQ?si=rkgzEFquSfql4Za6cyjX2g youtube: https://www.youtube.com/watch?v=rwuud5wr3J4 -season: 13 -short: Navigating Industrial Data Challenges -title: 'Master Industrial Data: Synthetic Tabular Data, Small-Data Modeling, Sensors - & MLOps' + +description: "Master industrial data: learn synthetic tabular data and small-data modeling for sensors & MLOps—optimize QC, predictive maintenance and deploy models faster." +intro: "How do you build reliable machine learning when your datasets are generated by production lines, tiny R&D campaigns, or long-running quality tests instead of millions of web events? 
In this episode, Rosona Eldred — a mathematician turned machine learning engineer leading synthetic tabular data work in an AI Innovation team — walks us through mastering industrial data, from sensors and traceability to small-data modeling and MLOps trade-offs.

We explore what makes industrial data unique (R&D experiments, pilot plants, full production), concrete process examples like blue-paint scale-up and packing-peanuts manufacturing, and long-term quality tests such as the Florida weathering trial. Rosona breaks down sensor choices, batching and granularity challenges, inline versus destructive quality measurements, and how anomaly detection should feed human decisioning. She also covers regulatory and sustainability tracking, reusing historical experiments for reformulation, proxy metrics, optimization trade-offs, and practical methods for tiny-data problems — statistical techniques, transfer learning, and leveraging domain experts. Finally, she contrasts sparse R&D models with streaming, production-scale MLOps.

Listen to gain concrete strategies for synthetic tabular data, small-data modeling, sensor-driven monitoring, and when to adopt production MLOps versus lightweight R&D workflows" +topics: +- industrial data +- synthetic tabular data +- MLOps +dateadded: 2023-04-16 + +duration: PT01H01M28S + +quotableClips: +- name: 'Episode Intro: Guest Overview & Synthetic Tabular Data Focus' + startOffset: 83 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=83 + endOffset: 158 +- name: 'Career Pivot: From PhD Algebraic Topology to Industry' + startOffset: 158 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=158 + endOffset: 352 +- name: 'Academic Roots: 3D Topological Models and Research Background' + startOffset: 352 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=352 + endOffset: 468 +- name: 'Mathematical Mindset: Logical Reasoning, Proof-Style Thinking for Data' + startOffset: 468 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=468 + endOffset: 571 +- name: 'Transition Challenges: Seniority vs Domain Experience in Industry' + startOffset: 571 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=571 + endOffset: 645 +- name: 'Defining Industrial Data: Production-Generated Datasets Explained' + startOffset: 645 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=645 + endOffset: 743 +- name: 'Industrial Data Spectrum: R&D Experiments, Pilot Plants, Full Production' + startOffset: 743 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=743 + endOffset: 910 +- name: 'Process Example: Blue Paint R&D, Automation, and Scale-Up' + startOffset: 910 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=910 + endOffset: 968 +- name: 'Long-Term Quality Testing: Weathering & the Florida Paint Test' + startOffset: 968 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=968 + endOffset: 1049 +- name: 'Industrial vs Internet Data: Fixed Sensors and Heterogeneous Equipment' + startOffset: 1049 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1049 + endOffset: 1122 +- 
name: 'Process Illustration: Packing Peanuts Production and Sensor Choices' + startOffset: 1122 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1122 + endOffset: 1337 +- name: 'Data Granularity & Traceability: Batching, Mixing, and Coarseness Challenges' + startOffset: 1337 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1337 + endOffset: 1493 +- name: 'Business Use Cases: Quality Control, Predictive Maintenance, Monitoring' + startOffset: 1493 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1493 + endOffset: 1657 +- name: 'Quality Measurement Methods: Inline Monitoring vs Destructive Tests' + startOffset: 1657 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1657 + endOffset: 1734 +- name: 'From Alerts to Action: Anomaly Detection and Human Decisioning' + startOffset: 1734 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1734 + endOffset: 1870 +- name: 'Regulatory & Sustainability Tracking: New Requirements and Data Gaps' + startOffset: 1870 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1870 + endOffset: 2135 +- name: 'Tiny Data R&D: Reformulation and Experimental Design After Regulation' + startOffset: 2135 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2135 + endOffset: 2300 +- name: 'Reusing Historical Experiments: Informing Product Redevelopment' + startOffset: 2300 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2300 + endOffset: 2340 +- name: 'Industrial Data Types: Ingredients, Spectra, Material Properties, Tests' + startOffset: 2340 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2340 + endOffset: 2508 +- name: 'Proxy Metrics & Application Tests: Measuring End-Product Behavior' + startOffset: 2508 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2508 + endOffset: 2686 +- name: 'Optimization Problems: Logistics, Mathematical Solvers, Trade-offs' + startOffset: 2686 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2686 + endOffset: 2961 +- name: 'Modeling Small Data: Statistical Methods, Transfer 
Learning, Domain Experts' + startOffset: 2961 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2961 + endOffset: 3044 +- name: 'MLOps Fit: Sparse R&D Models vs High-Volume Production Deployments' + startOffset: 3044 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3044 + endOffset: 3123 +- name: 'Production-Scale Data: Streaming, Big Data Processing, Real-Time Alerts' + startOffset: 3123 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3123 + endOffset: 3250 +- name: 'Domain Knowledge Value: Tacit Expertise Beyond the CSV' + startOffset: 3250 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3250 + endOffset: 3344 +- name: 'Collaborative Workflow: EDA, Definitions, and Aligning Measurements' + startOffset: 3344 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3344 + endOffset: 3426 +- name: 'Learning Resources: Sensor Datasets and Semiconductor Anomaly Repos' + startOffset: 3426 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3426 + endOffset: 3545 +- name: 'Career Motivation: Choosing Industry Over Academia' + startOffset: 3545 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3545 + endOffset: 3640 +- name: 'Industry Work Culture: Shop Floor Interactions and Research Flavor' + startOffset: 3640 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3640 + endOffset: 3750 +- name: 'Conclusion: Key Takeaways and Next Steps' + startOffset: 3750 + url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3750 + endOffset: 3688 + transcript: - header: 'Episode Intro: Guest Overview & Synthetic Tabular Data Focus' - line: This week we'll talk about industrial data challenges. We have a special guest @@ -1509,150 +1641,6 @@ transcript: sec: 3771 time: '1:02:51' who: Rosona -description: 'Master industrial data: learn synthetic tabular data and small-data - modeling for sensors & MLOps—optimize QC, predictive maintenance and deploy models - faster.' 
-intro: How do you build reliable machine learning when your datasets are generated - by production lines, tiny R&D campaigns, or long-running quality tests instead of - millions of web events? In this episode, Rosona Eldred — a mathematician-turned-machine - learning engineer leading synthetic tabular data work in an AI Innovation team — - walks us through mastering industrial data, from sensors and traceability to small-data - modeling and MLOps trade-offs.

We explore what makes industrial data unique - (R&D experiments, pilot plants, full production), concrete process examples like - blue-paint scale-up and packing-peanuts manufacturing, and long-term quality tests - such as the Florida weathering trial. Rosona breaks down sensor choices, batching - and granularity challenges, inline versus destructive quality measurements, and - how anomaly detection should feed human decisioning. She also covers regulatory - and sustainability tracking, reusing historical experiments for reformulation, proxy - metrics, optimization trade-offs, and practical methods for tiny-data problems — - statistical techniques, transfer learning, and leveraging domain experts. Finally, - she contrasts sparse R&D models with streaming, production-scale MLOps.

- Listen to gain concrete strategies for synthetic tabular data, small-data modeling, - sensor-driven monitoring, and when to adopt production MLOps versus lightweight - R&D workflows. -dateadded: '2023-04-16' -duration: PT01H01M28S -quotableClips: -- name: 'Episode Intro: Guest Overview & Synthetic Tabular Data Focus' - startOffset: 83 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=83 - endOffset: 158 -- name: 'Career Pivot: From PhD Algebraic Topology to Industry' - startOffset: 158 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=158 - endOffset: 352 -- name: 'Academic Roots: 3D Topological Models and Research Background' - startOffset: 352 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=352 - endOffset: 468 -- name: 'Mathematical Mindset: Logical Reasoning, Proof-Style Thinking for Data' - startOffset: 468 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=468 - endOffset: 571 -- name: 'Transition Challenges: Seniority vs Domain Experience in Industry' - startOffset: 571 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=571 - endOffset: 645 -- name: 'Defining Industrial Data: Production-Generated Datasets Explained' - startOffset: 645 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=645 - endOffset: 743 -- name: 'Industrial Data Spectrum: R&D Experiments, Pilot Plants, Full Production' - startOffset: 743 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=743 - endOffset: 910 -- name: 'Process Example: Blue Paint R&D, Automation, and Scale-Up' - startOffset: 910 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=910 - endOffset: 968 -- name: 'Long-Term Quality Testing: Weathering & the Florida Paint Test' - startOffset: 968 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=968 - endOffset: 1049 -- name: 'Industrial vs Internet Data: Fixed Sensors and Heterogeneous Equipment' - startOffset: 1049 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1049 - endOffset: 1122 -- name: 'Process Illustration: Packing Peanuts Production and 
Sensor Choices' - startOffset: 1122 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1122 - endOffset: 1337 -- name: 'Data Granularity & Traceability: Batching, Mixing, and Coarseness Challenges' - startOffset: 1337 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1337 - endOffset: 1493 -- name: 'Business Use Cases: Quality Control, Predictive Maintenance, Monitoring' - startOffset: 1493 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1493 - endOffset: 1657 -- name: 'Quality Measurement Methods: Inline Monitoring vs Destructive Tests' - startOffset: 1657 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1657 - endOffset: 1734 -- name: 'From Alerts to Action: Anomaly Detection and Human Decisioning' - startOffset: 1734 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1734 - endOffset: 1870 -- name: 'Regulatory & Sustainability Tracking: New Requirements and Data Gaps' - startOffset: 1870 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=1870 - endOffset: 2135 -- name: 'Tiny Data R&D: Reformulation and Experimental Design After Regulation' - startOffset: 2135 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2135 - endOffset: 2300 -- name: 'Reusing Historical Experiments: Informing Product Redevelopment' - startOffset: 2300 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2300 - endOffset: 2340 -- name: 'Industrial Data Types: Ingredients, Spectra, Material Properties, Tests' - startOffset: 2340 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2340 - endOffset: 2508 -- name: 'Proxy Metrics & Application Tests: Measuring End-Product Behavior' - startOffset: 2508 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2508 - endOffset: 2686 -- name: 'Optimization Problems: Logistics, Mathematical Solvers, Trade-offs' - startOffset: 2686 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=2686 - endOffset: 2961 -- name: 'Modeling Small Data: Statistical Methods, Transfer Learning, Domain Experts' - startOffset: 2961 - url: 
https://www.youtube.com/watch?v=rwuud5wr3J4&t=2961 - endOffset: 3044 -- name: 'MLOps Fit: Sparse R&D Models vs High-Volume Production Deployments' - startOffset: 3044 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3044 - endOffset: 3123 -- name: 'Production-Scale Data: Streaming, Big Data Processing, Real-Time Alerts' - startOffset: 3123 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3123 - endOffset: 3250 -- name: 'Domain Knowledge Value: Tacit Expertise Beyond the CSV' - startOffset: 3250 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3250 - endOffset: 3344 -- name: 'Collaborative Workflow: EDA, Definitions, and Aligning Measurements' - startOffset: 3344 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3344 - endOffset: 3426 -- name: 'Learning Resources: Sensor Datasets and Semiconductor Anomaly Repos' - startOffset: 3426 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3426 - endOffset: 3545 -- name: 'Career Motivation: Choosing Industry Over Academia' - startOffset: 3545 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3545 - endOffset: 3640 -- name: 'Industry Work Culture: Shop Floor Interactions and Research Flavor' - startOffset: 3640 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3640 - endOffset: 3750 -- name: 'Conclusion: Key Takeaways and Next Steps' - startOffset: 3750 - url: https://www.youtube.com/watch?v=rwuud5wr3J4&t=3750 - endOffset: 3688 --- Links: diff --git a/_podcast/s16e07-cracking-code-machine-learning-made-understandable.md b/_podcast/interpretable-machine-learning.md similarity index 94% rename from _podcast/s16e07-cracking-code-machine-learning-made-understandable.md rename to _podcast/interpretable-machine-learning.md index b13537c7..4ba2fea3 100644 --- a/_podcast/s16e07-cracking-code-machine-learning-made-understandable.md +++ b/_podcast/interpretable-machine-learning.md @@ -1,20 +1,132 @@ --- +title: "Interpretable Machine Learning: SHAP, Conformal Prediction and Model Trust" +season: 16 episode: 7 
guests: - christophmolnar +image: images/podcast/interpretable-machine-learning.jpg ids: - anchor: atatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 + anchor: datatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 youtube: LBuGzyOkx7c -image: images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Cracking-the-Code-Machine-Learning-Made-Understandable---Christoph-Molnar-e2c10n4 apple: https://podcasts.apple.com/us/podcast/cracking-the-code-machine-learning-made/id1541710331?i=1000636448000 spotify: https://open.spotify.com/episode/3SjDB0E2of9IS9TXn2Fof3?si=FwWH99FGTgmL1OGI3-sLAg youtube: https://www.youtube.com/watch?v=LBuGzyOkx7c -season: 16 -short: 'Cracking the Code: Machine Learning Made Understandable' -title: 'Interpretable ML & Technical Writing: SHAP, Conformal Prediction, Python & - Self-Publishing' +description: "Discover interpretable machine learning: learn SHAP, Conformal Prediction, calibrated uncertainty and model trust to debug models and boost reliability." + +topics: +- machine learning +- data science +- practices +- tools +- career transition +- interpretability +intro: "How can you reliably trust a machine learning model’s predictions in real-world settings? In this episode Christoph Molnar — statistician, machine learner, and author of Interpretable Machine Learning — walks through practical approaches for building model trust. Drawing on his experience from Kaggle competitions to authoring a technical book, Christoph explains the trade-offs between interpretability and accuracy and shows how interpretability techniques help debug models.

Key topics include a SHAP deep dive with practical Python examples for attributing predictions, conformal prediction for calibrated uncertainty and creating prediction sets, and the difference between explainable AI and interpretable machine learning. He also discusses using interpretability to debug models, maintaining hands-on skills through competitions, and documenting experiments for reproducible insights.

If you want concrete tools to evaluate model trust—how to quantify uncertainty, interpret feature effects with SHAP, and produce reliable prediction sets with conformal methods—this episode offers clear, actionable guidance and directions for further reading. Ideal for data scientists and ML practitioners focused on interpretable machine learning, model debugging, and trustworthy AI." +dateadded: 2023-11-27 +duration: PT00H56M20S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=0 + endOffset: 42 +- name: 'Guest Intro: Christoph Molnar, Interpretable ML Author' + startOffset: 42 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=42 + endOffset: 92 +- name: 'Career Journey: From Statistics to Tech Writing' + startOffset: 92 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=92 + endOffset: 225 +- name: Becoming a Full-Time Technical Writer + startOffset: 225 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=225 + endOffset: 397 +- name: 'Kaggle Beginnings: Linear Models to Practical ML' + startOffset: 397 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=397 + endOffset: 470 +- name: 'Origin Story: Interest in Interpretable Machine Learning' + startOffset: 470 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=470 + endOffset: 567 +- name: 'Interpretability vs Accuracy: Debugging Models with SHAP' + startOffset: 567 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=567 + endOffset: 719 +- name: 'Active Competition: River Flow Forecasting Project' + startOffset: 719 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=719 + endOffset: 837 +- name: 'Choosing Book Topics: Audience Data and Personal Curiosity' + startOffset: 837 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=837 + endOffset: 955 +- name: 'Publishing in Public: Chapter-by-Chapter Workflow' + startOffset: 955 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=955 + endOffset: 1027 +- name: 'Self-Publishing vs 
Publishers: Control, Editors, Royalties' + startOffset: 1027 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1027 + endOffset: 1138 +- name: 'Book Overview: Interpretable ML; Modeling Mindsets; Conformal Prediction; + SHAP' + startOffset: 1138 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1138 + endOffset: 1227 +- name: 'Conformal Prediction: Calibrated Uncertainty and Prediction Sets' + startOffset: 1227 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1227 + endOffset: 1424 +- name: 'SHAP Deep Dive: Practical Guide and Python Examples' + startOffset: 1424 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1424 + endOffset: 1577 +- name: 'Terminology: Explainable AI vs Interpretable Machine Learning' + startOffset: 1577 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1577 + endOffset: 1800 +- name: 'Work Style: Solo Writing, Collaboration, and Co-authoring' + startOffset: 1800 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1800 + endOffset: 1987 +- name: 'Staying Hands-On: Competitions to Maintain Practical Skills' + startOffset: 1987 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1987 + endOffset: 2181 +- name: 'Logbook Practice: Obsidian Notes for Experiments and Reflection' + startOffset: 2181 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2181 + endOffset: 2541 +- name: 'Writing Expertise: Teaching to Learn vs Being a Beginner' + startOffset: 2541 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2541 + endOffset: 2691 +- name: 'Feedback Strategy: Open Drafts, Beta Readers, and Iteration' + startOffset: 2691 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2691 + endOffset: 2916 +- name: 'Advice for Aspiring Technical Writers: Start Small and Publish' + startOffset: 2916 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2916 + endOffset: 3000 +- name: 'Becoming a Full-Time Author: Timeframe, Income, and Workload' + startOffset: 3000 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3000 + endOffset: 3229 +- 
name: 'Publishing Logistics: Leanpub, Amazon KDP, and Print-on-Demand' + startOffset: 3229 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3229 + endOffset: 3376 +- name: 'Where to Find Christoph: Website, Newsletter, and Socials' + startOffset: 3376 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3376 + endOffset: 3413 +- name: Closing Remarks and Episode Wrap-Up + startOffset: 3413 + url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3413 + endOffset: 3380 transcript: - header: Podcast Introduction - header: 'Guest Intro: Christoph Molnar, Interpretable ML Author' @@ -92,7 +204,7 @@ transcript: sec: 208 time: '3:28' who: Alexey -- header: Becoming a Full‑Time Technical Writer +- header: Becoming a Full-Time Technical Writer - line: '[chuckles] Yeah. And I also didn''t feel so free when writing the Master''s thesis. But when I started the book, I wrote it in the open, got feedback, and could just write how I felt. I [could] put in some jokes and not hide behind math @@ -312,7 +424,7 @@ transcript: sec: 933 time: '15:33' who: Alexey -- header: 'Publishing in Public: Chapter‑by‑Chapter Workflow' +- header: 'Publishing in Public: Chapter-by-Chapter Workflow' - line: Yeah, good question. For me, I already did some blogging before, but I always quit after a few months. But I already had a little bit of experience with writing more freely. With the book, it wasn't like, “Hide in my room for two years, and @@ -327,7 +439,7 @@ transcript: sec: 955 time: '15:55' who: Christoph -- header: 'Self‑Publishing vs Publishers: Control, Editors, Royalties' +- header: 'Self-Publishing vs Publishers: Control, Editors, Royalties' - line: But also, as we spoke briefly at the beginning, you don't have a publisher – you published on your own, right? [Christoph agrees] This requires a lot of self-discipline. 
If you don't publish a chapter, in the case with a publisher, @@ -583,7 +695,7 @@ transcript: sec: 1743 time: '29:03' who: Christoph -- header: 'Work Style: Solo Writing, Collaboration, and Co‑authoring' +- header: 'Work Style: Solo Writing, Collaboration, and Co-authoring' - line: Okay. Now you’ve been working as a technical book writer for a year, right? Do you feel lonely when you just write by yourself? Do you miss colleagues? sec: 1800 @@ -634,7 +746,7 @@ transcript: sec: 1901 time: '31:41' who: Christoph -- header: 'Staying Hands‑On: Competitions to Maintain Practical Skills' +- header: 'Staying Hands-On: Competitions to Maintain Practical Skills' - line: So you said you would write it faster because your co-author has other commitments. Then you also don't invest as much time as you could potentially because you probably… What I'm trying to say is that you probably still have time [left over]. So are @@ -1009,7 +1121,7 @@ transcript: sec: 2964 time: '49:24' who: Christoph -- header: 'Becoming a Full‑Time Author: Timeframe, Income, and Workload' +- header: 'Becoming a Full-Time Author: Timeframe, Income, and Workload' - line: Maybe also the question is, “How can I be a full-time technical author? What are my steps?” sec: 3000 @@ -1094,7 +1206,7 @@ transcript: sec: 3202 time: '53:22' who: Christoph -- header: 'Publishing Logistics: Leanpub, Amazon KDP, and Print‑on‑Demand' +- header: 'Publishing Logistics: Leanpub, Amazon KDP, and Print-on-Demand' - line: What do you use for publishing? Because you also have physical copies. I know there are websites where you can sell digital products – PDFs, videos, whatever. But you have physical books. @@ -1190,7 +1302,7 @@ transcript: sec: 3405 time: '56:45' who: Christoph -- header: Closing Remarks and Episode Wrap‑Up +- header: Closing Remarks and Episode Wrap-Up - line: Yeah, thanks. Indeed, it was nice. Unfortunately, this is all the time we have for today. 
sec: 3413 @@ -1206,128 +1318,21 @@ transcript: sec: 3422 time: '57:02' who: Alexey -description: Discover Interpretable ML, SHAP and Conformal Prediction with Python - examples and self-publishing tips, debug models, calibrate uncertainty, and publish. -intro: How can we make machine learning interpretable in practice — and how do you - turn that expertise into clear, usable technical writing? In this episode, Christoph - Molnar, statistician, machine learner, and author of Interpretable ML, walks through - the tools and workflows he uses to answer that question.

Christoph traces - his path from statistics and Kaggle competitions to becoming a full‑time technical - writer, and drills into core topics like SHAP for debugging models, conformal prediction - for calibrated uncertainty and prediction sets, and practical Python examples. We - also cover interpretability vs. accuracy, terminology around explainable AI, and - keeping skills sharp through competitions and an Obsidian logbook.

On the - writing side, Christoph explains his chapter‑by‑chapter “publishing in public” workflow, - self‑publishing choices (Leanpub, Amazon KDP, print‑on‑demand), feedback strategies - with beta readers, and advice for aspiring technical writers. Listen for actionable - guidance on applying interpretable machine learning techniques and concrete steps - for turning technical work into publishable, useful content. -dateadded: '2023-11-27' -duration: PT00H56M20S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=0 - endOffset: 42 -- name: 'Guest Intro: Christoph Molnar, Interpretable ML Author' - startOffset: 42 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=42 - endOffset: 92 -- name: 'Career Journey: From Statistics to Tech Writing' - startOffset: 92 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=92 - endOffset: 225 -- name: Becoming a Full‑Time Technical Writer - startOffset: 225 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=225 - endOffset: 397 -- name: 'Kaggle Beginnings: Linear Models to Practical ML' - startOffset: 397 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=397 - endOffset: 470 -- name: 'Origin Story: Interest in Interpretable Machine Learning' - startOffset: 470 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=470 - endOffset: 567 -- name: 'Interpretability vs Accuracy: Debugging Models with SHAP' - startOffset: 567 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=567 - endOffset: 719 -- name: 'Active Competition: River Flow Forecasting Project' - startOffset: 719 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=719 - endOffset: 837 -- name: 'Choosing Book Topics: Audience Data and Personal Curiosity' - startOffset: 837 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=837 - endOffset: 955 -- name: 'Publishing in Public: Chapter‑by‑Chapter Workflow' - startOffset: 955 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=955 - endOffset: 1027 -- 
name: 'Self‑Publishing vs Publishers: Control, Editors, Royalties' - startOffset: 1027 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1027 - endOffset: 1138 -- name: 'Book Overview: Interpretable ML; Modeling Mindsets; Conformal Prediction; - SHAP' - startOffset: 1138 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1138 - endOffset: 1227 -- name: 'Conformal Prediction: Calibrated Uncertainty and Prediction Sets' - startOffset: 1227 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1227 - endOffset: 1424 -- name: 'SHAP Deep Dive: Practical Guide and Python Examples' - startOffset: 1424 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1424 - endOffset: 1577 -- name: 'Terminology: Explainable AI vs Interpretable Machine Learning' - startOffset: 1577 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1577 - endOffset: 1800 -- name: 'Work Style: Solo Writing, Collaboration, and Co‑authoring' - startOffset: 1800 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1800 - endOffset: 1987 -- name: 'Staying Hands‑On: Competitions to Maintain Practical Skills' - startOffset: 1987 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=1987 - endOffset: 2181 -- name: 'Logbook Practice: Obsidian Notes for Experiments and Reflection' - startOffset: 2181 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2181 - endOffset: 2541 -- name: 'Writing Expertise: Teaching to Learn vs Being a Beginner' - startOffset: 2541 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2541 - endOffset: 2691 -- name: 'Feedback Strategy: Open Drafts, Beta Readers, and Iteration' - startOffset: 2691 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2691 - endOffset: 2916 -- name: 'Advice for Aspiring Technical Writers: Start Small and Publish' - startOffset: 2916 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=2916 - endOffset: 3000 -- name: 'Becoming a Full‑Time Author: Timeframe, Income, and Workload' - startOffset: 3000 - url: 
https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3000 - endOffset: 3229 -- name: 'Publishing Logistics: Leanpub, Amazon KDP, and Print‑on‑Demand' - startOffset: 3229 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3229 - endOffset: 3376 -- name: 'Where to Find Christoph: Website, Newsletter, and Socials' - startOffset: 3376 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3376 - endOffset: 3413 -- name: Closing Remarks and Episode Wrap‑Up - startOffset: 3413 - url: https://www.youtube.com/watch?v=LBuGzyOkx7c&t=3413 - endOffset: 3380 ---- +context: 'Context: Christoph Molnar’s journey from statistician and Kaggle competitor + to full-time technical author frames a consistent practice: hands-on modeling, careful + documentation, and public, iterative teaching about interpretable machine learning + techniques (SHAP, conformal prediction, etc.), plus the practical mechanics of publishing + and staying current. + Core narrative: At the episode’s center is the idea that trustworthy, useful machine + learning emerges not from opaque accuracy chasing but from a disciplined loop of + hands-on experimentation, clear interpretation, and open communication — using interpretable + methods and calibrated uncertainty to debug and understand models, keeping meticulous + logs and competitions to stay sharp, and publishing incrementally (with feedback + and transparency) to teach others while refining your own understanding. This unified + through-line ties together the technical tools, the writing and publishing choices, + and the everyday workflows that make complex ML accessible, reproducible, and actionable.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/christoph-molnar/){:target="_blank"} diff --git a/_podcast/s15e02-investing-in-open-source-data-tools.md b/_podcast/investing-in-open-source-developer-tools.md similarity index 96% rename from _podcast/s15e02-investing-in-open-source-data-tools.md rename to _podcast/investing-in-open-source-developer-tools.md index eb46496d..33e3d400 100644 --- a/_podcast/s15e02-investing-in-open-source-data-tools.md +++ b/_podcast/investing-in-open-source-developer-tools.md @@ -1,21 +1,141 @@ --- +title: "Early-Stage Investing in Open Source Developer Tools: Deal Sourcing, Due Diligence & Commercialization Models" +short: "Investing in Open-Source Data Tools" +season: 15 episode: 2 guests: - belawiertz -date: 2025-11-07 +image: images/podcast/investing-in-open-source-developer-tools.jpg ids: - anchor: atatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 + anchor: datatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 youtube: 7Bg1JQLnCao -image: images/podcast/s15e02-investing-in-open-source-data-tools.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Investing-in-Open-Source-Data-Tools---Bela-Wiertz-e274dr8 apple: https://podcasts.apple.com/us/podcast/investing-in-open-source-data-tools-bela-wiertz/id1541710331?i=1000621912675 spotify: https://open.spotify.com/episode/6mHnZ3IswczK46UP3MBp4d?si=KkrbjXmJSaiWbq3d9BzTUQ youtube: https://www.youtube.com/watch?v=7Bg1JQLnCao -season: 15 -short: Investing in Open-Source Data Tools -title: 'Investing in Open Source Developer, Data & AI Tooling: Go-to-Market, Funding - & Monetization' + +description: "Discover early-stage investing in open-source developer tools: deal sourcing, due diligence, and commercialization models for data, AI & developer tooling startups." +intro: "How do early-stage investors evaluate open-source developer tools — and what signals actually predict commercial success? 
In this episode, Bela Wiertz — who invests in early-stage open-source startups at a German family office focused on Data, AI & Developer Tooling — breaks down the investor playbook for sourcing, evaluating, and funding OSS companies. Drawing from hands-on deal flow and due diligence experience, Bela reveals how investors screen GitHub repositories, conduct developer interviews, and assess community engagement beyond vanity metrics like stars.

We explore the mechanics of open-source commercialization: open-core versus hosted services, enterprise licensing models, support revenue limitations, and why community-driven distribution creates unique investment opportunities. Bela explains practical due diligence techniques (co-investor reference checks, user adoption analysis, founder-market fit), funding stage dynamics from angel to seed rounds, and geographic investment patterns in European OSS startups. Real-world case studies include Hugging Face's AI ecosystem play, Supabase's Firebase alternative, Kong's API gateway monetization, and Qdrant's vector database positioning.

Listen to understand how investors think about open-source deal sourcing, what community metrics matter for fundraising, and which monetization models actually scale — essential insights for founders building OSS developer tools and investors evaluating this unique category." +dateadded: 2023-07-23 +topics: +- open source +- tools +- investing +- fundraising +- early-stage startups +duration: PT01H01M26S + +quotableClips: +- name: Episode Start & Welcome + startOffset: 0 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=0 + endOffset: 75 +- name: 'Guest Overview: Bela’s Role at a Family Office' + startOffset: 75 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=75 + endOffset: 160 +- name: 'Career Path: From Business Studies to Open Source Investing' + startOffset: 160 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=160 + endOffset: 337 +- name: 'Commercializing Open Source Communities: Company Builder Insights' + startOffset: 337 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=337 + endOffset: 584 +- name: Why Venture Funding Matters for Early-Stage Startups + startOffset: 584 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=584 + endOffset: 822 +- name: 'Open Source as Go-to-Market: Community Trust and Distribution' + startOffset: 822 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=822 + endOffset: 1000 +- name: 'Bottom-Up Distribution: Developer Adoption Feeding Enterprise Sales' + startOffset: 1000 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1000 + endOffset: 1113 +- name: 'Investment Focus: Early-Stage B2B Developer, Data & AI Tooling' + startOffset: 1113 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1113 + endOffset: 1187 +- name: 'Funding Stage Primer: Angels, Pre-Seed, and Seed Explained' + startOffset: 1187 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1187 + endOffset: 1340 +- name: 'Fundraising Strategy: 12–18 Month Runway & Use of Proceeds' + startOffset: 1340 + url: 
https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1340 + endOffset: 1422 +- name: 'Geographic & Sector Focus: Europe and Developer Stack Niches' + startOffset: 1422 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1422 + endOffset: 1519 +- name: 'Investor Types Compared: Angels, VCs, and Family Offices' + startOffset: 1519 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1519 + endOffset: 1828 +- name: 'Check Sizes & Stage Variability: No One-Size-Fits-All' + startOffset: 1828 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1828 + endOffset: 1951 +- name: 'Investment Criteria: Team, Market Need, and Commercialization Plan' + startOffset: 1951 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1951 + endOffset: 2187 +- name: 'Early-Stage Signals: Assessing Problem Validity over PMF' + startOffset: 2187 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2187 + endOffset: 2239 +- name: 'Due Diligence: Founder Calls, User Interviews, and Co-Investor Checks' + startOffset: 2239 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2239 + endOffset: 2341 +- name: 'Community Metrics: Interpreting GitHub Stars vs. 
Active Engagement' + startOffset: 2341 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2341 + endOffset: 2538 +- name: 'Sourcing Deal Flow: GitHub Screening, Data Tools, and Networking' + startOffset: 2538 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2538 + endOffset: 2649 +- name: 'Daily Sourcing Routine: Allocating Time for Outbound Discovery' + startOffset: 2649 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2649 + endOffset: 2783 +- name: 'Inbound Outreach: How Founders Should Pitch Investors' + startOffset: 2783 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2783 + endOffset: 2968 +- name: 'Open-Core & Licensing Strategies: Mixing Open and Proprietary Code' + startOffset: 2968 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2968 + endOffset: 3069 +- name: 'Monetization Models: Hosted Services, Enterprise Licenses, Support' + startOffset: 3069 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3069 + endOffset: 3287 +- name: 'Scalability Considerations: Limits of Support-Based Revenue' + startOffset: 3287 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3287 + endOffset: 3333 +- name: 'Open Source Outlook: Paths to Market Leadership and Challenges' + startOffset: 3333 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3333 + endOffset: 3446 +- name: 'Recent Open Source Successes: Hugging Face, Supabase, Kong, Qdrant' + startOffset: 3446 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3446 + endOffset: 3618 +- name: Recommended Reading & Resources on Investing and Community Building + startOffset: 3618 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3618 + endOffset: 3739 +- name: Episode Wrap-Up & Closing Remarks + startOffset: 3739 + url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3739 + endOffset: 3686 + transcript: - header: Episode Start & Welcome - header: 'Guest Overview: Bela’s Role at a Family Office' @@ -1085,135 +1205,6 @@ transcript: sec: 3761 time: '1:02:41' who: Alexey -intro: How do you build a 
sustainable business around open source developer, data - and AI tooling — and what does it take to fund, commercialize and scale it? In this - episode Bela Wiertz, who works at a German family office investing in VC funds and - early-stage startups with a focus on open-source Data, AI & Developer Tooling, walks - through the practical playbook for founders and investors. Drawing on hands-on sourcing - and evaluation of early-stage open-source companies, Bela covers go-to-market strategies - that leverage community trust and bottom-up developer adoption, the role of venture - funding for angels, pre-seed and seed rounds, and fundraising hygiene like a 12–18 - month runway. We dig into open-core and licensing trade-offs, monetization models - (hosted services, enterprise licenses, support), limits to support-led revenue, - and how to read community metrics — GitHub stars versus active engagement. Bela - also explains sourcing and due diligence techniques (GitHub screening, user interviews, - co-investor checks), geographic and sector focus in Europe, and real-world examples - like Hugging Face, Supabase, Kong and Qdrant. Listen to gain actionable frameworks - for GTM, funding strategy, monetization and early-stage investment signals for open - source developer, data and AI tooling. -description: 'Discover open source go-to-market for developer tooling: funding, monetization - models, community metrics and fundraising tactics to scale early-stage startups.' 
-dateadded: '2023-07-23' -duration: PT01H01M26S -quotableClips: -- name: Episode Start & Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=0 - endOffset: 75 -- name: 'Guest Overview: Bela’s Role at a Family Office' - startOffset: 75 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=75 - endOffset: 160 -- name: 'Career Path: From Business Studies to Open Source Investing' - startOffset: 160 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=160 - endOffset: 337 -- name: 'Commercializing Open Source Communities: Company Builder Insights' - startOffset: 337 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=337 - endOffset: 584 -- name: Why Venture Funding Matters for Early-Stage Startups - startOffset: 584 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=584 - endOffset: 822 -- name: 'Open Source as Go-to-Market: Community Trust and Distribution' - startOffset: 822 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=822 - endOffset: 1000 -- name: 'Bottom-Up Distribution: Developer Adoption Feeding Enterprise Sales' - startOffset: 1000 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1000 - endOffset: 1113 -- name: 'Investment Focus: Early-Stage B2B Developer, Data & AI Tooling' - startOffset: 1113 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1113 - endOffset: 1187 -- name: 'Funding Stage Primer: Angels, Pre-Seed, and Seed Explained' - startOffset: 1187 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1187 - endOffset: 1340 -- name: 'Fundraising Strategy: 12–18 Month Runway & Use of Proceeds' - startOffset: 1340 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1340 - endOffset: 1422 -- name: 'Geographic & Sector Focus: Europe and Developer Stack Niches' - startOffset: 1422 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1422 - endOffset: 1519 -- name: 'Investor Types Compared: Angels, VCs, and Family Offices' - startOffset: 1519 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1519 - endOffset: 1828 
-- name: 'Check Sizes & Stage Variability: No One-Size-Fits-All' - startOffset: 1828 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1828 - endOffset: 1951 -- name: 'Investment Criteria: Team, Market Need, and Commercialization Plan' - startOffset: 1951 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=1951 - endOffset: 2187 -- name: 'Early-Stage Signals: Assessing Problem Validity over PMF' - startOffset: 2187 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2187 - endOffset: 2239 -- name: 'Due Diligence: Founder Calls, User Interviews, and Co-Investor Checks' - startOffset: 2239 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2239 - endOffset: 2341 -- name: 'Community Metrics: Interpreting GitHub Stars vs. Active Engagement' - startOffset: 2341 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2341 - endOffset: 2538 -- name: 'Sourcing Deal Flow: GitHub Screening, Data Tools, and Networking' - startOffset: 2538 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2538 - endOffset: 2649 -- name: 'Daily Sourcing Routine: Allocating Time for Outbound Discovery' - startOffset: 2649 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2649 - endOffset: 2783 -- name: 'Inbound Outreach: How Founders Should Pitch Investors' - startOffset: 2783 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2783 - endOffset: 2968 -- name: 'Open-Core & Licensing Strategies: Mixing Open and Proprietary Code' - startOffset: 2968 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=2968 - endOffset: 3069 -- name: 'Monetization Models: Hosted Services, Enterprise Licenses, Support' - startOffset: 3069 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3069 - endOffset: 3287 -- name: 'Scalability Considerations: Limits of Support-Based Revenue' - startOffset: 3287 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3287 - endOffset: 3333 -- name: 'Open Source Outlook: Paths to Market Leadership and Challenges' - startOffset: 3333 - url: 
https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3333 - endOffset: 3446 -- name: 'Recent Open Source Successes: Hugging Face, Supabase, Kong, Qdrant' - startOffset: 3446 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3446 - endOffset: 3618 -- name: Recommended Reading & Resources on Investing and Community Building - startOffset: 3618 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3618 - endOffset: 3739 -- name: Episode Wrap-Up & Closing Remarks - startOffset: 3739 - url: https://www.youtube.com/watch?v=7Bg1JQLnCao&t=3739 - endOffset: 3686 --- Links: diff --git a/_podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md similarity index 95% rename from _podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md rename to _podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md index c6d64488..84f6e67e 100644 --- a/_podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.md +++ b/_podcast/job-search-strategy-in-tech-projects-skills-cv-networking.md @@ -1,20 +1,138 @@ --- +title: "Tech Job Search Strategy: Portfolio Projects, Resume Tips and Networking" +short: "Accelerating The Job Hunt for The Perfect Job in Tech" +season: 17 episode: 6 guests: - sarahmestiri +image: images/podcast/job-search-strategy-in-tech-projects-skills-cv-networking.jpg ids: - anchor: atatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 + anchor: datatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 youtube: PchwbIs0tOg -image: images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Accelerating-The-Job-Hunt-for-The-Perfect-Job-in-Tech---Sarah-Mestiri-e2f93r6 apple: https://podcasts.apple.com/us/podcast/accelerating-the-job-hunt-for-the-perfect-job-in/id1541710331?i=1000643971899 spotify: 
https://open.spotify.com/episode/7giHGC86pjtIYrLOvwP7g4?si=NB9w6S6QTfCBHB_n93LkBQ youtube: https://www.youtube.com/watch?v=PchwbIs0tOg -season: 17 -short: Accelerating The Job Hunt for The Perfect Job in Tech -title: 'Data Science Career Change: 4-Pillar Job Search, Networking & Informational - Interview Guide' +description: "Learn a four-pillar tech job search: build portfolio projects, sharpen your resume and network strategically to land ML/data roles faster with outreach tactics." +topics: +- MLOps +- data engineering +- machine learning +- career transition +- job search +intro: "How do you turn portfolio projects, a sharper resume, and targeted networking into a successful tech job search? In this episode Sarah Mestiri — Data Scientist and Certified Career & Interview Coach with 6+ years in tech across startups, international firms and financial services (FIS) — walks through a practical job search strategy for career changers and return-to-work professionals. Sarah outlines a four-pillar framework (goals, networking, CV, strategy) and shows how to define your ideal role, choose a specialization (ML engineering, data engineering, MLOps), and validate skills through projects versus courses. You’ll hear step-by-step advice on building a top-5 target company list, crafting personalized outreach and informational interview questions, and creating a weekly networking action plan that leverages weak ties and referrals. The episode also covers resume tactics — prioritizing projects, skills, and storytelling — self-research methods, assessments, part-time strategies, and age or career-change considerations. Listen for actionable takeaways: how to build portfolio projects that prove impact, write concise outreach messages, and structure a job search you can maintain — plus recommended resources and follow-up support (links and Slack) to help you execute." 
+dateadded: 2024-02-03 +duration: PT01H26S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=0 + endOffset: 171 +- name: Guest Introduction & Coaching Mission + startOffset: 171 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=171 + endOffset: 240 +- name: 'Career Path: Computer Science, Full-Stack to Data Science' + startOffset: 240 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=240 + endOffset: 339 +- name: AdTech Experience & Thriving Career Moms Project + startOffset: 339 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=339 + endOffset: 405 +- name: 'Becoming a Career Coach: Community, Mentorship, First Mentees' + startOffset: 405 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=405 + endOffset: 567 +- name: 'Client Profiles: Career Changers and Return-to-Work Support' + startOffset: 567 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=567 + endOffset: 659 +- name: 'Four-Pillar Job Search Framework: Goals, Networking, CV, Strategy' + startOffset: 659 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=659 + endOffset: 870 +- name: 'Defining Your Ideal Role: Tasks, Skills, and Future Vision' + startOffset: 870 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=870 + endOffset: 907 +- name: 'Job Research: Role Analysis and Informational Interviews' + startOffset: 907 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=907 + endOffset: 1072 +- name: 'Choosing a Specialization: ML Engineering, Data Engineering, MLOps' + startOffset: 1072 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1072 + endOffset: 1201 +- name: 'Narrowing Focus: Aligning Skills, Interests, and Market Demand' + startOffset: 1201 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1201 + endOffset: 1588 +- name: 'Courses vs Projects: Validate Skills Through Practical Work' + startOffset: 1588 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1588 + endOffset: 1775 +- name: 'Target Company 
Selection: Build a Top-5 Company List' + startOffset: 1775 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1775 + endOffset: 1900 +- name: 'Networking Value: Weak Ties, Referrals, and Opportunity Sources' + startOffset: 1900 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1900 + endOffset: 1937 +- name: 'Informational Interview Best Practices: Outreach and Preparation' + startOffset: 1937 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1937 + endOffset: 2058 +- name: 'Crafting Outreach Messages: Personalization and Short Asks' + startOffset: 2058 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2058 + endOffset: 2170 +- name: 'Key Informational Interview Questions: Day-to-Day & Success Factors' + startOffset: 2170 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2170 + endOffset: 2272 +- name: 'Building Mutual Value: Offer Help and Maintain Relationships' + startOffset: 2272 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2272 + endOffset: 2477 +- name: 'Networking Action Plan: Weekly Outreach and Re-engagement' + startOffset: 2477 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2477 + endOffset: 2705 +- name: 'Resume Strategy: Prioritizing Projects, Skills, and Storytelling' + startOffset: 2705 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2705 + endOffset: 2852 +- name: 'Self-Research Methods: Company Analysis and Skill Matching' + startOffset: 2852 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2852 + endOffset: 2958 +- name: 'Strength & Interest Assessments: Gallup, HIGH5, MyNextMove' + startOffset: 2958 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2958 + endOffset: 3008 +- name: 'Part-Time Work Strategy: Timing, Negotiation, and Flexibility' + startOffset: 3008 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3008 + endOffset: 3210 +- name: 'Age and Career Change: Emphasize Results and Transferable Skills' + startOffset: 3210 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3210 + endOffset: 3358 +- 
name: 'Applying During Courses: Share Learnings and Build Visibility' + startOffset: 3358 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3358 + endOffset: 3626 +- name: 'Recommended Resources: Tests, Books, Podcasts, MyNextMove' + startOffset: 3626 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3626 + endOffset: 3722 +- name: 'Follow-Up & Support: Links, Slack, and Further Questions' + startOffset: 3722 + url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3722 + endOffset: 3626 transcript: - header: Podcast Introduction - header: Guest Introduction & Coaching Mission @@ -1037,135 +1155,16 @@ transcript: sec: 3797 time: '1:03:17' who: Sarah -description: 'Master data science career change with a 4-pillar job search: informational - interviews, resume strategy & specialization tips to land your role.' -intro: 'Facing a career change into data science but unsure how to structure your - job search, networking, and informational interviews? In this episode, Sarah Mestiri - — data scientist and certified career & interview coach with 6+ years in tech (international - companies, FIS, startups) — breaks down a practical Four‑Pillar Job Search Framework: - goals, networking, CV, and strategy. Sarah draws on her transition from full‑stack - engineering to data science and her work supporting women returning to work to show - how to define your ideal role, choose a specialization (ML engineering, data engineering, - MLOps), and validate skills through projects versus courses.

You’ll hear - step‑by‑step guidance on job research and informational interviews: outreach messaging, - key questions to ask, and how to build mutual value and referrals. The episode also - covers resume strategy, weekly networking action plans, target company selection, - part‑time transition tactics, and assessment tools to align strengths and interests. - Listen to gain an actionable job search framework, templates for outreach and interviews, - and resources to accelerate a successful data science career change.' -dateadded: '2024-02-03' -duration: PT01H26S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=0 - endOffset: 171 -- name: Guest Introduction & Coaching Mission - startOffset: 171 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=171 - endOffset: 240 -- name: 'Career Path: Computer Science, Full-Stack to Data Science' - startOffset: 240 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=240 - endOffset: 339 -- name: AdTech Experience & Thriving Career Moms Project - startOffset: 339 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=339 - endOffset: 405 -- name: 'Becoming a Career Coach: Community, Mentorship, First Mentees' - startOffset: 405 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=405 - endOffset: 567 -- name: 'Client Profiles: Career Changers and Return-to-Work Support' - startOffset: 567 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=567 - endOffset: 659 -- name: 'Four-Pillar Job Search Framework: Goals, Networking, CV, Strategy' - startOffset: 659 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=659 - endOffset: 870 -- name: 'Defining Your Ideal Role: Tasks, Skills, and Future Vision' - startOffset: 870 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=870 - endOffset: 907 -- name: 'Job Research: Role Analysis and Informational Interviews' - startOffset: 907 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=907 - endOffset: 1072 -- name: 'Choosing 
a Specialization: ML Engineering, Data Engineering, MLOps' - startOffset: 1072 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1072 - endOffset: 1201 -- name: 'Narrowing Focus: Aligning Skills, Interests, and Market Demand' - startOffset: 1201 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1201 - endOffset: 1588 -- name: 'Courses vs Projects: Validate Skills Through Practical Work' - startOffset: 1588 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1588 - endOffset: 1775 -- name: 'Target Company Selection: Build a Top-5 Company List' - startOffset: 1775 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1775 - endOffset: 1900 -- name: 'Networking Value: Weak Ties, Referrals, and Opportunity Sources' - startOffset: 1900 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1900 - endOffset: 1937 -- name: 'Informational Interview Best Practices: Outreach and Preparation' - startOffset: 1937 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=1937 - endOffset: 2058 -- name: 'Crafting Outreach Messages: Personalization and Short Asks' - startOffset: 2058 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2058 - endOffset: 2170 -- name: 'Key Informational Interview Questions: Day-to-Day & Success Factors' - startOffset: 2170 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2170 - endOffset: 2272 -- name: 'Building Mutual Value: Offer Help and Maintain Relationships' - startOffset: 2272 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2272 - endOffset: 2477 -- name: 'Networking Action Plan: Weekly Outreach and Re-engagement' - startOffset: 2477 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2477 - endOffset: 2705 -- name: 'Resume Strategy: Prioritizing Projects, Skills, and Storytelling' - startOffset: 2705 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2705 - endOffset: 2852 -- name: 'Self-Research Methods: Company Analysis and Skill Matching' - startOffset: 2852 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2852 - 
endOffset: 2958 -- name: 'Strength & Interest Assessments: Gallup, HIGH5, MyNextMove' - startOffset: 2958 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=2958 - endOffset: 3008 -- name: 'Part-Time Work Strategy: Timing, Negotiation, and Flexibility' - startOffset: 3008 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3008 - endOffset: 3210 -- name: 'Age and Career Change: Emphasize Results and Transferable Skills' - startOffset: 3210 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3210 - endOffset: 3358 -- name: 'Applying During Courses: Share Learnings and Build Visibility' - startOffset: 3358 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3358 - endOffset: 3626 -- name: 'Recommended Resources: Tests, Books, Podcasts, MyNextMove' - startOffset: 3626 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3626 - endOffset: 3722 -- name: 'Follow-Up & Support: Links, Slack, and Further Questions' - startOffset: 3722 - url: https://www.youtube.com/watch?v=PchwbIs0tOg&t=3722 - endOffset: 3626 ---- +context: 'Context: A coach-led roadmap for technical career changers (often returning + parents) that covers defining an ideal role, choosing a specialization, validating + skills through projects, targeting companies, crafting resumes/stories, and running + consistent, relationship-driven outreach. + Core theme: Intentionally design a focused, market-aligned career identity and then + convert it into tangible evidence and relationships—using targeted projects, tailored + applications, informational interviews, and a weekly, measurable outreach plan—to + turn validated skills and clear storytelling into job offers.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/sarahmestiri/){:target="_blank"} diff --git a/_podcast/s20e02-competitive-machine-learning-and-teaching.md b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md similarity index 87% rename from _podcast/s20e02-competitive-machine-learning-and-teaching.md rename to _podcast/kaggle-grandmaster-to-production-ml-and-education.md index dde3bc62..810ca2c8 100644 --- a/_podcast/s20e02-competitive-machine-learning-and-teaching.md +++ b/_podcast/kaggle-grandmaster-to-production-ml-and-education.md @@ -1,19 +1,136 @@ --- +title: "From Kaggle Grandmaster to Production ML: Competition Rigor, System Design & Large-Scale Education" +short: "Competitive Machine Learning and Teaching" +season: 20 episode: 2 guests: - alexanderguschin +image: images/podcast/kaggle-grandmaster-to-production-ml-and-education.jpg ids: - anchor: atalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 + anchor: datatalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 youtube: NfAJAr7FvyY&t -image: images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Competitive-Machine-Leaning-And-Teaching--Alexander-Guschin-e2uslu8 apple: https://podcasts.apple.com/us/podcast/competitive-machine-leaning-and-teaching-alexander/id1541710331?i=1000692309866 spotify: https://open.spotify.com/episode/6xsov9a1US8D8w5xKcjkNm youtube: https://www.youtube.com/watch?v=NfAJAr7FvyY&t -season: 20 -short: Competitive Machine Learning and Teaching -title: 'From Kaggle to Production: MLOps, Competition Strategies & Curriculum Design' +description: "Discover Production ML, system design, and competition rigor from a Kaggle Grandmaster—practical deployment tactics, model scaling tips, and education strategies." 
+topics: +- machine learning +- MLOps +- data science +- open-source +- tools +- teaching +- career transition +intro: "How do you take the rigor and creativity that wins Kaggle competitions and turn it into reliable, maintainable production ML? In this episode we explore that question with Alexander Guschin — a Machine Learning Engineer with 10+ years of experience, a Kaggle Grandmaster ranked 5th globally, a leader of DS and SE teams, contributor to open-source ML tools, and instructor to 100K+ students.

Alexander breaks down the differences between competition modeling and production constraints, practical approaches to system design for machine learning, and lessons for scaling education and teams around technical content. Key topics include competition rigor versus maintainability, production ML and model deployment considerations, designing ML systems at scale, leveraging open-source tooling, and approaches to teaching complex ML concepts to large audiences.

Listeners will come away with actionable perspective on translating research and contest solutions into production-ready pipelines, questions to ask when designing ML systems, and guidance on building reproducible workflows and scalable learning programs. Ideal for machine learning engineers, technical leaders, and educators focused on production ML, MLOps, and large-scale education." +dateadded: 2025-02-26 +duration: PT01H05M09S +quotableClips: +- name: Episode Start + startOffset: 0 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=0 + endOffset: 230 +- name: Guest Introduction & Kaggle Grandmaster Credentials + startOffset: 230 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=230 + endOffset: 388 +- name: Early Industry Roles & Open-Source Contributions + startOffset: 388 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=388 + endOffset: 516 +- name: MLEM Story & Tooling Anecdotes + startOffset: 516 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=516 + endOffset: 701 +- name: Kaggle Beginnings & Local Community Influence + startOffset: 701 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=701 + endOffset: 887 +- name: Balancing Competitions with University Studies + startOffset: 887 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=887 + endOffset: 956 +- name: Time Investment & Learning Curve on Competitions + startOffset: 956 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=956 + endOffset: 1030 +- name: Kaggle for Skill Broadening, Domain Exposure & Interviews + startOffset: 1030 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1030 + endOffset: 1302 +- name: 'Competition Preparation: Iteration, Baselines & Infrastructure' + startOffset: 1302 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1302 + endOffset: 1365 +- name: Applying Competition Experience to Production ML + startOffset: 1365 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1365 + endOffset: 1578 +- name: Regional Differences in Kaggle’s 
Career Value + startOffset: 1578 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1578 + endOffset: 1748 +- name: 'Collaboration Strategies: Teamwork vs. Solo Competitions' + startOffset: 1748 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1748 + endOffset: 1890 +- name: Teaching Teens & Participation in AI Olympiads + startOffset: 1890 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1890 + endOffset: 2005 +- name: 'Transition to Teaching: From Competitor to Instructor' + startOffset: 2005 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2005 + endOffset: 2277 +- name: 'Practical Curriculum Design: Production ML & MLOps' + startOffset: 2277 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2277 + endOffset: 2470 +- name: Machine Learning System Design Projects & Real-World Work + startOffset: 2470 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2470 + endOffset: 2810 +- name: 'Problem-Centered Assignments: Bot Detection Case Study' + startOffset: 2810 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2810 + endOffset: 3010 +- name: Teamwork, Communication & Dual Leaderboards (ML + Technical) + startOffset: 3010 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3010 + endOffset: 3250 +- name: 'Online Education at Scale: Coursera Course & 100k Students' + startOffset: 3250 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3250 + endOffset: 3382 +- name: Teaching Platform Development & Student-Built Software + startOffset: 3382 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3382 + endOffset: 3455 +- name: Documentation, Mentorship & Industry Partnerships + startOffset: 3455 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3455 + endOffset: 3591 +- name: Demonstrating Kaggle’s Business Value to Managers + startOffset: 3591 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3591 + endOffset: 3708 +- name: 'Competition Essentials: EDA, Validation & No Single Trick' + startOffset: 3708 + url: 
https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3708 + endOffset: 3791 +- name: 'Generative AI & AutoML: Productivity vs. Winning Solutions' + startOffset: 3791 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3791 + endOffset: 3913 +- name: 'Career Reflections: Current Activity and Kaggle Legacy' + startOffset: 3913 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3913 + endOffset: 4134 +- name: Closing Remarks & Episode Wrap-Up + startOffset: 4134 + url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=4134 + endOffset: 3909 transcript: - header: Episode Start - header: Guest Introduction & Kaggle Grandmaster Credentials @@ -608,132 +725,29 @@ transcript: sec: 4139 time: '1:08:59' who: Alexey -description: Master Kaggle strategies, MLOps and curriculum design to convert competition - skills into production ML, scalable courses, teamwork and career boosts. -intro: How do you turn Kaggle competition wins into production-ready machine learning - and effective teaching? In this episode, Alexander Guschin — a machine learning engineer - with 10+ years’ experience, a Kaggle Grandmaster ranked 5th globally, leader of - DS and SE teams, open-source contributor, and instructor to 100K+ students — walks - through that transition. We cover MLOps and tooling anecdotes (including the MLEM - story), practical competition strategies like baselines, iteration and infrastructure, - and how those practices map to production ML. Alexander also discusses preparing - for competitions while studying, regional career differences, solo vs. team collaboration, - and demoing Kaggle’s business value to managers. For educators and program leads, - he outlines curriculum design grounded in machine learning system design projects, - problem-centered assignments (a bot-detection case study), dual leaderboards for - ML and engineering, and scaling online courses—drawing on his Coursera work and - student-built software. 
Listeners will gain actionable guidance on competition strategy, - MLOps best practices, designing real-world assignments, and how to use competitive - experience to deliver production-grade ML and teach it effectively. -dateadded: '2025-02-26' -duration: PT01H05M09S -quotableClips: -- name: Episode Start - startOffset: 0 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=0 - endOffset: 230 -- name: Guest Introduction & Kaggle Grandmaster Credentials - startOffset: 230 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=230 - endOffset: 388 -- name: Early Industry Roles & Open-Source Contributions - startOffset: 388 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=388 - endOffset: 516 -- name: MLEM Story & Tooling Anecdotes - startOffset: 516 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=516 - endOffset: 701 -- name: Kaggle Beginnings & Local Community Influence - startOffset: 701 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=701 - endOffset: 887 -- name: Balancing Competitions with University Studies - startOffset: 887 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=887 - endOffset: 956 -- name: Time Investment & Learning Curve on Competitions - startOffset: 956 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=956 - endOffset: 1030 -- name: Kaggle for Skill Broadening, Domain Exposure & Interviews - startOffset: 1030 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1030 - endOffset: 1302 -- name: 'Competition Preparation: Iteration, Baselines & Infrastructure' - startOffset: 1302 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1302 - endOffset: 1365 -- name: Applying Competition Experience to Production ML - startOffset: 1365 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1365 - endOffset: 1578 -- name: Regional Differences in Kaggle’s Career Value - startOffset: 1578 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1578 - endOffset: 1748 -- name: 'Collaboration Strategies: Teamwork vs. 
Solo Competitions' - startOffset: 1748 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1748 - endOffset: 1890 -- name: Teaching Teens & Participation in AI Olympiads - startOffset: 1890 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=1890 - endOffset: 2005 -- name: 'Transition to Teaching: From Competitor to Instructor' - startOffset: 2005 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2005 - endOffset: 2277 -- name: 'Practical Curriculum Design: Production ML & MLOps' - startOffset: 2277 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2277 - endOffset: 2470 -- name: Machine Learning System Design Projects & Real-World Work - startOffset: 2470 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2470 - endOffset: 2810 -- name: 'Problem-Centered Assignments: Bot Detection Case Study' - startOffset: 2810 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=2810 - endOffset: 3010 -- name: Teamwork, Communication & Dual Leaderboards (ML + Technical) - startOffset: 3010 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3010 - endOffset: 3250 -- name: 'Online Education at Scale: Coursera Course & 100k Students' - startOffset: 3250 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3250 - endOffset: 3382 -- name: Teaching Platform Development & Student-Built Software - startOffset: 3382 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3382 - endOffset: 3455 -- name: Documentation, Mentorship & Industry Partnerships - startOffset: 3455 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3455 - endOffset: 3591 -- name: Demonstrating Kaggle’s Business Value to Managers - startOffset: 3591 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3591 - endOffset: 3708 -- name: 'Competition Essentials: EDA, Validation & No Single Trick' - startOffset: 3708 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3708 - endOffset: 3791 -- name: 'Generative AI & AutoML: Productivity vs. 
Winning Solutions' - startOffset: 3791 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3791 - endOffset: 3913 -- name: 'Career Reflections: Current Activity and Kaggle Legacy' - startOffset: 3913 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=3913 - endOffset: 4134 -- name: Closing Remarks & Episode Wrap-Up - startOffset: 4134 - url: https://www.youtube.com/watch?v=NfAJAr7FvyY&t&t=4134 - endOffset: 3909 ---- +context: 'Context: A Kaggle Grandmaster recounts a career arc from competitive modeling + and open-source tooling (MLEM) through industry roles, curriculum design, large-scale + online teaching, and mentoring. Episode segments cover how competitions teach iterative + problem-solving, validation, infrastructure and teamwork; how those skills map (and + sometimes must be adapted) to production ML and MLOps; how to design practical coursework + and assessments; and how to show business value and respond to new tools like AutoML + and generative AI. + + Core through-line (single high-level theme): Hands-on, competition-driven practice—grounded + in iteration, rigorous validation, tooling, and community—is the crucible that converts + data-science craft into production-ready systems, scalable education, and demonstrable + career and business impact. + Key themes that support this through-line: - Competitions as accelerated, low-risk + labs for learning baselines, feature engineering, and workflows. - The necessity + of infrastructure, repeatable pipelines, and MLOps to make contest solutions production-ready. + - Teaching and curriculum design that mirror real-world system projects to transfer + practical skills at scale. - Community, mentorship, documentation, and open-source + tooling as force multipliers for learning and adoption. - Communicating business + value and adapting competitive techniques to regional and organizational contexts. 
+ - New productivity tools (AutoML, generative AI) change how work is done but reinforce + the need for sound validation and system design.' +--- Links: * [Linkedin](https://www.linkedin.com/in/1aguschin/){:target="_blank"} diff --git a/_podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md similarity index 95% rename from _podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md rename to _podcast/knowledge-graphs-and-llms-for-automotive-rnd.md index fcb7e20b..1f5aed8f 100644 --- a/_podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.md +++ b/_podcast/knowledge-graphs-and-llms-for-automotive-rnd.md @@ -1,19 +1,120 @@ --- +title: "Using Knowledge Graphs & LLMs for Automotive R&D: RAG, Graph ML & Crash Simulation" +short: "Knowledge Graphs and LLMs Across Academia and Industry" +season: 18 episode: 2 guests: - anahitapakiman +image: images/podcast/knowledge-graphs-and-llms-for-automotive-rnd.jpg ids: - anchor: atatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 + anchor: datatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 youtube: YncdlUscUOo -image: images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Knowledge-Graphs-and-LLMs-Across-Academia-and-Industry---Anahita-Pakiman-e2hpo20 apple: https://podcasts.apple.com/us/podcast/knowledge-graphs-and-llms-across-academia-and/id1541710331?i=1000651561079 spotify: https://open.spotify.com/episode/1yDgx6uNaSQxKTjGU1qtIj?si=g0xQjWmDTRinzxhoYV3sdA youtube: https://www.youtube.com/watch?v=YncdlUscUOo -season: 18 -short: Knowledge Graphs and LLMs Across Academia and Industry -title: 'Using Knowledge Graphs & LLMs for Automotive R&D: RAG, Graph ML & Crash Simulation' +description: "Learn Knowledge Graphs, LLMs & 
RAG for automotive R&D: optimize crash simulation, apply Graph ML to FEA, reduce hallucination and speed prototyping" +topics: +- LLMs +- knowledge graphs +- graph ML +- retrieval-augmented generation +- embeddings +- vector databases +intro: "How can knowledge graphs and large language models (LLMs) be combined to accelerate automotive R&D — from crash simulation insights to reproducible reports? In this episode Anahita Pakiman, a data scientist-engineer who moved from mechanical engineering and finite element analysis (FEA) into applied AI and now works as Senior Knowledge Graph-Data Scientist Consultant at brox IT-Solutions, walks through practical strategies and tradeoffs.

We cover FEA vs machine learning, optimization and topology in crash simulations, and why teams adopt Neo4j for semantic reporting and load-path detection. Anahita explains graph vs tabular representations, moving from knowledge graphs to computational graphs with NetworkX, and applying Graph Data Science and Graph ML techniques like SimRank. She demonstrates grounding LLMs with retrieval-augmented generation (RAG), contrasts embeddings and vector databases with KG semantics, and shows Cypher-driven prompt templates. The episode also addresses trust, hallucination and verification limits of LLM-extracted knowledge, plus the ADPT-LRN-PHYS project for adaptive paper reading and graph visualization.

Listen to learn concrete approaches for combining knowledge graphs, RAG, graph ML and LLMs to improve crash simulation analysis, semantic reporting, and deployable pipelines for automotive R&D." +dateadded: 2024-04-07 +duration: PT00H59M24S +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=0 + endOffset: 100 +- name: 'Guest Bio: career path from mechanical engineering to applied AI' + startOffset: 100 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=100 + endOffset: 177 +- name: Guest Background & Career Transition + startOffset: 177 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=177 + endOffset: 337 +- name: Applied Mechanics & Finite Element Analysis (FEA) overview + startOffset: 337 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=337 + endOffset: 485 +- name: 'FEA vs Machine Learning: numerical modeling vs data-driven approaches' + startOffset: 485 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=485 + endOffset: 530 +- name: Optimization, Topology & Semantic Reporting in crash simulations + startOffset: 530 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=530 + endOffset: 958 +- name: 'Knowledge Graphs for Automotive R&D: motivation and Neo4j adoption' + startOffset: 958 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=958 + endOffset: 1232 +- name: 'Graph vs Tabular Representations: visualization, clustering, load-path detection' + startOffset: 1232 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1232 + endOffset: 1575 +- name: From Knowledge Graphs to Computational Graphs (NetworkX & graph analytics) + startOffset: 1575 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1575 + endOffset: 1680 +- name: 'Graph Data Science & Graph ML: similarity measures and SimRank' + startOffset: 1680 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1680 + endOffset: 2023 +- name: 'Combining Knowledge Graphs & LLMs: grounding and retrieval-augmented generation + (RAG)' + 
startOffset: 2023 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2023 + endOffset: 2290 +- name: Text Chunking, Embeddings & Vector Databases vs Knowledge Graph Semantics + startOffset: 2290 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2290 + endOffset: 2396 +- name: Prompt Templates & KG-driven Retrieval (Cypher-based examples) + startOffset: 2396 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2396 + endOffset: 2423 +- name: 'RAG vs Transfer Learning: embeddings, fine-tuning, and distinctions' + startOffset: 2423 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2423 + endOffset: 2562 +- name: Trust, Hallucination & Verification Limits of LLM-extracted Knowledge + startOffset: 2562 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2562 + endOffset: 2653 +- name: 'ADPT-LRN-PHYS Project Overview: LLM + KG for adaptive learning and paper + reading' + startOffset: 2653 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2653 + endOffset: 2830 +- name: 'Paper Parsing & KG Visualization: sections, keywords, PageRank and reference + mapping' + startOffset: 2830 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2830 + endOffset: 3258 +- name: 'Project Challenges: automating graph generation and scoping the demo' + startOffset: 3258 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3258 + endOffset: 3336 +- name: 'Deployment & Frontend Issues: Streamlit limits and state management for graph + UIs' + startOffset: 3336 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3336 + endOffset: 3466 +- name: 'Learning Resources: graph ML courses, Jure Leskovec, Graph Conference and + KG+LLM courses' + startOffset: 3466 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3466 + endOffset: 3641 +- name: Episode Conclusion and Takeaways + startOffset: 3641 + url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3641 + endOffset: 3564 transcript: - header: Episode Introduction - header: 'Guest Bio: career path from mechanical engineering to applied AI' 
@@ -1037,119 +1138,19 @@ transcript: sec: 3664 time: '1:01:04' who: Alexey -description: Discover how knowledge graphs, LLMs and RAG boost automotive R&D—improve - crash simulation insights, grounded retrieval, graph ML and faster paper parsing. -intro: How can knowledge graphs and large language models (LLMs) be combined to improve - automotive R&D workflows like crash simulation and paper reading? In this episode - Anahita Pakiman—Senior Knowledge Graph-Data Scientist Consultant at brox IT-Solutions—walks - us from her mechanical engineering roots into applied AI, explaining how finite - element analysis (FEA) and optimization intersect with data-driven approaches.

- We cover practical topics including FEA vs. machine learning, topology optimization, - semantic reporting for crash simulations, and the motivation for adopting knowledge - graphs (Neo4j) in automotive R&D. Anahita compares graph and tabular representations, - shows how NetworkX and graph analytics bridge knowledge graphs to computational - graphs, and dives into graph data science techniques like similarity measures and - SimRank. She also explains grounding LLMs with retrieval-augmented generation (RAG), - the trade-offs between embeddings/vector databases and KG semantics, Cypher-driven - retrieval, prompt templates, and limits around trust and hallucination.

- Listeners will get concrete guidance on building KG+LLM systems (including the ADPT-LRN-PHYS - project), parsing papers into graphs, deployment and frontend considerations, and - recommended graph ML learning resources—valuable for engineers and data scientists - working on crash simulation, knowledge graphs, and RAG workflows. -dateadded: '2024-04-07' -duration: PT00H59M24S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=0 - endOffset: 100 -- name: 'Guest Bio: career path from mechanical engineering to applied AI' - startOffset: 100 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=100 - endOffset: 177 -- name: Guest Background & Career Transition - startOffset: 177 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=177 - endOffset: 337 -- name: Applied Mechanics & Finite Element Analysis (FEA) overview - startOffset: 337 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=337 - endOffset: 485 -- name: 'FEA vs Machine Learning: numerical modeling vs data-driven approaches' - startOffset: 485 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=485 - endOffset: 530 -- name: Optimization, Topology & Semantic Reporting in crash simulations - startOffset: 530 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=530 - endOffset: 958 -- name: 'Knowledge Graphs for Automotive R&D: motivation and Neo4j adoption' - startOffset: 958 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=958 - endOffset: 1232 -- name: 'Graph vs Tabular Representations: visualization, clustering, load-path detection' - startOffset: 1232 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1232 - endOffset: 1575 -- name: From Knowledge Graphs to Computational Graphs (NetworkX & graph analytics) - startOffset: 1575 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=1575 - endOffset: 1680 -- name: 'Graph Data Science & Graph ML: similarity measures and SimRank' - startOffset: 1680 - url: 
https://www.youtube.com/watch?v=YncdlUscUOo&t=1680 - endOffset: 2023 -- name: 'Combining Knowledge Graphs & LLMs: grounding and retrieval-augmented generation - (RAG)' - startOffset: 2023 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2023 - endOffset: 2290 -- name: Text Chunking, Embeddings & Vector Databases vs Knowledge Graph Semantics - startOffset: 2290 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2290 - endOffset: 2396 -- name: Prompt Templates & KG-driven Retrieval (Cypher-based examples) - startOffset: 2396 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2396 - endOffset: 2423 -- name: 'RAG vs Transfer Learning: embeddings, fine-tuning, and distinctions' - startOffset: 2423 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2423 - endOffset: 2562 -- name: Trust, Hallucination & Verification Limits of LLM-extracted Knowledge - startOffset: 2562 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2562 - endOffset: 2653 -- name: 'ADPT-LRN-PHYS Project Overview: LLM + KG for adaptive learning and paper - reading' - startOffset: 2653 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2653 - endOffset: 2830 -- name: 'Paper Parsing & KG Visualization: sections, keywords, PageRank and reference - mapping' - startOffset: 2830 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=2830 - endOffset: 3258 -- name: 'Project Challenges: automating graph generation and scoping the demo' - startOffset: 3258 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3258 - endOffset: 3336 -- name: 'Deployment & Frontend Issues: Streamlit limits and state management for graph - UIs' - startOffset: 3336 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3336 - endOffset: 3466 -- name: 'Learning Resources: graph ML courses, Jure Leskovec, Graph Conference and - KG+LLM courses' - startOffset: 3466 - url: https://www.youtube.com/watch?v=YncdlUscUOo&t=3466 - endOffset: 3641 -- name: Episode Conclusion and Takeaways - startOffset: 3641 - url: 
https://www.youtube.com/watch?v=YncdlUscUOo&t=3641 - endOffset: 3564 ---- +context: 'Context: The episode follows a mechanical-engineer-turned-applied-AI practitioner + exploring how finite element analysis, crash-simulation optimization, and automotive + R&D can be augmented by graph-based representations and modern language models—covering + knowledge graphs, computational/graph analytics, embeddings/RAG, trust and hallucination, + and practical deployment lessons from a project that parses papers and links domain + artifacts. + Core unifying theme: Knowledge graphs serve as the essential bridge between physics-based + engineering models and data-driven AI (graph ML and LLMs), providing a structured, + explainable substrate that grounds retrieval and reasoning, enables graph-native + analytics and optimization workflows, and thereby accelerates trustworthy, automatable + engineering discovery and decision-making.' +--- Links: * [Github repo](https://github.com/antahiap/ADPT-LRN-PHYS/tree/main){:target="_blank"} diff --git a/_podcast/s05e08-the-last-mile-in-data.md b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md similarity index 97% rename from _podcast/s05e08-the-last-mile-in-data.md rename to _podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md index 433ab3d6..9bb636ff 100644 --- a/_podcast/s05e08-the-last-mile-in-data.md +++ b/_podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.md @@ -1,12 +1,11 @@ --- -title: 'Last-Mile Data Delivery for the Modern Data Stack: Build Data Products to - Boost Adoption' -short: Conquering the Last Mile in Data -guests: -- caitlinmoorman -image: images/podcast/s05e08-the-last-mile-in-data.jpg +title: "Last-Mile Data Delivery for the Modern Data Stack: Build Data Products to Boost Adoption" +short: "Conquering the Last Mile in Data" season: 5 episode: 8 +guests: +- caitlinmoorman +image: 
images/podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.jpg ids: youtube: HfMpG2zpa2I anchor: Conquering-the-Last-Mile-in-Data---Caitlin-Moorman-e1958c1 @@ -15,6 +14,121 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Conquering-the-Last-Mile-in-Data---Caitlin-Moorman-e1958c1 spotify: https://open.spotify.com/episode/6SGjBev8koFDRpDvLV76ZQ apple: https://podcasts.apple.com/us/podcast/conquering-the-last-mile-in-data-caitlin-moorman/id1541710331?i=1000539421886 + +description: "Learn last-mile data delivery, build data products for the modern data stack, boost adoption, embed analytics in decisions, and prove measurable ROI" +intro: "How do you turn a powerful modern data stack into analytics people actually use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove Recommerce and former data lead in crowdfunding and self-publishing, walks through the last-mile data delivery challenges that block adoption and offers practical approaches to build data products that drive decisions.

We define the “last mile” in data delivery and contrast modern data stack capabilities with last-mile execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), treating data as a product, user research to diagnose poor adoption, and simplifying A/B testing reporting for decision-makers. Caitlin outlines a product-design mindset—outcome-first projects, persona-driven abstractions, low-fidelity prototyping, and embedding metrics in meetings—to prove impact and build advocacy. She also covers cultural barriers, measuring hard-to-track work with proxies, scoping narrow slices, recruiting advocates, and using growth marketing as an early use case.

Listen to learn actionable frameworks and experiments you can use to improve data adoption, design usable data products, and measure tangible wins that create momentum in your organization." +topics: +- data analytics +- tools +- product management +- leadership +dateadded: 2021-10-23 + +duration: PT01H01M58S + +quotableClips: +- name: Episode introduction & Locally Optimistic community + startOffset: 0 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=0 + endOffset: 280 +- name: 'Career journey: private equity to modern data stacks' + startOffset: 280 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=280 + endOffset: 528 +- name: Defining the "last mile" in data delivery + startOffset: 528 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=528 + endOffset: 804 +- name: Modern data stack vs last-mile execution challenges + startOffset: 804 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=804 + endOffset: 1005 +- name: 'Pareto thinking for analytics: 80/20 and high-leverage work' + startOffset: 1005 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1005 + endOffset: 1202 +- name: 'Cultural barriers to adoption: incentives and behavior' + startOffset: 1202 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1202 + endOffset: 1453 +- name: 'Trust and usability: discoverability, interpretability, and data quality' + startOffset: 1453 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1453 + endOffset: 1581 +- name: 'Diagnosing poor adoption: treat data as a product and do user research' + startOffset: 1581 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1581 + endOffset: 1722 +- name: 'A/B testing reporting: simplify statistics for decision-makers' + startOffset: 1722 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1722 + endOffset: 1945 +- name: 'Product-design mindset for analytics: abstractions and personas' + startOffset: 1945 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1945 + endOffset: 2040 +- name: 'Outcome-first design: 
start projects from the decision you want to enable' + startOffset: 2040 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2040 + endOffset: 2295 +- name: 'Embedding data in meetings: mapping metrics to real decisions' + startOffset: 2295 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2295 + endOffset: 2372 +- name: 'Low-fidelity prototyping: sketches, whiteboards, and rapid feedback' + startOffset: 2372 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2372 + endOffset: 2478 +- name: 'Proving impact: creating measurable wins to build advocacy' + startOffset: 2478 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2478 + endOffset: 2538 +- name: 'Measuring hard-to-track work: proxies, time studies, and practical metrics' + startOffset: 2538 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2538 + endOffset: 2735 +- name: Driving change by scoping narrow slices and building momentum + startOffset: 2735 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2735 + endOffset: 2850 +- name: 'Identifying high-leverage questions: start with financials and cost centers' + startOffset: 2850 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2850 + endOffset: 2965 +- name: 'Handling resistance: recruiting advocates and selling upside' + startOffset: 2965 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2965 + endOffset: 3165 +- name: Growth marketing as a starter use case for data-driven change + startOffset: 3165 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3165 + endOffset: 3226 +- name: 'Interviewing domain experts: curiosity, rapport, and job documentation' + startOffset: 3226 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3226 + endOffset: 3335 +- name: 'Building influence: soft skills and recommended reading' + startOffset: 3335 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3335 + endOffset: 3491 +- name: 'Managing uncertainty: linear projects vs circular (exploratory) projects' + startOffset: 3491 + url: 
https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3491 + endOffset: 3690 +- name: 'Advice for aspiring analysts: curiosity, business impact, and on-the-job + learning' + startOffset: 3690 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3690 + endOffset: 3833 +- name: Where to find Caitlin and the Locally Optimistic community + startOffset: 3833 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3833 + endOffset: 3877 +- name: Episode wrap-up and key takeaways + startOffset: 3877 + url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3877 + endOffset: 3718 + transcript: - header: Episode introduction & Locally Optimistic community - line: This week, we'll talk about the “last mile of data” and we have a special @@ -893,7 +1007,7 @@ transcript: you do step two, you can do step three. And then there are circular projects, where you don't know what you don't know. And a lot of data projects fall into this category. -- header: 'Advice for aspiring analysts: curiosity, business impact, and on‑the‑job +- header: 'Advice for aspiring analysts: curiosity, business impact, and on-the-job learning' - line: I'll share more about this, but the very high-level overview is first just to set expectations. Acknowledge ahead of time that it is a circular project. @@ -1022,127 +1136,6 @@ transcript: sec: 3905 time: '1:05:05' who: Caitlin -description: Learn last-mile data delivery, build data products for the modern data - stack, boost adoption, embed analytics in decisions, and prove measurable ROI. -intro: 'How do you turn a powerful modern data stack into analytics people actually - use? In this episode, Caitlin Moorman, VP of Data and Business Operations at Trove - Recommerce and former data lead in crowdfunding and self-publishing, walks through - the last-mile data delivery challenges that block adoption and offers practical - approaches to build data products that drive decisions.

We define the “last - mile” in data delivery and contrast modern data stack capabilities with last‑mile - execution gaps, then dive into concrete tactics: Pareto thinking for analytics (80/20), - treating data as a product, user research to diagnose poor adoption, and simplifying - A/B testing reporting for decision‑makers. Caitlin outlines a product‑design mindset—outcome‑first - projects, persona-driven abstractions, low‑fidelity prototyping, and embedding metrics - in meetings—to prove impact and build advocacy. She also covers cultural barriers, - measuring hard‑to‑track work with proxies, scoping narrow slices, recruiting advocates, - and using growth marketing as an early use case.

Listen to learn actionable - frameworks and experiments you can use to improve data adoption, design usable data - products, and measure tangible wins that create momentum in your organization.' -dateadded: '2021-10-23' -duration: PT01H01M58S -quotableClips: -- name: Episode introduction & Locally Optimistic community - startOffset: 0 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=0 - endOffset: 280 -- name: 'Career journey: private equity to modern data stacks' - startOffset: 280 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=280 - endOffset: 528 -- name: Defining the "last mile" in data delivery - startOffset: 528 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=528 - endOffset: 804 -- name: Modern data stack vs last-mile execution challenges - startOffset: 804 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=804 - endOffset: 1005 -- name: 'Pareto thinking for analytics: 80/20 and high-leverage work' - startOffset: 1005 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1005 - endOffset: 1202 -- name: 'Cultural barriers to adoption: incentives and behavior' - startOffset: 1202 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1202 - endOffset: 1453 -- name: 'Trust and usability: discoverability, interpretability, and data quality' - startOffset: 1453 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1453 - endOffset: 1581 -- name: 'Diagnosing poor adoption: treat data as a product and do user research' - startOffset: 1581 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1581 - endOffset: 1722 -- name: 'A/B testing reporting: simplify statistics for decision-makers' - startOffset: 1722 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1722 - endOffset: 1945 -- name: 'Product-design mindset for analytics: abstractions and personas' - startOffset: 1945 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=1945 - endOffset: 2040 -- name: 'Outcome-first design: start projects from the decision you want to enable' - startOffset: 2040 - 
url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2040 - endOffset: 2295 -- name: 'Embedding data in meetings: mapping metrics to real decisions' - startOffset: 2295 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2295 - endOffset: 2372 -- name: 'Low-fidelity prototyping: sketches, whiteboards, and rapid feedback' - startOffset: 2372 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2372 - endOffset: 2478 -- name: 'Proving impact: creating measurable wins to build advocacy' - startOffset: 2478 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2478 - endOffset: 2538 -- name: 'Measuring hard-to-track work: proxies, time studies, and practical metrics' - startOffset: 2538 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2538 - endOffset: 2735 -- name: Driving change by scoping narrow slices and building momentum - startOffset: 2735 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2735 - endOffset: 2850 -- name: 'Identifying high-leverage questions: start with financials and cost centers' - startOffset: 2850 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2850 - endOffset: 2965 -- name: 'Handling resistance: recruiting advocates and selling upside' - startOffset: 2965 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=2965 - endOffset: 3165 -- name: Growth marketing as a starter use case for data-driven change - startOffset: 3165 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3165 - endOffset: 3226 -- name: 'Interviewing domain experts: curiosity, rapport, and job documentation' - startOffset: 3226 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3226 - endOffset: 3335 -- name: 'Building influence: soft skills and recommended reading' - startOffset: 3335 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3335 - endOffset: 3491 -- name: 'Managing uncertainty: linear projects vs circular (exploratory) projects' - startOffset: 3491 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3491 - endOffset: 3690 -- name: 'Advice for 
aspiring analysts: curiosity, business impact, and on‑the‑job - learning' - startOffset: 3690 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3690 - endOffset: 3833 -- name: Where to find Caitlin and the Locally Optimistic community - startOffset: 3833 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3833 - endOffset: 3877 -- name: Episode wrap-up and key takeaways - startOffset: 3877 - url: https://www.youtube.com/watch?v=HfMpG2zpa2I&t=3877 - endOffset: 3718 --- Links: diff --git a/_podcast/s04e07-launching-a-startup.md b/_podcast/launch-and-build-retail-startup.md similarity index 97% rename from _podcast/s04e07-launching-a-startup.md rename to _podcast/launch-and-build-retail-startup.md index a7fb2081..79debfff 100644 --- a/_podcast/s04e07-launching-a-startup.md +++ b/_podcast/launch-and-build-retail-startup.md @@ -1,11 +1,11 @@ --- -title: Build a Grocery Retail OS to Cut Supermarket Food Waste & Scale Your Startup -short: 'Launching a Startup: From Idea to First Hire' -guests: -- carminepaolino -image: images/podcast/s04e07-launching-a-startup.jpg +title: "Build a Grocery Retail OS to Cut Supermarket Food Waste & Scale Your Startup" +short: "Launching a Startup: From Idea to First Hire" season: 4 episode: 7 +guests: +- carminepaolino +image: images/podcast/launch-and-build-retail-startup.jpg ids: youtube: s-w8_GDgIlU anchor: Launching-a-Startup-From-Idea-to-First-Hire---Carmine-Paolino-e15sk4i @@ -14,6 +14,139 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Launching-a-Startup-From-Idea-to-First-Hire---Carmine-Paolino-e15sk4i spotify: https://open.spotify.com/episode/2zlqwEOamFD8YVGkf4VsFW apple: https://podcasts.apple.com/us/podcast/launching-a-startup-from-idea-to-first-hire-carmine-paolino/id1541710331?i=1000531945076 + +description: "Build a Grocery Retail OS to cut supermarket food waste, master JIT supply-chain forecasting, land pilots & investors, and scale your startup faster" +intro: "How do you build a grocery retail OS that 
actually cuts supermarket food waste while scaling a startup? In this episode, Carmine Paolino — CTO and co-founder of FreshFlow and former programmer/researcher in academia and data science — walks through translating technical expertise into a product that solves fresh-product challenges for retailers.

We cover FreshFlow’s mission and early problem discovery (including Edeka and Volg pilots), customer discovery techniques like shadowing store teams and The Mom Test, and how their idea evolved from computer vision to an ordering and inventory forecasting platform. Carmine explains Entrepreneur First’s role in co-founder matching and fundraising, pilot timelines and sales cycle realities, and risks around investor selection and board dynamics. He also shares technical lessons (moving off Kubeflow to managed GCP services), hiring priorities, building a product roadmap toward a grocery retail OS, and leveraging just-in-time supply chain and forecasting to reduce food waste.

Listen to learn practical guidance on pilot programs, prototype-before-pitch validation (banana ripeness demo), co-founder formation, and the operational and technical trade-offs when scaling a startup focused on supermarket food waste reduction" +topics: +- startup +- founder +- leadership +- entrepreneurship +- product management +- tools +dateadded: 2021-08-15 + +duration: PT01H07M24S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 106 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=106 + endOffset: 136 +- name: 'Early Career: Programming, Academia, and Data Science' + startOffset: 136 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=136 + endOffset: 306 +- name: 'FreshFlow Overview: CTO Role and Ordering System Mission' + startOffset: 306 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=306 + endOffset: 346 +- name: 'Problem Discovery: Supermarket Fresh-Product Challenges & Edeka Pilot' + startOffset: 346 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=346 + endOffset: 433 +- name: 'Customer Discovery: Shadowing Store Teams and Research Methods (The Mom Test)' + startOffset: 433 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=433 + endOffset: 796 +- name: 'Entrepreneur First Experience: Program Structure and Benefits' + startOffset: 796 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=796 + endOffset: 955 +- name: 'Co-founder Matching: "Edges" Framework for Team Formation' + startOffset: 955 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=955 + endOffset: 1130 +- name: 'Mentorship & Investment Committee: Scoring, Feedback, and EF Phases' + startOffset: 1130 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1130 + endOffset: 1487 +- name: 'Idea Evolution: From Computer Vision App to Ordering Solution' + startOffset: 1487 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1487 + endOffset: 1714 +- name: 'Market Opportunity: Food Waste Impact and Competitive Landscape' + startOffset: 1714 + url: 
https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1714 + endOffset: 1859 +- name: 'Accelerator Value: Networking, Validation, and Founder Support' + startOffset: 1859 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1859 + endOffset: 2004 +- name: 'First Pilots & Clients: Volg and Edeka Engagements' + startOffset: 2004 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2004 + endOffset: 2090 +- name: 'Sales Cycle Realities: Time to First Client and Pilot Timelines' + startOffset: 2090 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2090 + endOffset: 2200 +- name: 'Fundraising Path: EF Investment, Angels, and Demo Day Strategy' + startOffset: 2200 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2200 + endOffset: 2413 +- name: 'Investor Selection Risks: Term Sheets, Board Dynamics, and Fit' + startOffset: 2413 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2413 + endOffset: 2544 +- name: 'Founder Roles: Splitting CTO and CEO Responsibilities' + startOffset: 2544 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2544 + endOffset: 2627 +- name: 'Hiring Strategy: First Hires, Freelancers, Delegation, and Remote Talent' + startOffset: 2627 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2627 + endOffset: 2932 +- name: 'Product Roadmap: Scaling to a Grocery Retail OS and Supply-Chain Expansion' + startOffset: 2932 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2932 + endOffset: 3014 +- name: 'Forecasting & Just-in-Time Supply Chain: Reducing Waste and Inventory' + startOffset: 3014 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3014 + endOffset: 3189 +- name: 'Tech Infrastructure Lesson: Kubeflow Challenges and Choosing Managed Cloud + (GCP)' + startOffset: 3189 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3189 + endOffset: 3351 +- name: 'Startup Advice: Resilience, Focus, and Emotional Intelligence' + startOffset: 3351 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3351 + endOffset: 3429 +- name: 'Hiring Criteria: 
Prioritizing Motivation and Behavior Over Skills' + startOffset: 3429 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3429 + endOffset: 3554 +- name: 'Sustaining Motivation: Mission-Driven Work on Food Waste and Climate' + startOffset: 3554 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3554 + endOffset: 3665 +- name: 'CTO Readiness: Skill Gaps, Learning, and Using Managed Services' + startOffset: 3665 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3665 + endOffset: 3825 +- name: 'MBA Relevance: Business School Not Required for Early-Stage Startups' + startOffset: 3825 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3825 + endOffset: 3907 +- name: 'Co-founder Imperative: Form Phase Advice and Team Formation Timing' + startOffset: 3907 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3907 + endOffset: 3957 +- name: 'Validating Pre-Existing Ideas: Why EF Helps Even with a Clear Idea' + startOffset: 3957 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3957 + endOffset: 4026 +- name: 'Prototype Before Pitch: Banana Ripeness Demo and Early Technical Traction' + startOffset: 4026 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4026 + endOffset: 4105 +- name: Contact Details & Episode Closing Remarks + startOffset: 4105 + url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4105 + endOffset: 4044 + transcript: - header: Episode Introduction & Guest Overview - line: This week, we'll talk about building a startup as a technical person. And @@ -1053,144 +1186,6 @@ transcript: sec: 4150 time: '1:09:10' who: Alexey -description: Build a Grocery Retail OS to cut supermarket food waste, master JIT supply-chain - forecasting, land pilots & investors, and scale your startup faster. -intro: How do you build a grocery retail OS that actually cuts supermarket food waste - while scaling a startup? 
In this episode, Carmine Paolino — CTO and co-founder of - FreshFlow and former programmer/researcher in academia and data science — walks - through translating technical expertise into a product that solves fresh-product - challenges for retailers.

We cover FreshFlow’s mission and early problem - discovery (including Edeka and Volg pilots), customer discovery techniques like - shadowing store teams and The Mom Test, and how their idea evolved from computer - vision to an ordering and inventory forecasting platform. Carmine explains Entrepreneur - First’s role in co-founder matching and fundraising, pilot timelines and sales cycle - realities, and risks around investor selection and board dynamics. He also shares - technical lessons (moving off Kubeflow to managed GCP services), hiring priorities, - building a product roadmap toward a grocery retail OS, and leveraging just-in-time - supply chain and forecasting to reduce food waste.

Listen to learn practical - guidance on pilot programs, prototype-before-pitch validation (banana ripeness demo), - co-founder formation, and the operational and technical trade-offs when scaling - a startup focused on supermarket food waste reduction. -dateadded: '2021-08-15' -duration: PT01H07M24S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 106 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=106 - endOffset: 136 -- name: 'Early Career: Programming, Academia, and Data Science' - startOffset: 136 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=136 - endOffset: 306 -- name: 'FreshFlow Overview: CTO Role and Ordering System Mission' - startOffset: 306 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=306 - endOffset: 346 -- name: 'Problem Discovery: Supermarket Fresh-Product Challenges & Edeka Pilot' - startOffset: 346 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=346 - endOffset: 433 -- name: 'Customer Discovery: Shadowing Store Teams and Research Methods (The Mom Test)' - startOffset: 433 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=433 - endOffset: 796 -- name: 'Entrepreneur First Experience: Program Structure and Benefits' - startOffset: 796 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=796 - endOffset: 955 -- name: 'Co-founder Matching: "Edges" Framework for Team Formation' - startOffset: 955 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=955 - endOffset: 1130 -- name: 'Mentorship & Investment Committee: Scoring, Feedback, and EF Phases' - startOffset: 1130 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1130 - endOffset: 1487 -- name: 'Idea Evolution: From Computer Vision App to Ordering Solution' - startOffset: 1487 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1487 - endOffset: 1714 -- name: 'Market Opportunity: Food Waste Impact and Competitive Landscape' - startOffset: 1714 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1714 - endOffset: 1859 -- name: 'Accelerator 
Value: Networking, Validation, and Founder Support' - startOffset: 1859 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=1859 - endOffset: 2004 -- name: 'First Pilots & Clients: Volg and Edeka Engagements' - startOffset: 2004 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2004 - endOffset: 2090 -- name: 'Sales Cycle Realities: Time to First Client and Pilot Timelines' - startOffset: 2090 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2090 - endOffset: 2200 -- name: 'Fundraising Path: EF Investment, Angels, and Demo Day Strategy' - startOffset: 2200 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2200 - endOffset: 2413 -- name: 'Investor Selection Risks: Term Sheets, Board Dynamics, and Fit' - startOffset: 2413 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2413 - endOffset: 2544 -- name: 'Founder Roles: Splitting CTO and CEO Responsibilities' - startOffset: 2544 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2544 - endOffset: 2627 -- name: 'Hiring Strategy: First Hires, Freelancers, Delegation, and Remote Talent' - startOffset: 2627 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2627 - endOffset: 2932 -- name: 'Product Roadmap: Scaling to a Grocery Retail OS and Supply-Chain Expansion' - startOffset: 2932 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=2932 - endOffset: 3014 -- name: 'Forecasting & Just-in-Time Supply Chain: Reducing Waste and Inventory' - startOffset: 3014 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3014 - endOffset: 3189 -- name: 'Tech Infrastructure Lesson: Kubeflow Challenges and Choosing Managed Cloud - (GCP)' - startOffset: 3189 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3189 - endOffset: 3351 -- name: 'Startup Advice: Resilience, Focus, and Emotional Intelligence' - startOffset: 3351 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3351 - endOffset: 3429 -- name: 'Hiring Criteria: Prioritizing Motivation and Behavior Over Skills' - startOffset: 3429 - url: 
https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3429 - endOffset: 3554 -- name: 'Sustaining Motivation: Mission-Driven Work on Food Waste and Climate' - startOffset: 3554 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3554 - endOffset: 3665 -- name: 'CTO Readiness: Skill Gaps, Learning, and Using Managed Services' - startOffset: 3665 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3665 - endOffset: 3825 -- name: 'MBA Relevance: Business School Not Required for Early-Stage Startups' - startOffset: 3825 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3825 - endOffset: 3907 -- name: 'Co-founder Imperative: Form Phase Advice and Team Formation Timing' - startOffset: 3907 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3907 - endOffset: 3957 -- name: 'Validating Pre-Existing Ideas: Why EF Helps Even with a Clear Idea' - startOffset: 3957 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=3957 - endOffset: 4026 -- name: 'Prototype Before Pitch: Banana Ripeness Demo and Early Technical Traction' - startOffset: 4026 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4026 - endOffset: 4105 -- name: Contact Details & Episode Closing Remarks - startOffset: 4105 - url: https://www.youtube.com/watch?v=s-w8_GDgIlU&t=4105 - endOffset: 4044 --- diff --git a/_podcast/s20e04-mlops-in-corporations-and-startups.md b/_podcast/lean-mlops-for-startups.md similarity index 93% rename from _podcast/s20e04-mlops-in-corporations-and-startups.md rename to _podcast/lean-mlops-for-startups.md index e227ade0..152960c3 100644 --- a/_podcast/s20e04-mlops-in-corporations-and-startups.md +++ b/_podcast/lean-mlops-for-startups.md @@ -1,20 +1,144 @@ --- +title: "Lean MLOps for Startups: SaaS-First MVP Stack, Avoid Vendor Lock-In & Manage Tech Debt" +short: "MLOps in Corporations and Startups" +season: 20 episode: 4 guests: - nemanjaradojkovic +image: images/podcast/lean-mlops-for-startups.jpg ids: - anchor: 
atalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 + anchor: datatalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 youtube: DX9c__a4jzg -image: images/podcast/s20e04-mlops-in-corporations-and-startups.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/MLOps-in-Corporations-and-Startups---Nemanja-Radojkovic-e304g53 apple: https://podcasts.apple.com/us/podcast/mlops-in-corporations-and-startups-nemanja-radojkovic/id1541710331?i=1000699195928 spotify: https://open.spotify.com/episode/6V8gkTSz7LuPjQYC4rO019 youtube: https://www.youtube.com/watch?v=DX9c__a4jzg -season: 20 -short: MLOps in Corporations and Startups -title: 'Lean MLOps for Startups: SaaS-First MVP Stack, Avoid Vendor Lock-In & Manage - Tech Debt' +description: "Learn lean MLOps for startups: build a SaaS-first MVP stack, avoid vendor lock-in, and manage tech debt to ship faster, cut costs, and scale safely." +topics: +- MLOps +- data engineering +- tools +- production +- career transition +- startups + +intro: "How can a startup implement Lean MLOps that gets models into production quickly without incurring vendor lock-in or crushing tech debt? In this episode Nemanja Radojkovic — an Electrical Engineer turned Data Scientist and MLOps Engineer, former consultant at Big4 and boutique firms, DataCamp course author, and teacher of Python and machine learning — walks through practical strategies for building a SaaS-first MVP stack while preserving future flexibility.

We dig into the core trade-offs of a SaaS-first approach for an MVP, patterns to avoid vendor lock-in, and pragmatic ways to manage accumulating tech debt in machine learning systems. Nemanja draws on hands-on experience across data science, MLOps, and product environments to explain how startups can choose tooling, limit integration risk, and plan safe migration paths as needs change.

Listeners will come away with concrete considerations for designing a lean MLOps stack, assessing SaaS versus self-hosted options, and thinking ahead about maintainability and portability — essential guidance for founders, ML engineers, and product teams building production-ready ML on a startup timeline." +dateadded: 2025-03-15 +duration: PT01H01M06S +quotableClips: +- name: Episode Introduction & Topic Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=0 + endOffset: 135 +- name: 'Career Journey: Academia → Consulting → Finance Machine Learning Engineering' + startOffset: 135 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=135 + endOffset: 363 +- name: 'Startup Pace: Agility, Speed, and Managerial Insights' + startOffset: 363 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=363 + endOffset: 474 +- name: 'Lean MLOps: Shoestring Strategies for Early-Stage Companies' + startOffset: 474 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=474 + endOffset: 714 +- name: 'SaaS-First Approach: Vendor Solutions for Small Teams' + startOffset: 714 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=714 + endOffset: 774 +- name: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock-in' + startOffset: 774 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=774 + endOffset: 906 +- name: 'Cloud Complexity: Infrastructure as Code and Operational Overhead' + startOffset: 906 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=906 + endOffset: 1058 +- name: 'MVP Stack: Prioritizing Tools for Rapid Prototyping and Launch' + startOffset: 1058 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1058 + endOffset: 1159 +- name: 'Portability vs Managed Services: Avoiding Vendor Lock-In (Vertex AI, SageMaker)' + startOffset: 1159 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1159 + endOffset: 1295 +- name: 'Low-Code Trade-offs: Speed vs Future Flexibility' + startOffset: 1295 + url: 
https://www.youtube.com/watch?v=DX9c__a4jzg&t=1295 + endOffset: 1342 +- name: 'Career Decision Framework: Choosing Startups or Corporations' + startOffset: 1342 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1342 + endOffset: 1650 +- name: 'End-to-End Ownership: Multidisciplinary Work in Startups' + startOffset: 1650 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1650 + endOffset: 1777 +- name: 'Corporate Processes: "Agile" vs Bureaucratic Planning Cycles' + startOffset: 1777 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1777 + endOffset: 1997 +- name: 'Platform & Frameworks: Automating Developer Workflows' + startOffset: 1997 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1997 + endOffset: 2072 +- name: 'Team Scale Advantages: Redundancy, Support, and Internal Mobility' + startOffset: 2072 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2072 + endOffset: 2148 +- name: 'Startup Intensity: Learning Curve, Burnout Risk, and Rewards' + startOffset: 2148 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2148 + endOffset: 2274 +- name: 'AI-Assisted Coding: Productivity Gains and Technical Debt Risks' + startOffset: 2274 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2274 + endOffset: 2401 +- name: 'Technical Debt Management: Notes, Awareness, and Security Implications' + startOffset: 2401 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2401 + endOffset: 2592 +- name: 'Early-Career Advice: Mentorship, Pairing, and Role Selection' + startOffset: 2592 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2592 + endOffset: 2650 +- name: 'Minimal MLOps Stack: Python, CI/CD Orchestration, and Dagster' + startOffset: 2650 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2650 + endOffset: 2755 +- name: 'Observability Choices: Logfire, Prometheus/Grafana, and Streamlit' + startOffset: 2755 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2755 + endOffset: 2891 +- name: 'Product Modularity: Desire for Standalone Model Registries 
& Observability' + startOffset: 2891 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2891 + endOffset: 2940 +- name: 'Skill Investment: Foundational Tools (Linux, Python, Bash) vs New Tech' + startOffset: 2940 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2940 + endOffset: 3087 +- name: 'Market Signals for Learning: Job Postings, Airflow, and Targeted Skills' + startOffset: 3087 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3087 + endOffset: 3343 +- name: 'Data Engineering Reliability: Quality, Lineage, and LLM Unpredictability' + startOffset: 3343 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3343 + endOffset: 3429 +- name: 'On-Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' + startOffset: 3429 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3429 + endOffset: 3609 +- name: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade-offs' + startOffset: 3609 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3609 + endOffset: 3701 +- name: Closing Remarks and Next Steps + startOffset: 3701 + url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3701 + endOffset: 3666 transcript: - header: Episode Introduction & Topic Overview - line: This week, we’ll talk about MLOps in corporations versus startups. Our special @@ -182,7 +306,7 @@ transcript: sec: 714 time: '11:54' who: Nemanja -- header: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock‑in' +- header: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock-in' - line: Going on-premise is hard for a startup unless it makes a lot of sense. I think it’s a no-brainer for startups to go for the cloud. 
However, there needs to be a clever decision because migrating from one cloud to another can be slow and @@ -280,7 +404,7 @@ transcript: sec: 1109 time: '18:29' who: Nemanja -- header: 'Portability vs Managed Services: Avoiding Vendor Lock‑In (Vertex AI, SageMaker)' +- header: 'Portability vs Managed Services: Avoiding Vendor Lock-In (Vertex AI, SageMaker)' - line: I tried Kubeflow, and it was a huge pain because of all the YAML files and Kubernetes complexity. Maybe it makes sense in the long run, but at the beginning, you might just need Flask or something simpler. @@ -317,7 +441,7 @@ transcript: sec: 1274 time: '21:14' who: Alexey -- header: 'Low‑Code Trade-offs: Speed vs Future Flexibility' +- header: 'Low-Code Trade-offs: Speed vs Future Flexibility' - line: Yes, that makes you more portable. Some startups might want to start as fast as possible using low-code solutions. If you can only hire a data scientist and not a proper software or systems engineer, you might go with a low-code platform. @@ -414,7 +538,7 @@ transcript: sec: 1631 time: '27:11' who: Nemanja -- header: 'End‑to‑End Ownership: Multidisciplinary Work in Startups' +- header: 'End-to-End Ownership: Multidisciplinary Work in Startups' - line: Startups also pivot frequently. A small, young startup might shift directions completely based on client demands. One client might leave, and another might request something entirely different. This kind of abrupt change keeps things @@ -629,7 +753,7 @@ transcript: sec: 2267 time: '37:47' who: Nemanja -- header: 'AI‑Assisted Coding: Productivity Gains and Technical Debt Risks' +- header: 'AI-Assisted Coding: Productivity Gains and Technical Debt Risks' - line: But you know what I see as a risk now with LLMs and AI-assisted coding? 
sec: 2274 time: '37:54' @@ -777,7 +901,7 @@ transcript: sec: 2586 time: '43:06' who: Alexey -- header: 'Early‑Career Advice: Mentorship, Pairing, and Role Selection' +- header: 'Early-Career Advice: Mentorship, Pairing, and Role Selection' - line: For juniors, is it better to join a corporation or a more established company? sec: 2592 time: '43:12' @@ -1129,7 +1253,7 @@ transcript: sec: 3412 time: '56:52' who: Nemanja -- header: 'On‑Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' +- header: 'On-Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' - line: 'Right. So maybe let’s take one last question. You mentioned you have experience with on-premise systems. Most corporations you’ve worked with have preferred on-premise over cloud solutions. Luka is asking: Do you think on-premise will be the future @@ -1197,7 +1321,7 @@ transcript: sec: 3604 time: '1:00:04' who: Nemanja -- header: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade‑offs' +- header: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade-offs' - line: Dask is a mature tool, and I know it works in a distributed manner like Spark. However, I haven’t seen it widely used in the industry. Companies usually default to Spark for distributed processing. My limited success with Dask doesn’t mean @@ -1229,141 +1353,18 @@ transcript: sec: 3726 time: '1:02:06' who: Nemanja -description: 'Learn Lean MLOps strategies for startups: build a SaaS-first MVP stack, - avoid vendor lock-in, and manage technical debt for faster, portable ML launches.' -intro: How can an early-stage startup ship ML features fast without getting locked - into cloud vendors or drowning in technical debt? In this episode, Nemanja Radojkovic—an - electrical engineer turned data scientist and MLOps engineer, DataCamp instructor, - and long-time practitioner—walks through pragmatic, lean MLOps strategies for startups. -

We cover shoestring tactics for rapid prototyping, a SaaS‑first MVP stack - and its trade‑offs, cloud credits versus migration friction, and how to avoid vendor - lock‑in with managed services like Vertex AI or SageMaker. Nemanja unpacks priorities - for an MVP stack, low‑code speed versus future flexibility, minimal stacks (Python, - CI/CD orchestration, Dagster), and observability options (Logfire, Prometheus/Grafana, - Streamlit). The conversation also addresses technical debt management, data engineering - reliability, on‑premise vs cloud decisions, and distributed compute choices (Dask, - Spark).

Listen to learn concrete frameworks for choosing tools, balancing - portability and managed services, and practical steps to manage tech debt while - moving quickly. This episode is for startup engineers and founders who need actionable - guidance on lean MLOps, SaaS‑first approaches, vendor lock‑in avoidance, and building - a resilient MVP stack. -dateadded: '2025-03-15' -duration: PT01H01M06S -quotableClips: -- name: Episode Introduction & Topic Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=0 - endOffset: 135 -- name: 'Career Journey: Academia → Consulting → Finance Machine Learning Engineering' - startOffset: 135 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=135 - endOffset: 363 -- name: 'Startup Pace: Agility, Speed, and Managerial Insights' - startOffset: 363 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=363 - endOffset: 474 -- name: 'Lean MLOps: Shoestring Strategies for Early-Stage Companies' - startOffset: 474 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=474 - endOffset: 714 -- name: 'SaaS-First Approach: Vendor Solutions for Small Teams' - startOffset: 714 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=714 - endOffset: 774 -- name: 'Cloud Trade-offs: Startup Credits, Migration Friction, and Lock‑in' - startOffset: 774 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=774 - endOffset: 906 -- name: 'Cloud Complexity: Infrastructure as Code and Operational Overhead' - startOffset: 906 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=906 - endOffset: 1058 -- name: 'MVP Stack: Prioritizing Tools for Rapid Prototyping and Launch' - startOffset: 1058 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1058 - endOffset: 1159 -- name: 'Portability vs Managed Services: Avoiding Vendor Lock‑In (Vertex AI, SageMaker)' - startOffset: 1159 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1159 - endOffset: 1295 -- name: 'Low‑Code Trade-offs: Speed vs Future Flexibility' - startOffset: 1295 - url: 
https://www.youtube.com/watch?v=DX9c__a4jzg&t=1295 - endOffset: 1342 -- name: 'Career Decision Framework: Choosing Startups or Corporations' - startOffset: 1342 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1342 - endOffset: 1650 -- name: 'End‑to‑End Ownership: Multidisciplinary Work in Startups' - startOffset: 1650 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1650 - endOffset: 1777 -- name: 'Corporate Processes: "Agile" vs Bureaucratic Planning Cycles' - startOffset: 1777 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1777 - endOffset: 1997 -- name: 'Platform & Frameworks: Automating Developer Workflows' - startOffset: 1997 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=1997 - endOffset: 2072 -- name: 'Team Scale Advantages: Redundancy, Support, and Internal Mobility' - startOffset: 2072 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2072 - endOffset: 2148 -- name: 'Startup Intensity: Learning Curve, Burnout Risk, and Rewards' - startOffset: 2148 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2148 - endOffset: 2274 -- name: 'AI‑Assisted Coding: Productivity Gains and Technical Debt Risks' - startOffset: 2274 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2274 - endOffset: 2401 -- name: 'Technical Debt Management: Notes, Awareness, and Security Implications' - startOffset: 2401 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2401 - endOffset: 2592 -- name: 'Early‑Career Advice: Mentorship, Pairing, and Role Selection' - startOffset: 2592 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2592 - endOffset: 2650 -- name: 'Minimal MLOps Stack: Python, CI/CD Orchestration, and Dagster' - startOffset: 2650 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2650 - endOffset: 2755 -- name: 'Observability Choices: Logfire, Prometheus/Grafana, and Streamlit' - startOffset: 2755 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2755 - endOffset: 2891 -- name: 'Product Modularity: Desire for Standalone Model Registries 
& Observability' - startOffset: 2891 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2891 - endOffset: 2940 -- name: 'Skill Investment: Foundational Tools (Linux, Python, Bash) vs New Tech' - startOffset: 2940 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=2940 - endOffset: 3087 -- name: 'Market Signals for Learning: Job Postings, Airflow, and Targeted Skills' - startOffset: 3087 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3087 - endOffset: 3343 -- name: 'Data Engineering Reliability: Quality, Lineage, and LLM Unpredictability' - startOffset: 3343 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3343 - endOffset: 3429 -- name: 'On‑Premise vs Cloud: Privacy, Cost Efficiency, and Migration Strategy' - startOffset: 3429 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3429 - endOffset: 3609 -- name: 'Distributed Compute Alternatives: Dask, Spark, and Performance Trade‑offs' - startOffset: 3609 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3609 - endOffset: 3701 -- name: Closing Remarks and Next Steps - startOffset: 3701 - url: https://www.youtube.com/watch?v=DX9c__a4jzg&t=3701 - endOffset: 3666 +context: 'The episode’s single unifying idea is pragmatic trade-offs: how to move + fast and deliver value in ML-driven products and careers while deliberately managing + the risks that speed introduces—technical debt, vendor lock-in, operational overhead, + and team burnout. Every segment circles back to the same decision framework: choose + lean, observable, portable primitives and SaaS or managed services pragmatically + to ship quickly; invest in minimal, automatable MLOps and instrumentation so you + can iterate safely; and prioritize foundational skills, mentorship, and ownership + to sustain learning and long-term flexibility. 
In short, be intentional about early + architectural, tooling, and career choices—opt for simplicity and visibility to + accelerate outcomes today while preserving the ability to evolve, scale, and de-risk + tomorrow.' --- - Links: * [LinkedIn](https://www.linkedin.com/in/radojkovic/){:target="_blank"} diff --git a/_podcast/s13e07-mastering-self-learning-in-machine-learning.md b/_podcast/learning-machine-learning-self-taught-bioinformatics.md similarity index 97% rename from _podcast/s13e07-mastering-self-learning-in-machine-learning.md rename to _podcast/learning-machine-learning-self-taught-bioinformatics.md index ea05baeb..109fef0e 100644 --- a/_podcast/s13e07-mastering-self-learning-in-machine-learning.md +++ b/_podcast/learning-machine-learning-self-taught-bioinformatics.md @@ -1,22 +1,143 @@ --- +title: "How to Teach Yourself Bioinformatics & ML: Project-First Learning, Resources, and MLOps" +short: "Mastering Self-Learning in Machine Learning" +season: 13 episode: 7 guests: - aaishamuhammad +image: images/podcast/learning-machine-learning-self-taught-bioinformatics.jpg ids: anchor: ow/datatalksclub/episodes/Mastering-Self-Learning-in-Machine-Learning---Aaisha-Muhammad-e21ud62 youtube: Kc3Puh3UCRQ -image: images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/Mastering-Self-Learning-in-Machine-Learning---Aaisha-Muhammad-e21ud62 apple: https://podcasts.apple.com/us/podcast/mastering-self-learning-in-machine-learning-aaisha/id1541710331?i=1000607892159 spotify: https://open.spotify.com/episode/2XdKHrmVuytXd5kzLVSbFn?si=ETbkUdT2Q1yJlKCI-d9Rcg youtube: https://www.youtube.com/watch?v=Kc3Puh3UCRQ -season: 13 -short: Mastering Self-Learning in Machine Learning -title: 'How to Teach Yourself Bioinformatics & ML: Project-First Learning, Resources, - and MLOps' + +description: "Learn bioinformatics & machine learning via project-first workflows, dataset-first ideation, 
study hacks and MLOps deployment tips to gain practical skills." +intro: "How do you teach yourself bioinformatics and machine learning in a way that leads to real projects and deployable models? In this episode, Aaisha Muhammad — a self-taught bioinformatician, machine learning engineer and scientific illustrator from Johannesburg and a DataTalks.Club ML Zoomcamp graduate — walks through a project-first path for learning bioinformatics and ML. We cover prioritization and avoiding FOMO, open curricula like OSSU, skill mapping with ML Zoomcamp, and practical resource evaluation (free vs paid, syllabus skimming, instructor credibility). Aaisha explains dataset-first project ideation, finding datasets and papers via Google Scholar and PubMed, and building capstone projects such as frog toxicity and landscape classifiers. You’ll hear pragmatic study tactics — self-imposed deadlines, note-taking, time tracking, community study groups, and using ChatGPT as a study companion — plus strategies to approach PhD-level papers while avoiding burnout. For engineers interested in production, the conversation addresses deployment and MLOps basics including Docker and Kubernetes. 
Tune in to gain concrete guidance on projects, vetted resources, and the study habits that make self-directed bioinformatics and ML learning sustainable." +topics: +- bioinformatics +- machine learning +- self-learning +dateadded: 2023-04-08 + +duration: PT00H58M57S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=0 + endOffset: 74 +- name: 'Guest Overview: Aaisha — self-taught bioinformatician, ML engineer, scientific + illustrator' + startOffset: 74 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=74 + endOffset: 137 +- name: 'Early Learning & Homeschooling: Python, web development, and flexible study' + startOffset: 137 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=137 + endOffset: 513 +- name: 'Choosing What to Learn: prioritization, filtering, and avoiding FOMO' + startOffset: 513 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=513 + endOffset: 561 +- name: 'Open Curricula: OSSU pathway for bioinformatics' + startOffset: 561 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=561 + endOffset: 768 +- name: 'Skill Mapping with ML Zoomcamp: building machine learning fundamentals' + startOffset: 768 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=768 + endOffset: 829 +- name: 'Evaluating Resources: syllabus skimming and instructor credibility' + startOffset: 829 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=829 + endOffset: 962 +- name: Free vs Paid Resources and Vetting Paid Courses + startOffset: 962 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=962 + endOffset: 1071 +- name: 'Practical Relevance: identifying industry-useful ML topics (SVM anecdote)' + startOffset: 1071 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1071 + endOffset: 1362 +- name: 'Learning Strategy: balancing theory and project-based practice' + startOffset: 1362 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1362 + endOffset: 1470 +- name: Project Selection & Dataset-First 
Ideation + startOffset: 1470 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1470 + endOffset: 1555 +- name: 'Research Papers & Dataset Discovery: Google Scholar, PubMed, citation graphs' + startOffset: 1555 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1555 + endOffset: 1718 +- name: 'ML Zoomcamp Experience: why the course appealed and structure' + startOffset: 1718 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1718 + endOffset: 1865 +- name: 'Zoomcamp Projects: frog toxicity capstone and landscape classifier' + startOffset: 1865 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1865 + endOffset: 2156 +- name: 'Bioinformatics Motivation: research interest meeting practical tech' + startOffset: 2156 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2156 + endOffset: 2215 +- name: 'Deadlines & Productivity Tactics: self-imposed deadlines and sticky-note + hacks' + startOffset: 2215 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2215 + endOffset: 2522 +- name: 'Study Habits: note-taking, time tracking, and personal workflow' + startOffset: 2522 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2522 + endOffset: 2630 +- name: 'Drawbacks of Independent Study: discipline risks and curriculum gaps' + startOffset: 2630 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2630 + endOffset: 2740 +- name: 'Community Learning: study groups, Slack, and teaching-to-learn' + startOffset: 2740 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2740 + endOffset: 2885 +- name: 'Deployment & MLOps: Docker, Kubernetes, and deployment discomfort' + startOffset: 2885 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2885 + endOffset: 3062 +- name: ChatGPT as a Study Companion and Pseudo Study Group + startOffset: 3062 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3062 + endOffset: 3114 +- name: 'Advanced Learning: approaching PhD-level topics via papers' + startOffset: 3114 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3114 + 
endOffset: 3218 +- name: 'Research Access & Publishing Challenges: paywalls and library access' + startOffset: 3218 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3218 + endOffset: 3386 +- name: 'Avoiding Burnout: switching topics and juggling parallel projects' + startOffset: 3386 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3386 + endOffset: 3509 +- name: 'Recommended Resources: Python for Everybody, ML Zoomcamp, further reading' + startOffset: 3509 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3509 + endOffset: 3593 +- name: Closing Remarks and Final Thoughts + startOffset: 3593 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3593 + endOffset: 3611 +- name: Episode Sign-Off + startOffset: 3611 + url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3611 + endOffset: 3537 + transcript: -- header: Podcast Introduction - header: 'Guest Overview: Aaisha — self-taught bioinformatician, ML engineer, scientific illustrator' - line: This week we'll talk about self-studying and continuous learning in machine @@ -1271,137 +1392,6 @@ transcript: sec: 3611 time: '1:00:11' who: Alexey -description: Learn bioinformatics & machine learning via project-first workflows, - dataset-first ideation, study hacks and MLOps deployment tips to gain practical - skills. -intro: How do you teach yourself bioinformatics and machine learning in a way that - leads to real projects and deployable models? In this episode, Aaisha Muhammad — - a self-taught bioinformatician, machine learning engineer and scientific illustrator - from Johannesburg and a Datatalks.Club ML Zoomcamp graduate — walks through a project-first - path for learning bioinformatics and ML. We cover prioritization and avoiding FOMO, - open curricula like OSSU, skill mapping with ML Zoomcamp, and practical resource - evaluation (free vs paid, syllabus skimming, instructor credibility). 
Aaisha explains - dataset-first project ideation, finding datasets and papers via Google Scholar and - PubMed, and building capstone projects such as frog toxicity and landscape classifiers. - You’ll hear pragmatic study tactics — self-imposed deadlines, note-taking, time - tracking, community study groups, and using ChatGPT as a study companion — plus - strategies to approach PhD-level papers while avoiding burnout. For engineers interested - in production, the conversation addresses deployment and MLOps basics including - Docker and Kubernetes. Tune in to gain concrete guidance on projects, vetted resources, - and the study habits that make self-directed bioinformatics and ML learning sustainable. -dateadded: '2023-04-08' -duration: PT00H58M57S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=0 - endOffset: 74 -- name: 'Guest Overview: Aaisha — self-taught bioinformatician, ML engineer, scientific - illustrator' - startOffset: 74 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=74 - endOffset: 137 -- name: 'Early Learning & Homeschooling: Python, web development, and flexible study' - startOffset: 137 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=137 - endOffset: 513 -- name: 'Choosing What to Learn: prioritization, filtering, and avoiding FOMO' - startOffset: 513 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=513 - endOffset: 561 -- name: 'Open Curricula: OSSU pathway for bioinformatics' - startOffset: 561 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=561 - endOffset: 768 -- name: 'Skill Mapping with ML Zoomcamp: building machine learning fundamentals' - startOffset: 768 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=768 - endOffset: 829 -- name: 'Evaluating Resources: syllabus skimming and instructor credibility' - startOffset: 829 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=829 - endOffset: 962 -- name: Free vs Paid Resources and Vetting Paid Courses - 
startOffset: 962 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=962 - endOffset: 1071 -- name: 'Practical Relevance: identifying industry-useful ML topics (SVM anecdote)' - startOffset: 1071 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1071 - endOffset: 1362 -- name: 'Learning Strategy: balancing theory and project-based practice' - startOffset: 1362 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1362 - endOffset: 1470 -- name: Project Selection & Dataset-First Ideation - startOffset: 1470 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1470 - endOffset: 1555 -- name: 'Research Papers & Dataset Discovery: Google Scholar, PubMed, citation graphs' - startOffset: 1555 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1555 - endOffset: 1718 -- name: 'ML Zoomcamp Experience: why the course appealed and structure' - startOffset: 1718 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1718 - endOffset: 1865 -- name: 'Zoomcamp Projects: frog toxicity capstone and landscape classifier' - startOffset: 1865 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=1865 - endOffset: 2156 -- name: 'Bioinformatics Motivation: research interest meeting practical tech' - startOffset: 2156 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2156 - endOffset: 2215 -- name: 'Deadlines & Productivity Tactics: self-imposed deadlines and sticky-note - hacks' - startOffset: 2215 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2215 - endOffset: 2522 -- name: 'Study Habits: note-taking, time tracking, and personal workflow' - startOffset: 2522 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2522 - endOffset: 2630 -- name: 'Drawbacks of Independent Study: discipline risks and curriculum gaps' - startOffset: 2630 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2630 - endOffset: 2740 -- name: 'Community Learning: study groups, Slack, and teaching-to-learn' - startOffset: 2740 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2740 - endOffset: 2885 
-- name: 'Deployment & MLOps: Docker, Kubernetes, and deployment discomfort' - startOffset: 2885 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=2885 - endOffset: 3062 -- name: ChatGPT as a Study Companion and Pseudo Study Group - startOffset: 3062 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3062 - endOffset: 3114 -- name: 'Advanced Learning: approaching PhD-level topics via papers' - startOffset: 3114 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3114 - endOffset: 3218 -- name: 'Research Access & Publishing Challenges: paywalls and library access' - startOffset: 3218 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3218 - endOffset: 3386 -- name: 'Avoiding Burnout: switching topics and juggling parallel projects' - startOffset: 3386 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3386 - endOffset: 3509 -- name: 'Recommended Resources: Python for Everybody, ML Zoomcamp, further reading' - startOffset: 3509 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3509 - endOffset: 3593 -- name: Closing Remarks and Final Thoughts - startOffset: 3593 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3593 - endOffset: 3611 -- name: Episode Sign-Off - startOffset: 3611 - url: https://www.youtube.com/watch?v=Kc3Puh3UCRQ&t=3611 - endOffset: 3537 --- Links: diff --git a/_podcast/s12e06-preparing-for-data-science-interview.md b/_podcast/machine-learning-data-science-interview-prep.md similarity index 97% rename from _podcast/s12e06-preparing-for-data-science-interview.md rename to _podcast/machine-learning-data-science-interview-prep.md index b928b713..58f683bf 100644 --- a/_podcast/s12e06-preparing-for-data-science-interview.md +++ b/_podcast/machine-learning-data-science-interview-prep.md @@ -1,46 +1,122 @@ --- +title: "Master Machine Learning & Data Science Interviews: Recruiter-Proven Stages, Prep & Resources" +short: "Master Machine Learning & Data Science Interviews" +season: 12 episode: 6 guests: - lukewhipps -date: 2025-11-07 -topics: -- 
Job search -- Career Growth -- Hiring -- Technical Interviews -- Data Science -- Portfolio Building -- Interview -intro: How do you reliably prepare for ML and data science technical interviews — - from the initial recruiter screen to coding and scenario-based rounds? In this episode - Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast, draws - on 8+ years recruiting data scientists and AI professionals to lay out recruiter-proven - interview stages and practical prep tactics.

Luke walks through the full - interview lifecycle — Stage Zero recruiter screening and role-fit filtering, the - intro interview for relationship building, and the technical rounds that include - binary, scenario, example, and coding components. He explains how to research interviewers, - craft elevator pitches and STAR stories, and align expectations with recruiters - so you prepare to the right depth. You’ll learn how to prioritize fundamentals before - secondary skills, use question-flow strategies to probe deeper, and balance theory - versus practical math in machine learning interviews.

The episode also - covers recovering from failed interviews, targeted internal applications and outreach, - and concrete practice resources like LeetCode, HackerRank, Codeforces, and Educative. - If you’re preparing for ML technical interviews or data science interviews, this - episode gives recruiter-led structure, concrete prep priorities, and resource recommendations - to maximize your chances in each interview stage. +image: images/podcast/machine-learning-data-science-interview-prep.jpg ids: anchor: Preparing-for-a-Data-Science-Interview---Luke-Whipps-e1tsh5d youtube: NnZjlMowkWA -image: images/podcast/s12e06-preparing-for-data-science-interview.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Preparing-for-a-Data-Science-Interview---Luke-Whipps-e1tsh5d apple: https://podcasts.apple.com/us/podcast/preparing-for-a-data-science-interview-luke-whipps/id1541710331?i=1000596975225 spotify: https://open.spotify.com/episode/3JAmnWie8pS58Kok9Sjr2V?si=FDpX4O74Qi2kqzMGumqMpw youtube: https://www.youtube.com/watch?v=NnZjlMowkWA -season: 12 -short: Preparing for a Data Science Interview -title: 'Master ML & Data Science Technical Interviews: Recruiter-Proven Stages, Prep - & Resources' + +intro: "How do you reliably prepare for ML and data science technical interviews — from the initial recruiter screen to coding and scenario-based rounds? In this episode Luke Whipps, co-founder of Neural.AI and host of the AI Game Changer podcast, draws on 8+ years recruiting data scientists and AI professionals to lay out recruiter-proven interview stages and practical prep tactics.

Luke walks through the full interview lifecycle — Stage Zero recruiter screening and role-fit filtering, the intro interview for relationship building, and the technical rounds that include binary, scenario, example, and coding components. He explains how to research interviewers, craft elevator pitches and STAR stories, and align expectations with recruiters so you prepare to the right depth. You’ll learn how to prioritize fundamentals before secondary skills, use question-flow strategies to probe deeper, and balance theory versus practical math in machine learning interviews.

The episode also covers recovering from failed interviews, targeted internal applications and outreach, and concrete practice resources like LeetCode, HackerRank, Codeforces, and Educative. If you’re preparing for ML technical interviews or data science interviews, this episode gives recruiter-led structure, concrete prep priorities, and resource recommendations to maximize your chances in each interview stage." +topics: +- job search +- career growth +- hiring +- data science + +dateadded: 2023-01-28 +date: 2025-11-07 + +duration: PT01H29S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=0 + endOffset: 101 +- name: 'Guest Introduction: Luke Whipps & Neural AI' + startOffset: 101 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=101 + endOffset: 183 +- name: 'Recruitment Career Overview: ML focus, startups, Germany' + startOffset: 183 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=183 + endOffset: 280 +- name: 'Remote Work & Client Geography: UK base serving German market' + startOffset: 280 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=280 + endOffset: 515 +- name: 'Podcast Purpose: AI Game Changers format and goals' + startOffset: 515 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=515 + endOffset: 714 +- name: 'Recruiter Strategy: Embedded talent specialist and candidate coaching' + startOffset: 714 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=714 + endOffset: 932 +- name: 'Market Snapshot: hiring trends, layoffs, and candidate concerns' + startOffset: 932 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=932 + endOffset: 1322 +- name: 'Interview Process Overview: stages, scope, and assumptions' + startOffset: 1322 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1322 + endOffset: 1550 +- name: 'Stage Zero: recruiter screening and role-fit filtering' + startOffset: 1550 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1550 + endOffset: 1686 +- name: 'Intro 
Interview Prep: objectives, structure, and relationship building' + startOffset: 1686 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1686 + endOffset: 1826 +- name: 'Interviewer Research: personality signals and communication matching' + startOffset: 1826 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1826 + endOffset: 2315 +- name: 'Message Preparation: elevator pitches and STAR storytelling' + startOffset: 2315 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2315 + endOffset: 2495 +- name: 'Technical Interview Components: binary, scenario, example, and coding' + startOffset: 2495 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2495 + endOffset: 2696 +- name: 'Aligning Expectations: clarifying technical depth with recruiters' + startOffset: 2696 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2696 + endOffset: 2890 +- name: 'Prep Prioritization: fundamentals first, then secondary and ideal skills' + startOffset: 2890 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2890 + endOffset: 3060 +- name: 'Question Flow Strategy: follow-ups to probe deeper understanding' + startOffset: 3060 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3060 + endOffset: 3178 +- name: 'Theory vs. 
Practice: relevance of mathematical and theoretical questions' + startOffset: 3178 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3178 + endOffset: 3317 +- name: 'Recovering from Failure: bombing interviews, feedback, and retakes' + startOffset: 3317 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3317 + endOffset: 3527 +- name: 'Applying Internally: focused applications and direct outreach tactics' + startOffset: 3527 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3527 + endOffset: 3605 +- name: 'Practice Resources: LeetCode, HackerRank, Codeforces, Educative' + startOffset: 3605 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3605 + endOffset: 3703 +- name: 'Supplemental Material: Luke’s interview prep document (show notes)' + startOffset: 3703 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3703 + endOffset: 3717 +- name: Closing Remarks and Episode Wrap-up + startOffset: 3717 + url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3717 + endOffset: 3629 + transcript: - header: 'Guest Introduction: Luke Whipps & Neural AI' - header: 'Guest Introduction: Luke Whipps & Neural AI' @@ -1272,95 +1348,4 @@ transcript: sec: 3730 time: '1:02:10' who: Alexey -dateadded: '2023-01-28' -duration: PT01H29S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=0 - endOffset: 101 -- name: 'Guest Introduction: Luke Whipps & Neural AI' - startOffset: 101 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=101 - endOffset: 183 -- name: 'Recruitment Career Overview: ML focus, startups, Germany' - startOffset: 183 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=183 - endOffset: 280 -- name: 'Remote Work & Client Geography: UK base serving German market' - startOffset: 280 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=280 - endOffset: 515 -- name: 'Podcast Purpose: AI Game Changers format and goals' - startOffset: 515 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=515 - 
endOffset: 714 -- name: 'Recruiter Strategy: Embedded talent specialist and candidate coaching' - startOffset: 714 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=714 - endOffset: 932 -- name: 'Market Snapshot: hiring trends, layoffs, and candidate concerns' - startOffset: 932 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=932 - endOffset: 1322 -- name: 'Interview Process Overview: stages, scope, and assumptions' - startOffset: 1322 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1322 - endOffset: 1550 -- name: 'Stage Zero: recruiter screening and role-fit filtering' - startOffset: 1550 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1550 - endOffset: 1686 -- name: 'Intro Interview Prep: objectives, structure, and relationship building' - startOffset: 1686 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1686 - endOffset: 1826 -- name: 'Interviewer Research: personality signals and communication matching' - startOffset: 1826 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=1826 - endOffset: 2315 -- name: 'Message Preparation: elevator pitches and STAR storytelling' - startOffset: 2315 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2315 - endOffset: 2495 -- name: 'Technical Interview Components: binary, scenario, example, and coding' - startOffset: 2495 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2495 - endOffset: 2696 -- name: 'Aligning Expectations: clarifying technical depth with recruiters' - startOffset: 2696 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2696 - endOffset: 2890 -- name: 'Prep Prioritization: fundamentals first, then secondary and ideal skills' - startOffset: 2890 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=2890 - endOffset: 3060 -- name: 'Question Flow Strategy: follow-ups to probe deeper understanding' - startOffset: 3060 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3060 - endOffset: 3178 -- name: 'Theory vs. 
Practice: relevance of mathematical and theoretical questions' - startOffset: 3178 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3178 - endOffset: 3317 -- name: 'Recovering from Failure: bombing interviews, feedback, and retakes' - startOffset: 3317 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3317 - endOffset: 3527 -- name: 'Applying Internally: focused applications and direct outreach tactics' - startOffset: 3527 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3527 - endOffset: 3605 -- name: 'Practice Resources: LeetCode, HackerRank, Codeforces, Educative' - startOffset: 3605 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3605 - endOffset: 3703 -- name: 'Supplemental Material: Luke’s interview prep document (show notes)' - startOffset: 3703 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3703 - endOffset: 3717 -- name: Closing Remarks and Episode Wrap-up - startOffset: 3717 - url: https://www.youtube.com/watch?v=NnZjlMowkWA&t=3717 - endOffset: 3629 --- diff --git a/_podcast/s02e06-decision-optimization.md b/_podcast/machine-learning-decision-optimization.md similarity index 70% rename from _podcast/s02e06-decision-optimization.md rename to _podcast/machine-learning-decision-optimization.md index f0900a4c..1c0ae1e8 100644 --- a/_podcast/s02e06-decision-optimization.md +++ b/_podcast/machine-learning-decision-optimization.md @@ -1,12 +1,11 @@ --- -title: 'Optimize Decisions with ML: Prescriptive & Robust Optimization for Supply - Chain and Pricing' -short: Decision Optimization -guests: -- danbecker -image: images/podcast/s02e06-decision-optimization.jpg +title: "Optimize Decisions with ML: Prescriptive & Robust Optimization for Supply Chain and Pricing" +short: "Decision Optimization" season: 2 episode: 6 +guests: +- danbecker +image: images/podcast/machine-learning-decision-optimization.jpg ids: youtube: SJuzQ4bcU2c anchor: 
Translating-ML-Predictions-Into-Better-Real-World-Results-with-Decision-Optimization---Dan-Becker-eqk0b1/a-a4maq87 @@ -15,27 +14,15 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Translating-ML-Predictions-Into-Better-Real-World-Results-with-Decision-Optimization---Dan-Becker-eqk0b1/a-a4maq87 spotify: https://open.spotify.com/episode/42eAhI6F31DZ96Mnq2I4bJ apple: https://podcasts.apple.com/us/podcast/translating-ml-predictions-into-better-real-world-results/id1541710331?i=1000509855317 -description: 'Learn prescriptive analytics & robust optimization for supply chain - pricing: align ML predictions to decisions, scale models, pick solvers, and boost - revenue.' -intro: 'How do you turn machine learning predictions into better real-world decisions—especially - under uncertainty in supply chains and pricing? In this episode, Dan Becker, Founder - & CEO of Decision AI and former Google data scientist and Product Director at DataRobot, - walks through prescriptive analytics and decision optimization for practical business - impact. With a background that includes top Kaggle performance and contributions - to TensorFlow and Keras, Dan explains how to formulate optimization problems, choose - objectives and constraints, and integrate ML forecasts into prescriptive and robust - optimization models.

We cover robust vs. stochastic optimization, aligning - loss functions with business objectives, and the solvers and tools that make this - work—OR-Tools, Gurobi, Pyomo and open-source options. Dan also digs into scalability, - approximation techniques, and deployment: pipelines, monitoring, and feedback loops. - Use cases include supply chain optimization, resource allocation, and pricing/bidding - strategies, plus operational, legal, and ethical constraints. Listeners will get - practical guidance on evaluation metrics, common pitfalls like mis-specified objectives - and overfitting decisions, and the cross-functional skills needed—data science, - operations research, and software engineering—to get started with prescriptive optimization - projects.' -dateadded: '2021-02-23' + +description: "Learn prescriptive analytics & robust optimization for supply chain pricing: align ML predictions to decisions, scale models, pick solvers, and boost revenue." +intro: "How do you turn machine learning predictions into better real-world decisions—especially under uncertainty in supply chains and pricing? In this episode, Dan Becker, Founder & CEO of Decision AI and former Google data scientist and Product Director at DataRobot, walks through prescriptive analytics and decision optimization for practical business impact. With a background that includes top Kaggle performance and contributions to TensorFlow and Keras, Dan explains how to formulate optimization problems, choose objectives and constraints, and integrate ML forecasts into prescriptive and robust optimization models.

We cover robust vs. stochastic optimization, aligning loss functions with business objectives, and the solvers and tools that make this work—OR-Tools, Gurobi, Pyomo and open-source options. Dan also digs into scalability, approximation techniques, and deployment: pipelines, monitoring, and feedback loops. Use cases include supply chain optimization, resource allocation, and pricing/bidding strategies, plus operational, legal, and ethical constraints. Listeners will get practical guidance on evaluation metrics, common pitfalls like mis-specified objectives and overfitting decisions, and the cross-functional skills needed—data science, operations research, and software engineering—to get started with prescriptive optimization projects." +topics: +- machine learning +- decision optimization +dateadded: 2021-02-23 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -129,4 +116,5 @@ quotableClips: startOffset: 3720 url: https://www.youtube.com/watch?v=SJuzQ4bcU2c&t=3720 endOffset: 3720 + --- diff --git a/_podcast/s04e05-running-from-complexity.md b/_podcast/machine-learning-engineering-production-best-practices.md similarity index 97% rename from _podcast/s04e05-running-from-complexity.md rename to _podcast/machine-learning-engineering-production-best-practices.md index 0a1abbda..12326afe 100644 --- a/_podcast/s04e05-running-from-complexity.md +++ b/_podcast/machine-learning-engineering-production-best-practices.md @@ -1,12 +1,11 @@ --- -title: 'Practical Machine Learning Engineering for Production: Ship Maintainable Models, - Avoid Complexity' -short: Running from Complexity -guests: -- benwilson -image: images/podcast/s04e05-running-from-complexity.jpg +title: "Practical Machine Learning Engineering for Production: Ship Maintainable Models, Avoid Complexity" +short: "Running from Complexity" season: 4 episode: 5 +guests: +- benwilson +image: images/podcast/machine-learning-engineering-production-best-practices.jpg ids: youtube: sMy8NYZnsy8 anchor: 
Running-from-Complexity---Ben-Wilson-e14np51 @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Running-from-Complexity---Ben-Wilson-e14np51 spotify: https://open.spotify.com/episode/2TxcU3eF7hjkAEzAJcYMAg apple: https://podcasts.apple.com/us/podcast/running-from-complexity-ben-wilson/id1541710331?i=1000529834651 + +description: "Learn practical ML engineering to ship maintainable machine learning models to production: avoid complexity, use prototypes, explainability, testing." +intro: "Are your ML projects collapsing under their own complexity—or never making it to production at all? In this episode, Ben Wilson, Practice Lead Resident Solutions Architect at Databricks and author of an upcoming Manning book, walks through practical machine learning engineering strategies for shipping maintainable models and avoiding needless complexity. Drawing on 12 years across industries, Ben emphasizes prioritizing maintainability over novelty: refactoring monolithic code into modular, testable components, running timeboxed experiments and bake-offs, and choosing SQL or statistical solutions before jumping to deep learning.

We cover why production failures often stem from lack of business buy-in and “search-driven” complexity, how to involve subject-matter experts and executives to simplify designs, and techniques for explainability that translate model behavior into business terms. Ben also discusses team composition (statistics plus ML engineering skills), agile sprints for feature engineering and testing, the IKEA effect of emotional attachment to complex systems, and pitfalls in reproducing academic papers in production. Listen to learn concrete practices—from experimentation limits to mentoring and deployment tradeoffs—that help you move ideas into production and keep models reliable and maintainable." +topics: +- machine learning +- career growth +- production +dateadded: 2021-07-23 + +duration: PT01H11M41S + +quotableClips: +- name: 'Podcast Introduction: Running from Complexity' + startOffset: 0 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=0 + endOffset: 134 +- name: 'Guest Introduction: Ben Wilson, Databricks and ML engineering focus' + startOffset: 134 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=134 + endOffset: 186 +- name: 'Career Path: Navy nuclear tech to process engineering and data science' + startOffset: 186 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=186 + endOffset: 410 +- name: 'Consulting Approach: Prioritizing maintainability over novelty' + startOffset: 410 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=410 + endOffset: 529 +- name: 'Code Quality: Refactoring "walls of text" into modular, testable code' + startOffset: 529 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=529 + endOffset: 635 +- name: 'Production Failures: Lack of business buy-in and overcomplicated solutions' + startOffset: 635 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=635 + endOffset: 799 +- name: 'Motivation Drivers: Tech hype, "flexing," and engineering pragmatism' + startOffset: 799 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=799 + 
endOffset: 1097 +- name: 'Avoiding Search-Driven Complexity: Use experts, communities, and Bayesian + methods' + startOffset: 1097 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1097 + endOffset: 1299 +- name: 'Cross-Functional Collaboration: Involving SMEs to simplify solutions' + startOffset: 1299 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1299 + endOffset: 1564 +- name: 'Explainability: Translating models into business terms to build trust' + startOffset: 1564 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1564 + endOffset: 1746 +- name: 'From Idea to Production: Rapid prototypes, selling to SMEs, and executive + sponsorship' + startOffset: 1746 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1746 + endOffset: 1923 +- name: 'Experimentation Process: Timeboxed bake-offs and cost–benefit tradeoffs' + startOffset: 1923 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1923 + endOffset: 2173 +- name: 'The IKEA Effect: Emotional attachment to complex, hard-to-maintain systems' + startOffset: 2173 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2173 + endOffset: 2357 +- name: 'Novel Algorithm Risks: Transfer learning vs building white-paper solutions' + startOffset: 2357 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2357 + endOffset: 2663 +- name: 'Prefer Simplicity First: Solve with SQL or stats before deep learning' + startOffset: 2663 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2663 + endOffset: 2782 +- name: 'Paper Pitfalls: Reproducibility, environment assumptions, and cloud cost' + startOffset: 2782 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2782 + endOffset: 2994 +- name: 'Team Composition: Importance of statistics expertise and coding/ML engineering + skills' + startOffset: 2994 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2994 + endOffset: 3134 +- name: 'Agile for ML: Iterative sprints, MVPs, feature engineering, and testing' + startOffset: 3134 + url: 
https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3134 + endOffset: 3341 +- name: 'Timeboxing Research: Limit experiments to avoid sunk-costs' + startOffset: 3341 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3341 + endOffset: 3458 +- name: 'Mentoring & Training: Databricks programs and production ML capstones' + startOffset: 3458 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3458 + endOffset: 3553 +- name: 'Book Summary: Machine Learning Engineering in Action — process, automation, + testing' + startOffset: 3553 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3553 + endOffset: 3747 +- name: 'AI-First Tradeoffs: Talent needs, retention, and budget realities' + startOffset: 3747 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3747 + endOffset: 3874 +- name: 'Manager Enablement: Tech leads translating ML for nontechnical managers' + startOffset: 3874 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3874 + endOffset: 4078 +- name: 'Career Path Advice: Core fundamentals, specialization timeline, and leadership' + startOffset: 4078 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4078 + endOffset: 4371 +- name: 'Contact & Resources: LinkedIn, podcast appearances, and early-access book' + startOffset: 4371 + url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4371 + endOffset: 4301 + transcript: - header: 'Podcast Introduction: Running from Complexity' - header: 'Guest Introduction: Ben Wilson, Databricks and ML engineering focus' @@ -608,7 +724,7 @@ transcript: sec: 2353 time: '39:13' who: Ben -- header: 'Novel Algorithm Risks: Transfer learning vs building white‑paper solutions' +- header: 'Novel Algorithm Risks: Transfer learning vs building white-paper solutions' - line: Let's say we have something more complex – maybe a novel algorithm – that we want to try. We heard that right now deep learning is very popular, so we want to try it for our problem. Should we do this? Is this necessary? 
What kind of @@ -980,7 +1096,7 @@ transcript: sec: 3745 time: '1:02:25' who: Ben -- header: 'AI‑First Tradeoffs: Talent needs, retention, and budget realities' +- header: 'AI-First Tradeoffs: Talent needs, retention, and budget realities' - line: One question from Akshat. “It makes sense to solve problems with uncool techniques. But there are companies who are AI First – they want to show off and say that they have AI capabilities. So what about them?” @@ -1151,7 +1267,7 @@ transcript: sec: 4310 time: '1:11:50' who: Ben -- header: 'Contact & Resources: LinkedIn, podcast appearances, and early‑access book' +- header: 'Contact & Resources: LinkedIn, podcast appearances, and early-access book' - line: Okay, thanks. So, let's finish. How can people find you? sec: 4371 time: '1:12:51' @@ -1184,131 +1300,6 @@ transcript: sec: 4435 time: '1:13:55' who: Ben -description: 'Learn practical ML engineering to ship maintainable machine learning - models to production: avoid complexity, use prototypes, explainability, testing.' -intro: 'Are your ML projects collapsing under their own complexity—or never making - it to production at all? In this episode, Ben Wilson, Practice Lead Resident Solutions - Architect at Databricks and author of an upcoming Manning book, walks through practical - machine learning engineering strategies for shipping maintainable models and avoiding - needless complexity. Drawing on 12 years across industries, Ben emphasizes prioritizing - maintainability over novelty: refactoring monolithic code into modular, testable - components, running timeboxed experiments and bake-offs, and choosing SQL or statistical - solutions before jumping to deep learning.

We cover why production failures - often stem from lack of business buy-in and “search-driven” complexity, how to involve - subject-matter experts and executives to simplify designs, and techniques for explainability - that translate model behavior into business terms. Ben also discusses team composition - (statistics plus ML engineering skills), agile sprints for feature engineering and - testing, the IKEA effect of emotional attachment to complex systems, and pitfalls - in reproducing academic papers in production. Listen to learn concrete practices—from - experimentation limits to mentoring and deployment tradeoffs—that help you move - ideas into production and keep models reliable and maintainable.' -dateadded: '2021-07-23' -duration: PT01H11M41S -quotableClips: -- name: 'Podcast Introduction: Running from Complexity' - startOffset: 0 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=0 - endOffset: 134 -- name: 'Guest Introduction: Ben Wilson, Databricks and ML engineering focus' - startOffset: 134 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=134 - endOffset: 186 -- name: 'Career Path: Navy nuclear tech to process engineering and data science' - startOffset: 186 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=186 - endOffset: 410 -- name: 'Consulting Approach: Prioritizing maintainability over novelty' - startOffset: 410 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=410 - endOffset: 529 -- name: 'Code Quality: Refactoring "walls of text" into modular, testable code' - startOffset: 529 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=529 - endOffset: 635 -- name: 'Production Failures: Lack of business buy-in and overcomplicated solutions' - startOffset: 635 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=635 - endOffset: 799 -- name: 'Motivation Drivers: Tech hype, "flexing," and engineering pragmatism' - startOffset: 799 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=799 - endOffset: 1097 -- name: 'Avoiding Search-Driven 
Complexity: Use experts, communities, and Bayesian - methods' - startOffset: 1097 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1097 - endOffset: 1299 -- name: 'Cross-Functional Collaboration: Involving SMEs to simplify solutions' - startOffset: 1299 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1299 - endOffset: 1564 -- name: 'Explainability: Translating models into business terms to build trust' - startOffset: 1564 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1564 - endOffset: 1746 -- name: 'From Idea to Production: Rapid prototypes, selling to SMEs, and executive - sponsorship' - startOffset: 1746 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1746 - endOffset: 1923 -- name: 'Experimentation Process: Timeboxed bake-offs and cost–benefit tradeoffs' - startOffset: 1923 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=1923 - endOffset: 2173 -- name: 'The IKEA Effect: Emotional attachment to complex, hard-to-maintain systems' - startOffset: 2173 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2173 - endOffset: 2357 -- name: 'Novel Algorithm Risks: Transfer learning vs building white‑paper solutions' - startOffset: 2357 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2357 - endOffset: 2663 -- name: 'Prefer Simplicity First: Solve with SQL or stats before deep learning' - startOffset: 2663 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2663 - endOffset: 2782 -- name: 'Paper Pitfalls: Reproducibility, environment assumptions, and cloud cost' - startOffset: 2782 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2782 - endOffset: 2994 -- name: 'Team Composition: Importance of statistics expertise and coding/ML engineering - skills' - startOffset: 2994 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=2994 - endOffset: 3134 -- name: 'Agile for ML: Iterative sprints, MVPs, feature engineering, and testing' - startOffset: 3134 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3134 - endOffset: 3341 -- name: 
'Timeboxing Research: Limit experiments to avoid sunk-costs' - startOffset: 3341 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3341 - endOffset: 3458 -- name: 'Mentoring & Training: Databricks programs and production ML capstones' - startOffset: 3458 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3458 - endOffset: 3553 -- name: 'Book Summary: Machine Learning Engineering in Action — process, automation, - testing' - startOffset: 3553 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3553 - endOffset: 3747 -- name: 'AI‑First Tradeoffs: Talent needs, retention, and budget realities' - startOffset: 3747 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3747 - endOffset: 3874 -- name: 'Manager Enablement: Tech leads translating ML for nontechnical managers' - startOffset: 3874 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=3874 - endOffset: 4078 -- name: 'Career Path Advice: Core fundamentals, specialization timeline, and leadership' - startOffset: 4078 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4078 - endOffset: 4371 -- name: 'Contact & Resources: LinkedIn, podcast appearances, and early‑access book' - startOffset: 4371 - url: https://www.youtube.com/watch?v=sMy8NYZnsy8&t=4371 - endOffset: 4301 --- Links: diff --git a/_podcast/s09e02-using-data-for-asteroid-mining.md b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md similarity index 97% rename from _podcast/s09e02-using-data-for-asteroid-mining.md rename to _podcast/machine-learning-for-asteroid-mining-and-water-detection.md index e106b24f..02c968d7 100644 --- a/_podcast/s09e02-using-data-for-asteroid-mining.md +++ b/_podcast/machine-learning-for-asteroid-mining-and-water-detection.md @@ -1,20 +1,123 @@ --- +title: "Asteroid Mining: Using ML & Hyperspectral Spectroscopy to Detect Water for ISRU" +short: "Using Data for Asteroid Mining" +season: 9 episode: 2 guests: - daynancrull +image: images/podcast/machine-learning-for-asteroid-mining-and-water-detection.jpg 
ids: anchor: Using-Data-for-Asteroid-Mining---Daynan-Crull-e1jbhr0 youtube: YxijEUoDCfw -image: images/podcast/s09e02-using-data-for-asteroid-mining.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Using-Data-for-Asteroid-Mining---Daynan-Crull-e1jbhr0 apple: https://podcasts.apple.com/us/podcast/machine-learning-in-marketing-juan-orduz/id1541710331?i=1000564219176 spotify: https://open.spotify.com/episode/7wjKCbCsD4ytuNrE8JrH2B?si=1WPAtw6PSZGVib0qSsoLvA youtube: https://www.youtube.com/watch?v=YxijEUoDCfw -season: 9 -short: Using Data for Asteroid Mining -title: 'Asteroid Mining: Using ML & Hyperspectral Spectroscopy to Detect Water for - ISRU' + +description: "Discover asteroid mining: machine learning & hyperspectral spectroscopy to detect water for ISRU—learn detection methods, datasets, mission design & tools." +intro: "How can we reliably detect water on near-Earth asteroids using machine learning and hyperspectral spectroscopy to enable in-situ resource utilization (ISRU)? In this episode Daynan Crull—co-founder of Karman+ and lead of its science and technology effort—walks through the science and engineering needed to find and characterize asteroid water for space missions. Drawing on his background in remote sensing and ML, Daynan explains hyperspectral infrared signatures for water detection, spectral classification approaches, and the limits of ground truth from returned samples and meteorites. Along the way we cover relevant astronomical data types (images, hyperspectral bands, time series), asteroid features like photometry and rotation, observability challenges, and ML tasks from signal processing to orbit linking and synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), sampling and extraction concepts, economic use cases for water-as-fuel, and the cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. 
Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, machine learning for water detection, and the datasets and infrastructure to get involved in ISRU research and missions" +topics: +- machine learning +- astronomy +dateadded: 2022-06-04 + +duration: PT01H16S + +quotableClips: +- name: Podcast Introduction + startOffset: 83 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=83 + endOffset: 111 +- name: 'Career & Data Science Pivot: From Astronomy to Asteroid Mining' + startOffset: 111 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=111 + endOffset: 292 +- name: 'Cosmology vs. Astronomy: Timescales, Theory & Observation' + startOffset: 292 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=292 + endOffset: 363 +- name: 'Machine Learning in Astronomy: Tasks, Signal Processing & Scaling' + startOffset: 363 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=363 + endOffset: 440 +- name: 'Gravitational Wave Detection: Signal, Noise & Instrument Glitches' + startOffset: 440 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=440 + endOffset: 765 +- name: 'Astronomical Data Types: Images, Hyperspectral Bands & Time Series' + startOffset: 765 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=765 + endOffset: 864 +- name: 'Hyperspectral Spectroscopy: Infrared Signatures & Water Detection' + startOffset: 864 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=864 + endOffset: 1004 +- name: 'Asteroid Features: Photometry, Light Curves, Rotation & Polarimetry' + startOffset: 1004 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1004 + endOffset: 1175 +- name: Spectral Classification & ML Approaches for Water Identification + startOffset: 1175 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1175 + endOffset: 1320 +- name: 'Ground Truth Limitations: Returned Samples, Meteorites & Validation' + startOffset: 1320 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1320 + endOffset: 1542 +- name: 'ISRU & Water-as-Fuel: 
Economics and Use Cases for Space Resources' + startOffset: 1542 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1542 + endOffset: 1818 +- name: 'Other Resources on Asteroids: Metals, Organics & Scientific Value' + startOffset: 1818 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1818 + endOffset: 1932 +- name: 'Asteroid Origins: Main Belt, Resonances & Near-Earth Populations' + startOffset: 1932 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1932 + endOffset: 2148 +- name: 'Observability Challenges: Angles, Dawn/Dusk Windows & Detection Biases' + startOffset: 2148 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2148 + endOffset: 2293 +- name: 'Data Organization: Team Roles, Data Engineering & Bayesian Engines' + startOffset: 2293 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2293 + endOffset: 2543 +- name: 'Cloud & Infrastructure: Storage, COGs/STAC and Querying Large Imagery' + startOffset: 2543 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2543 + endOffset: 2726 +- name: 'Open Datasets & APIs: Minor Planet Center, JPL Horizons, NEOWISE' + startOffset: 2726 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2726 + endOffset: 2956 +- name: 'Orbit Linking & Synthetic Tracking: ML for Large-Scale Detection' + startOffset: 2956 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2956 + endOffset: 3054 +- name: 'Mission Architecture: CubeSats, COTS Components & Partnership Strategy' + startOffset: 3054 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3054 + endOffset: 3202 +- name: 'Sampling & Extraction Methods: Scooping, Surface Interaction & R&D' + startOffset: 3202 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3202 + endOffset: 3436 +- name: 'Mathematical Models: Bayesian Frameworks, Thermal Models & Yarkovsky' + startOffset: 3436 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3436 + endOffset: 3611 +- name: 'Tools & Workflows: Notebooks, Reproducibility & Research Practices' + startOffset: 3611 + url: 
https://www.youtube.com/watch?v=YxijEUoDCfw&t=3611 + endOffset: 3668 +- name: 'Get Involved: Job Openings, Links, Contact & Further Resources' + startOffset: 3668 + url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3668 + endOffset: 3616 + transcript: - header: Podcast Introduction - line: This week, we'll talk about extracting space resources from asteroids. We @@ -1119,119 +1222,6 @@ transcript: sec: 3699 time: '1:01:39' who: Alexey -description: 'Discover asteroid mining: machine learning & hyperspectral spectroscopy - to detect water for ISRU—learn detection methods, datasets, mission design & tools.' -intro: How can we reliably detect water on near‑Earth asteroids using machine learning - and hyperspectral spectroscopy to enable in‑situ resource utilization (ISRU)? In - this episode Daynan Crull—co‑founder of Karman+ and lead of its science and technology - effort—walks through the science and engineering needed to find and characterize - asteroid water for space missions. Drawing on his background in remote sensing and - ML, Daynan explains hyperspectral infrared signatures for water detection, spectral - classification approaches, and the limits of ground truth from returned samples - and meteorites. Along the way we cover relevant astronomical data types (images, - hyperspectral bands, time series), asteroid features like photometry and rotation, - observability challenges, and ML tasks from signal processing to orbit linking and - synthetic tracking. Daynan also discusses mission architecture (CubeSats, COTS), - sampling and extraction concepts, economic use cases for water‑as‑fuel, and the - cloud, datasets, and tools (MPC, JPL Horizons, NEOWISE) that support scalable workflows. - Listen to gain practical insight into asteroid mining, hyperspectral spectroscopy, - machine learning for water detection, and the datasets and infrastructure to get - involved in ISRU research and missions. 
-dateadded: '2022-06-04' -duration: PT01H16S -quotableClips: -- name: Podcast Introduction - startOffset: 83 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=83 - endOffset: 111 -- name: 'Career & Data Science Pivot: From Astronomy to Asteroid Mining' - startOffset: 111 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=111 - endOffset: 292 -- name: 'Cosmology vs. Astronomy: Timescales, Theory & Observation' - startOffset: 292 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=292 - endOffset: 363 -- name: 'Machine Learning in Astronomy: Tasks, Signal Processing & Scaling' - startOffset: 363 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=363 - endOffset: 440 -- name: 'Gravitational Wave Detection: Signal, Noise & Instrument Glitches' - startOffset: 440 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=440 - endOffset: 765 -- name: 'Astronomical Data Types: Images, Hyperspectral Bands & Time Series' - startOffset: 765 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=765 - endOffset: 864 -- name: 'Hyperspectral Spectroscopy: Infrared Signatures & Water Detection' - startOffset: 864 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=864 - endOffset: 1004 -- name: 'Asteroid Features: Photometry, Light Curves, Rotation & Polarimetry' - startOffset: 1004 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1004 - endOffset: 1175 -- name: Spectral Classification & ML Approaches for Water Identification - startOffset: 1175 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1175 - endOffset: 1320 -- name: 'Ground Truth Limitations: Returned Samples, Meteorites & Validation' - startOffset: 1320 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1320 - endOffset: 1542 -- name: 'ISRU & Water-as-Fuel: Economics and Use Cases for Space Resources' - startOffset: 1542 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1542 - endOffset: 1818 -- name: 'Other Resources on Asteroids: Metals, Organics & Scientific Value' - startOffset: 1818 - url: 
https://www.youtube.com/watch?v=YxijEUoDCfw&t=1818 - endOffset: 1932 -- name: 'Asteroid Origins: Main Belt, Resonances & Near-Earth Populations' - startOffset: 1932 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=1932 - endOffset: 2148 -- name: 'Observability Challenges: Angles, Dawn/Dusk Windows & Detection Biases' - startOffset: 2148 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2148 - endOffset: 2293 -- name: 'Data Organization: Team Roles, Data Engineering & Bayesian Engines' - startOffset: 2293 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2293 - endOffset: 2543 -- name: 'Cloud & Infrastructure: Storage, COGs/STAC and Querying Large Imagery' - startOffset: 2543 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2543 - endOffset: 2726 -- name: 'Open Datasets & APIs: Minor Planet Center, JPL Horizons, NEOWISE' - startOffset: 2726 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2726 - endOffset: 2956 -- name: 'Orbit Linking & Synthetic Tracking: ML for Large-Scale Detection' - startOffset: 2956 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=2956 - endOffset: 3054 -- name: 'Mission Architecture: CubeSats, COTS Components & Partnership Strategy' - startOffset: 3054 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3054 - endOffset: 3202 -- name: 'Sampling & Extraction Methods: Scooping, Surface Interaction & R&D' - startOffset: 3202 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3202 - endOffset: 3436 -- name: 'Mathematical Models: Bayesian Frameworks, Thermal Models & Yarkovsky' - startOffset: 3436 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3436 - endOffset: 3611 -- name: 'Tools & Workflows: Notebooks, Reproducibility & Research Practices' - startOffset: 3611 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3611 - endOffset: 3668 -- name: 'Get Involved: Job Openings, Links, Contact & Further Resources' - startOffset: 3668 - url: https://www.youtube.com/watch?v=YxijEUoDCfw&t=3668 - endOffset: 3616 --- Links: 
diff --git a/_podcast/s09e01-machine-learning-in-marketing.md b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md similarity index 97% rename from _podcast/s09e01-machine-learning-in-marketing.md rename to _podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md index c281c72f..875c2b1f 100644 --- a/_podcast/s09e01-machine-learning-in-marketing.md +++ b/_podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.md @@ -1,20 +1,140 @@ --- +title: "Marketing Data Science: Attribution, Media Mix Modeling, Uplift & Cookieless Tracking" +short: "Machine Learning in Marketing" +season: 9 episode: 1 guests: - juanorduz +image: images/podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.jpg ids: anchor: Machine-Learning-in-Marketing---Juan-Orduz-e1j1muj youtube: jsAxUd_bZpw -image: images/podcast/s09e01-machine-learning-in-marketing.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Machine-Learning-in-Marketing---Juan-Orduz-e1j1muj apple: https://podcasts.apple.com/us/podcast/machine-learning-in-marketing-juan-orduz/id1541710331?i=1000564219176 spotify: https://open.spotify.com/episode/0rc8zZjdxr5ncxqH9RDqBV?si=49feb89374554f65 youtube: https://www.youtube.com/watch?v=jsAxUd_bZpw -season: 9 -short: Machine Learning in Marketing -title: 'Marketing Data Science: Attribution, Media Mix Modeling, Uplift & Cookieless - Tracking' + +description: "Learn attribution, media mix modeling & cookieless tracking to measure uplift, TV/offline impact and automate MMM for faster acquisition & retention" +intro: "How can marketing teams reliably measure ad impact, allocate budget across channels, and adapt to a cookieless world? 
In this episode, Juan Orduz — a Berlin-based mathematician and data scientist specializing in statistical learning, time series, Bayesian and geometric methods — walks through practical marketing data science approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless tracking.

We cover attribution basics and multi-channel ambiguity, MMM techniques including regression, ad-stock and saturation, and campaign uplift estimation using time-series counterfactuals. Juan explains measuring TV and offline channels, the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention and purchase-frequency modeling. You’ll also hear about uplift A/B testing design, modeling benchmarks (start simple), MMM retraining cadence, learning decay rates with Bayesian regression, and building a marketing data function with the right data integrations and cross-functional collaboration.

If you want actionable guidance on attribution models, media mix optimization, privacy-aware tracking, and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, common pitfalls, and learning resources to help practitioners improve measurement and decision-making" +topics: +- marketing +- machine learning +dateadded: 2022-05-28 + +duration: PT00H59M31S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=0 + endOffset: 102 +- name: 'Introduction: Juan Orduz — mathematician and data scientist' + startOffset: 102 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=102 + endOffset: 167 +- name: 'Career Path: From geometric analysis to industry data science' + startOffset: 167 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=167 + endOffset: 309 +- name: Geometric Analysis Overview & connections to Bayesian sampling + startOffset: 309 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=309 + endOffset: 451 +- name: 'Machine Learning in Marketing: Key use cases (acquisition, retention, NLP)' + startOffset: 451 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=451 + endOffset: 618 +- name: 'Attribution Basics: Multi-channel user journeys and ambiguity' + startOffset: 618 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=618 + endOffset: 816 +- name: 'Media Mix Modeling: Regression, saturation and ad-stock transformations' + startOffset: 816 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=816 + endOffset: 898 +- name: 'Campaign Uplift Estimation: Time series counterfactuals and ad impact' + startOffset: 898 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=898 + endOffset: 1188 +- name: 'Measuring TV & Offline Channels: Aggregated impressions and time granularity' + startOffset: 1188 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1188 + endOffset: 1249 +- name: 'Privacy Changes and Cookieless Tracking: Impact of iOS 14.5' + startOffset: 1249 + url: 
https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1249 + endOffset: 1384 +- name: 'Retention Modeling: Contractual vs non-contractual churn strategies' + startOffset: 1384 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1384 + endOffset: 1537 +- name: 'Purchase Frequency Modeling: Detecting unusual inactivity patterns' + startOffset: 1537 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1537 + endOffset: 1753 +- name: 'Uplift Modeling: Targeted interventions versus churn prediction' + startOffset: 1753 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1753 + endOffset: 1854 +- name: 'A/B Testing for Uplift: Control/treatment design and data pitfalls' + startOffset: 1854 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1854 + endOffset: 2124 +- name: 'Modeling Benchmarks: Start simple with baselines before complex ML' + startOffset: 2124 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2124 + endOffset: 2225 +- name: 'MMM Retraining Cadence: Monthly updates and automation considerations' + startOffset: 2225 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2225 + endOffset: 2302 +- name: 'Attribution Baselines: Uniform allocation and look-alike approaches' + startOffset: 2302 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2302 + endOffset: 2381 +- name: 'Learning Decay Rates: Estimating channel decay with Bayesian regression' + startOffset: 2381 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2381 + endOffset: 2446 +- name: 'Learning Resources: Books, courses, talks and Juan’s blog' + startOffset: 2446 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2446 + endOffset: 2526 +- name: 'Bayesian vs Frequentist: When to use priors and hierarchical models' + startOffset: 2526 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2526 + endOffset: 2886 +- name: 'Building a Marketing Data Function: Data integrations and infrastructure + first' + startOffset: 2886 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2886 + endOffset: 3050 +- 
name: 'Cross-functional Collaboration: Analysts, engineers and marketing stakeholders' + startOffset: 3050 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3050 + endOffset: 3217 +- name: 'KPI Definition: Short-term vs long-term conversion objectives' + startOffset: 3217 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3217 + endOffset: 3312 +- name: 'Hard Problems in Marketing: Offline channels, data quality, creative solutions' + startOffset: 3312 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3312 + endOffset: 3422 +- name: 'Marketing Domain Knowledge: Stakeholder alignment and explainability' + startOffset: 3422 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3422 + endOffset: 3528 +- name: 'Find Juan Online: Blog, GitHub and contact links' + startOffset: 3528 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3528 + endOffset: 3622 +- name: Closing Remarks & resource links + startOffset: 3622 + url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3622 + endOffset: 3571 + transcript: - header: Podcast Introduction - header: 'Introduction: Juan Orduz — mathematician and data scientist' @@ -1100,137 +1220,6 @@ transcript: sec: 3673 time: '1:01:13' who: Alexey -description: Learn attribution, media mix modeling & cookieless tracking to measure - uplift, TV/offline impact and automate MMM for faster acquisition & retention -intro: How can marketing teams reliably measure ad impact, allocate budget across - channels, and adapt to a cookieless world? In this episode, Juan Orduz — a Berlin‑based - mathematician and data scientist specializing in statistical learning, time series, - Bayesian and geometric methods — walks through practical marketing data science - approaches for attribution, media mix modeling (MMM), uplift modeling, and cookieless - tracking.

We cover attribution basics and multi‑channel ambiguity, MMM - techniques including regression, ad‑stock and saturation, and campaign uplift estimation - using time‑series counterfactuals. Juan explains measuring TV and offline channels, - the impact of privacy changes like iOS 14.5 on tracking, and strategies for retention - and purchase‑frequency modeling. You’ll also hear about uplift A/B testing design, - modeling benchmarks (start simple), MMM retraining cadence, learning decay rates - with Bayesian regression, and building a marketing data function with the right - data integrations and cross‑functional collaboration.

If you want actionable - guidance on attribution models, media mix optimization, privacy‑aware tracking, - and when to choose Bayesian vs frequentist methods, this episode gives clear frameworks, - common pitfalls, and learning resources to help practitioners improve measurement - and decision‑making. -dateadded: '2022-05-28' -duration: PT00H59M31S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=0 - endOffset: 102 -- name: 'Introduction: Juan Orduz — mathematician and data scientist' - startOffset: 102 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=102 - endOffset: 167 -- name: 'Career Path: From geometric analysis to industry data science' - startOffset: 167 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=167 - endOffset: 309 -- name: Geometric Analysis Overview & connections to Bayesian sampling - startOffset: 309 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=309 - endOffset: 451 -- name: 'Machine Learning in Marketing: Key use cases (acquisition, retention, NLP)' - startOffset: 451 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=451 - endOffset: 618 -- name: 'Attribution Basics: Multi-channel user journeys and ambiguity' - startOffset: 618 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=618 - endOffset: 816 -- name: 'Media Mix Modeling: Regression, saturation and ad-stock transformations' - startOffset: 816 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=816 - endOffset: 898 -- name: 'Campaign Uplift Estimation: Time series counterfactuals and ad impact' - startOffset: 898 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=898 - endOffset: 1188 -- name: 'Measuring TV & Offline Channels: Aggregated impressions and time granularity' - startOffset: 1188 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1188 - endOffset: 1249 -- name: 'Privacy Changes and Cookieless Tracking: Impact of iOS 14.5' - startOffset: 1249 - url: 
https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1249 - endOffset: 1384 -- name: 'Retention Modeling: Contractual vs non-contractual churn strategies' - startOffset: 1384 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1384 - endOffset: 1537 -- name: 'Purchase Frequency Modeling: Detecting unusual inactivity patterns' - startOffset: 1537 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1537 - endOffset: 1753 -- name: 'Uplift Modeling: Targeted interventions versus churn prediction' - startOffset: 1753 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1753 - endOffset: 1854 -- name: 'A/B Testing for Uplift: Control/treatment design and data pitfalls' - startOffset: 1854 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=1854 - endOffset: 2124 -- name: 'Modeling Benchmarks: Start simple with baselines before complex ML' - startOffset: 2124 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2124 - endOffset: 2225 -- name: 'MMM Retraining Cadence: Monthly updates and automation considerations' - startOffset: 2225 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2225 - endOffset: 2302 -- name: 'Attribution Baselines: Uniform allocation and look-alike approaches' - startOffset: 2302 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2302 - endOffset: 2381 -- name: 'Learning Decay Rates: Estimating channel decay with Bayesian regression' - startOffset: 2381 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2381 - endOffset: 2446 -- name: 'Learning Resources: Books, courses, talks and Juan’s blog' - startOffset: 2446 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2446 - endOffset: 2526 -- name: 'Bayesian vs Frequentist: When to use priors and hierarchical models' - startOffset: 2526 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2526 - endOffset: 2886 -- name: 'Building a Marketing Data Function: Data integrations and infrastructure - first' - startOffset: 2886 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=2886 - endOffset: 3050 -- 
name: 'Cross-functional Collaboration: Analysts, engineers and marketing stakeholders' - startOffset: 3050 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3050 - endOffset: 3217 -- name: 'KPI Definition: Short-term vs long-term conversion objectives' - startOffset: 3217 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3217 - endOffset: 3312 -- name: 'Hard Problems in Marketing: Offline channels, data quality, creative solutions' - startOffset: 3312 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3312 - endOffset: 3422 -- name: 'Marketing Domain Knowledge: Stakeholder alignment and explainability' - startOffset: 3422 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3422 - endOffset: 3528 -- name: 'Find Juan Online: Blog, GitHub and contact links' - startOffset: 3528 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3528 - endOffset: 3622 -- name: Closing Remarks & resource links - startOffset: 3622 - url: https://www.youtube.com/watch?v=jsAxUd_bZpw&t=3622 - endOffset: 3571 --- Links: diff --git a/_podcast/s07e05-machine-learning-system-design-interview.md b/_podcast/machine-learning-system-design-interview.md similarity index 97% rename from _podcast/s07e05-machine-learning-system-design-interview.md rename to _podcast/machine-learning-system-design-interview.md index e50b7547..5177ebcf 100644 --- a/_podcast/s07e05-machine-learning-system-design-interview.md +++ b/_podcast/machine-learning-system-design-interview.md @@ -1,42 +1,141 @@ --- +title: "ML System Design Interviews: Production ML, Fraud Detection, Features, A/B Testing & MLOps" +short: "Machine Learning System Design Interview" +season: 7 episode: 5 guests: - valeriybabushkin -intro: 'How do you approach ML system design interviews that probe production constraints, - fraud detection trade-offs, and MLOps realities? 
In this episode, Valerii Babushkin - — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, - and author of Machine Learning System Design — walks through what interviewers look - for and how candidates should structure answers for real-world ML problems.

- We cover concrete topics you can use in interviews and on the job: distinguishing - software vs. ML system design; a fraud detection case study (probabilities, loss - functions, real-time requirements); label noise, class imbalance, and feature engineering - trade-offs; end-to-end pipeline items like metrics, baselines, A/B testing, and - validating in production; monitoring, distribution shift, fallbacks, and production - robustness; serving models, embeddings, and MLOps roles; plus when to avoid ML and - practical checklist items for core projects. Valerii also shares interview tactics - — signposting depth, stating assumptions, iterative baselines — and guidance for - new grads and career progression toward system design roles.

Listen to - learn actionable frameworks, example trade-offs, and preparation strategies to improve - your ML system design interviews and production ML decisions.' -description: 'Master ML system design: fraud detection, feature engineering & A/B - testing to ace interviews, build robust production models, monitoring and MLOps.' -date: 2025-11-07 -topics: -- machine learning -- career growth +image: images/podcast/machine-learning-system-design-interview.jpg ids: anchor: Machine-Learning-System-Design-Interview---Valerii-Babushkin-e1ej65e youtube: 0RsmRjar66E -image: images/podcast/s07e05-machine-learning-system-design-interview.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Machine-Learning-System-Design-Interview---Valerii-Babushkin-e1ej65e apple: https://podcasts.apple.com/us/podcast/machine-learning-system-design-interview-valerii-babushkin/id1541710331?i=1000551566652 spotify: https://open.spotify.com/episode/5tSLFOh8PGe1NFFz1of9Xe youtube: https://www.youtube.com/watch?v=0RsmRjar66E -season: 7 -short: Machine Learning System Design Interview -title: 'ML System Design Interviews: Production ML, Fraud Detection, Features, A/B - Testing & MLOps' + +description: "Master ML system design: fraud detection, feature engineering & A/B testing to ace interviews, build robust production models, monitoring and MLOps." +intro: "How do you approach ML system design interviews that probe production constraints, fraud detection trade-offs, and MLOps realities? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through what interviewers look for and how candidates should structure answers for real-world ML problems.

We cover concrete topics you can use in interviews and on the job: distinguishing software vs. ML system design; a fraud detection case study (probabilities, loss functions, real-time requirements); label noise, class imbalance, and feature engineering trade-offs; end-to-end pipeline items like metrics, baselines, A/B testing, and validating in production; monitoring, distribution shift, fallbacks, and production robustness; serving models, embeddings, and MLOps roles; plus when to avoid ML and practical checklist items for core projects. Valerii also shares interview tactics — signposting depth, stating assumptions, iterative baselines — and guidance for new grads and career progression toward system design roles.

Listen to learn actionable frameworks, example trade-offs, and preparation strategies to improve your ML system design interviews and production ML decisions." +topics: +- machine learning +- career growth +dateadded: 2022-02-19 +date: 2025-11-07 + +duration: PT00H59M + +quotableClips: +- name: Podcast Introduction & Episode Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=0 + endOffset: 111 +- name: 'Valerii Background: Career Snapshot and Kaggle Achievements' + startOffset: 111 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=111 + endOffset: 201 +- name: 'Blockchain.com Role: Scope, Responsibilities, and Data Ownership' + startOffset: 201 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=201 + endOffset: 346 +- name: 'Transition to Meta: User Privacy Work and Large-Scale ML Experience' + startOffset: 346 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=346 + endOffset: 451 +- name: 'Hiring Experience: Conducting High-Volume Interviews and Team Leadership' + startOffset: 451 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=451 + endOffset: 552 +- name: 'Candidate Targeting: Who Faces ML System Design Interviews' + startOffset: 552 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=552 + endOffset: 683 +- name: 'Interview Structure: 45-Minute Narrative and Evaluation Goals' + startOffset: 683 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=683 + endOffset: 838 +- name: 'Contrast: Software System Design Versus ML System Design' + startOffset: 838 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 + endOffset: 1003 +- name: 'Fraud Detection Case Study: Probabilities, Loss Functions, and Real-Time + Needs' + startOffset: 838 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 + endOffset: 1003 +- name: Labeling, Class Imbalance, and Feature Engineering Tradeoffs + startOffset: 1003 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1003 + endOffset: 1233 +- name: 'Interview Tactics: Stating Assumptions 
and Getting Alignment' + startOffset: 1233 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1233 + endOffset: 1325 +- name: 'Example: Points-of-Interest System vs Personalized Recommender' + startOffset: 1325 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1325 + endOffset: 1468 +- name: 'End-to-End ML Pipeline: Metrics, Baselines, and A/B Testing' + startOffset: 1468 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1468 + endOffset: 1749 +- name: 'Securing the Interview: Iterative Baselines and Signposting Depth' + startOffset: 1749 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1749 + endOffset: 1918 +- name: 'Appropriate Depth: Practical ML Decisions vs Research-Level Detail' + startOffset: 1918 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1918 + endOffset: 2011 +- name: 'Preparation Strategies: Mock Interviews, Resources, and Experience' + startOffset: 2011 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2011 + endOffset: 2279 +- name: 'Industry Checklist: Core ML Project Review Items and Patterns' + startOffset: 2279 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2279 + endOffset: 2411 +- name: 'Defining Goals and Proxy Metrics: Business Alignment and Long-Term Health' + startOffset: 2411 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2411 + endOffset: 2651 +- name: Features, Labels, Model Selection, and Validation Workflow + startOffset: 2651 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2651 + endOffset: 2762 +- name: 'Production Robustness: Monitoring, Distribution Shift, and Fallbacks' + startOffset: 2762 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2762 + endOffset: 2872 +- name: 'System Components: Why Features Matter More Than Model Architecture' + startOffset: 2872 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2872 + endOffset: 3057 +- name: 'Engineering Integration: Serving Models, Embeddings, and MLOps Roles' + startOffset: 3057 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3057 
+ endOffset: 3145 +- name: When to Avoid ML and Useful Design Pattern References + startOffset: 3145 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3145 + endOffset: 3247 +- name: 'New Grad Expectations: Coding Focus and Limited System Design' + startOffset: 3247 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3247 + endOffset: 3443 +- name: 'Validating in Production: A/B Tests, Causality, and Human Labels' + startOffset: 3443 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3443 + endOffset: 3541 +- name: 'Career Path: Moving from Data Science Practice to System Design' + startOffset: 3541 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3541 + endOffset: 3603 +- name: Closing Remarks and Contact Information + startOffset: 3603 + url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3603 + endOffset: 3540 + transcript: - header: Podcast Introduction & Episode Overview - header: 'Valerii Background: Career Snapshot and Kaggle Achievements' @@ -1317,118 +1416,6 @@ transcript: sec: 3651 time: '1:00:51' who: Valerii -dateadded: '2022-02-19' -duration: PT00H59M -quotableClips: -- name: Podcast Introduction & Episode Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=0 - endOffset: 111 -- name: 'Valerii Background: Career Snapshot and Kaggle Achievements' - startOffset: 111 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=111 - endOffset: 201 -- name: 'Blockchain.com Role: Scope, Responsibilities, and Data Ownership' - startOffset: 201 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=201 - endOffset: 346 -- name: 'Transition to Meta: User Privacy Work and Large-Scale ML Experience' - startOffset: 346 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=346 - endOffset: 451 -- name: 'Hiring Experience: Conducting High-Volume Interviews and Team Leadership' - startOffset: 451 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=451 - endOffset: 552 -- name: 'Candidate Targeting: Who Faces ML System Design Interviews' - 
startOffset: 552 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=552 - endOffset: 683 -- name: 'Interview Structure: 45-Minute Narrative and Evaluation Goals' - startOffset: 683 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=683 - endOffset: 838 -- name: 'Contrast: Software System Design Versus ML System Design' - startOffset: 838 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 - endOffset: 1003 -- name: 'Fraud Detection Case Study: Probabilities, Loss Functions, and Real-Time - Needs' - startOffset: 838 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=838 - endOffset: 1003 -- name: Labeling, Class Imbalance, and Feature Engineering Tradeoffs - startOffset: 1003 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1003 - endOffset: 1233 -- name: 'Interview Tactics: Stating Assumptions and Getting Alignment' - startOffset: 1233 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1233 - endOffset: 1325 -- name: 'Example: Points-of-Interest System vs Personalized Recommender' - startOffset: 1325 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1325 - endOffset: 1468 -- name: 'End-to-End ML Pipeline: Metrics, Baselines, and A/B Testing' - startOffset: 1468 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1468 - endOffset: 1749 -- name: 'Securing the Interview: Iterative Baselines and Signposting Depth' - startOffset: 1749 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1749 - endOffset: 1918 -- name: 'Appropriate Depth: Practical ML Decisions vs Research-Level Detail' - startOffset: 1918 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=1918 - endOffset: 2011 -- name: 'Preparation Strategies: Mock Interviews, Resources, and Experience' - startOffset: 2011 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2011 - endOffset: 2279 -- name: 'Industry Checklist: Core ML Project Review Items and Patterns' - startOffset: 2279 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2279 - endOffset: 2411 -- name: 'Defining Goals and 
Proxy Metrics: Business Alignment and Long-Term Health' - startOffset: 2411 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2411 - endOffset: 2651 -- name: Features, Labels, Model Selection, and Validation Workflow - startOffset: 2651 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2651 - endOffset: 2762 -- name: 'Production Robustness: Monitoring, Distribution Shift, and Fallbacks' - startOffset: 2762 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2762 - endOffset: 2872 -- name: 'System Components: Why Features Matter More Than Model Architecture' - startOffset: 2872 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=2872 - endOffset: 3057 -- name: 'Engineering Integration: Serving Models, Embeddings, and MLOps Roles' - startOffset: 3057 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3057 - endOffset: 3145 -- name: When to Avoid ML and Useful Design Pattern References - startOffset: 3145 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3145 - endOffset: 3247 -- name: 'New Grad Expectations: Coding Focus and Limited System Design' - startOffset: 3247 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3247 - endOffset: 3443 -- name: 'Validating in Production: A/B Tests, Causality, and Human Labels' - startOffset: 3443 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3443 - endOffset: 3541 -- name: 'Career Path: Moving from Data Science Practice to System Design' - startOffset: 3541 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3541 - endOffset: 3603 -- name: Closing Remarks and Contact Information - startOffset: 3603 - url: https://www.youtube.com/watch?v=0RsmRjar66E&t=3603 - endOffset: 3540 --- Links: diff --git a/_podcast/s02e09-roles-skills-monetizing-ml.md b/_podcast/make-money-with-machine-learning-roles-skills.md similarity index 97% rename from _podcast/s02e09-roles-skills-monetizing-ml.md rename to _podcast/make-money-with-machine-learning-roles-skills.md index 77516282..05143183 100644 --- 
a/_podcast/s02e09-roles-skills-monetizing-ml.md +++ b/_podcast/make-money-with-machine-learning-roles-skills.md @@ -1,12 +1,11 @@ --- -title: 'Monetize Machine Learning: Convert Models to ARR/MRR with ML Product & MLOps - Strategy' -short: New Roles and Key Skills to Monetize Machine Learning -guests: -- vinvashishta -image: images/podcast/s02e09-roles-skills-monetizing-ml.jpg +title: "Monetize Machine Learning: Convert Models to ARR/MRR with ML Product & MLOps Strategy" +short: "New Roles and Key Skills to Monetize Machine Learning" season: 2 episode: 9 +guests: +- vinvashishta +image: images/podcast/make-money-with-machine-learning-roles-skills.jpg ids: youtube: xCjzA_8S4kI anchor: New-Roles-and-Key-Skills-to-Monetize-Machine-Learning---Vin-Vashishta-escer6 @@ -15,6 +14,109 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/New-Roles-and-Key-Skills-to-Monetize-Machine-Learning---Vin-Vashishta-escer6 spotify: https://open.spotify.com/episode/5u2WuUB8GBNE9qDsNR6mby apple: https://podcasts.apple.com/us/podcast/new-roles-key-skills-to-monetize-machine-learning-vin/id1541710331?i=1000512720281 + +description: "Master monetize machine learning: convert ML models into ARR/MRR using MLOps and team roles to drive revenue, adoption and measurable business impact." +intro: "How do you turn machine learning models into recurring revenue—ARR and MRR—rather than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner and engineer strategist who has brought products to market with ARR in the $100’s of millions, breaks down practical steps to monetize machine learning.

We explore why a revenue-first mindset changes ML strategy, how to translate models into C-suite metrics like ARR/MRR, and when to prioritize revenue versus cost-savings. Vin outlines the three core team roles for monetization, the research artifacts and experimental process that make models production-ready, and real category-creation examples from companies such as Amazon, Google, and Stitch Fix. For startups he explains the “angry users + data scientists” product recipe.

You’ll also get frameworks for ML product management—turning strategy into researchable use cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, and product metrics for adoption (usage, task time, decision quality, pricing impact). This episode delivers actionable guidance for leaders, product managers, and engineers seeking to convert ML into sustainable ARR and MRR" +topics: +- machine learning +- monetization +- product management +- strategy +dateadded: 2021-03-12 + +duration: PT01H19M01S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=0 + endOffset: 210 +- name: 'Career & technical background: Vin Vashishta''s journey in ML and strategy' + startOffset: 210 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=210 + endOffset: 477 +- name: 'Monetize machine learning: why revenue focus drives ML strategy' + startOffset: 477 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=477 + endOffset: 727 +- name: 'ARR & MRR: translating models into C-suite revenue metrics' + startOffset: 727 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=727 + endOffset: 959 +- name: 'Revenue vs. 
cost-savings: business model metrics for ML products' + startOffset: 959 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=959 + endOffset: 1215 +- name: 'Team capabilities for monetizing ML: three core roles overview' + startOffset: 1215 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1215 + endOffset: 1618 +- name: 'Machine learning research: artifacts, datasets, and experimental process' + startOffset: 1618 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1618 + endOffset: 1758 +- name: 'Category creation with ML: examples and market entry (Amazon, Google, Stitch + Fix)' + startOffset: 1758 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1758 + endOffset: 2037 +- name: 'Startups: the "angry users + data scientists" product recipe' + startOffset: 2037 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2037 + endOffset: 2170 +- name: 'Research skillset: hypothesis design, experimentation, explainability & advanced + math' + startOffset: 2170 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2170 + endOffset: 2608 +- name: 'Product management for ML: translating strategy into researchable use cases' + startOffset: 2608 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2608 + endOffset: 2934 +- name: 'Product manager ecosystem: gated decisions, feasibility studies and stakeholders' + startOffset: 2934 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2934 + endOffset: 3053 +- name: 'Career paths into ML product management: backgrounds and upskilling routes' + startOffset: 3053 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3053 + endOffset: 3290 +- name: 'Machine learning architecture: platform vision, cost estimation and production + path' + startOffset: 3290 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3290 + endOffset: 3484 +- name: 'Architecture skills & tools: cloud, MLOps, buy vs build tradeoffs' + startOffset: 3484 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3484 + endOffset: 3642 +- name: 'Transitioning into 
research & architecture roles: realistic career steps' + startOffset: 3642 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3642 + endOffset: 3792 +- name: 'Education gap & corporate upskilling: "farm club" pipelines and university + roles' + startOffset: 3792 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3792 + endOffset: 4416 +- name: 'MBA relevance: degrees vs. practical business fluency for ML product leaders' + startOffset: 4416 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4416 + endOffset: 4454 +- name: 'Role specialization trend: splitting data science into focused functions' + startOffset: 4454 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4454 + endOffset: 4514 +- name: 'Product metrics for adoption: usage, task time, decision quality and pricing + impact' + startOffset: 4514 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4514 + endOffset: 4692 +- name: Episode recap & next steps + startOffset: 4692 + url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4692 + endOffset: 4741 + transcript: - header: Podcast Introduction - line: Good morning. Now it's 7 AM for you or? @@ -234,7 +336,7 @@ transcript: sec: 712 time: '11:52' who: Vin -- header: 'ARR & MRR: translating models into C‑suite revenue metrics' +- header: 'ARR & MRR: translating models into C-suite revenue metrics' - line: We definitely will go there, but it's interesting to know how companies actually evaluate the value that data scientists can bring. Coming back to your LinkedIn profile. You bio mentions that you built and brought products to market with ARR @@ -298,7 +400,7 @@ transcript: sec: 926 time: '15:26' who: Alexey -- header: 'Revenue vs. cost‑savings: business model metrics for ML products' +- header: 'Revenue vs. cost-savings: business model metrics for ML products' - line: What are the other important things that people on this level care about? In addition to this annual recurring revenue and monthly recurring revenue. 
What are the other money related metrics that they care about? @@ -1297,115 +1399,6 @@ transcript: sec: 4743 time: '1:19:03' who: Alexey -description: 'Master monetize machine learning: convert ML models into ARR/MRR using - MLOps and team roles to drive revenue, adoption and measurable business impact.' -intro: How do you turn machine learning models into recurring revenue—ARR and MRR—rather - than just a cost center? In this episode, Vin Vashishta, an applied ML practitioner - and engineer strategist who has brought products to market with ARR in the $100’s - of millions, breaks down practical steps to monetize machine learning.

- We explore why a revenue-first mindset changes ML strategy, how to translate models - into C‑suite metrics like ARR/MRR, and when to prioritize revenue versus cost‑savings. - Vin outlines the three core team roles for monetization, the research artifacts - and experimental process that make models production-ready, and real category-creation - examples from companies such as Amazon, Google, and Stitch Fix. For startups he - explains the “angry users + data scientists” product recipe.

You’ll also - get frameworks for ML product management—turning strategy into researchable use - cases—plus guidance on architecture, MLOps tradeoffs, pricing strategy, model reliability, - and product metrics for adoption (usage, task time, decision quality, pricing impact). - This episode delivers actionable guidance for leaders, product managers, and engineers - seeking to convert ML into sustainable ARR and MRR. -dateadded: '2021-03-12' -duration: PT01H19M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=0 - endOffset: 210 -- name: 'Career & technical background: Vin Vashishta''s journey in ML and strategy' - startOffset: 210 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=210 - endOffset: 477 -- name: 'Monetize machine learning: why revenue focus drives ML strategy' - startOffset: 477 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=477 - endOffset: 727 -- name: 'ARR & MRR: translating models into C‑suite revenue metrics' - startOffset: 727 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=727 - endOffset: 959 -- name: 'Revenue vs. 
cost‑savings: business model metrics for ML products' - startOffset: 959 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=959 - endOffset: 1215 -- name: 'Team capabilities for monetizing ML: three core roles overview' - startOffset: 1215 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1215 - endOffset: 1618 -- name: 'Machine learning research: artifacts, datasets, and experimental process' - startOffset: 1618 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1618 - endOffset: 1758 -- name: 'Category creation with ML: examples and market entry (Amazon, Google, Stitch - Fix)' - startOffset: 1758 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=1758 - endOffset: 2037 -- name: 'Startups: the "angry users + data scientists" product recipe' - startOffset: 2037 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2037 - endOffset: 2170 -- name: 'Research skillset: hypothesis design, experimentation, explainability & advanced - math' - startOffset: 2170 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2170 - endOffset: 2608 -- name: 'Product management for ML: translating strategy into researchable use cases' - startOffset: 2608 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2608 - endOffset: 2934 -- name: 'Product manager ecosystem: gated decisions, feasibility studies and stakeholders' - startOffset: 2934 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=2934 - endOffset: 3053 -- name: 'Career paths into ML product management: backgrounds and upskilling routes' - startOffset: 3053 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3053 - endOffset: 3290 -- name: 'Machine learning architecture: platform vision, cost estimation and production - path' - startOffset: 3290 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3290 - endOffset: 3484 -- name: 'Architecture skills & tools: cloud, MLOps, buy vs build tradeoffs' - startOffset: 3484 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3484 - endOffset: 3642 -- name: 'Transitioning into 
research & architecture roles: realistic career steps' - startOffset: 3642 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3642 - endOffset: 3792 -- name: 'Education gap & corporate upskilling: "farm club" pipelines and university - roles' - startOffset: 3792 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=3792 - endOffset: 4416 -- name: 'MBA relevance: degrees vs. practical business fluency for ML product leaders' - startOffset: 4416 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4416 - endOffset: 4454 -- name: 'Role specialization trend: splitting data science into focused functions' - startOffset: 4454 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4454 - endOffset: 4514 -- name: 'Product metrics for adoption: usage, task time, decision quality and pricing - impact' - startOffset: 4514 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4514 - endOffset: 4692 -- name: Episode recap & next steps - startOffset: 4692 - url: https://www.youtube.com/watch?v=xCjzA_8S4kI&t=4692 - endOffset: 4741 --- We discussed monetization roles and the capabilities people need to move into those roles. The key roles are ML Researcher, ML Architect, and ML Product Manager. diff --git a/_podcast/s01e05-mentoring.md b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md similarity index 91% rename from _podcast/s01e05-mentoring.md rename to _podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md index 0d697aa9..2a1aff25 100644 --- a/_podcast/s01e05-mentoring.md +++ b/_podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.md @@ -1,16 +1,11 @@ --- -title: 'How to Find a Mentor and Become One: Mentoring Strategies for Tech Careers' -short: Mentoring -guests: -- rahuljain -image: images/podcast/s01e05-mentoring.jpg -description: 'Discover practical mentoring strategies for tech careers: find mentors, - master cold outreach, run effective sessions, start paid mentorship & boost leadership.' 
-keywords: mentoring, career development, tech mentorship, finding a mentor, becoming - a mentor, imposter syndrome, tech leadership, career advice, professional development, - data engineering +title: "How to Find a Mentor and Become One: Mentoring Strategies for Tech Careers" +short: "Mentoring" season: 1 episode: 5 +guests: +- rahuljain +image: images/podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.jpg ids: youtube: LQvwTNQbPg4 anchor: Mentoring---Rahul-Jain-eo7cmu @@ -19,23 +14,18 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Mentoring---Rahul-Jain-eo7cmu spotify: TODO apple: TODO -intro: 'Struggling to find a mentor — or wondering how to become one — in a fast-moving - tech career? In this episode, Rahul Jain, a senior solutions engineer and data/AI - leader with 15+ years driving enterprise data transformations and a career arc from - mining engineering to data engineering and leadership, walks through practical mentoring - strategies for tech professionals. We define mentoring (purpose, scope, types), - explore early models like Thoughtworks’ sponsorship, and show how to find mentors - through networks, platforms, and cold outreach — with concrete outreach best practices: - specificity, background, and follow‑up. Rahul covers preparing mentoring sessions - (goals, agendas), mentoring formats (one‑off advice vs long‑term relationships), - and how to start as a mentor using simple first steps and platforms. Topics include - benefits of mentoring, transferable workplace guidance, developing people skills - (empathy, listening), balancing technical work and leadership, tackling imposter - syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining - development plans. Listen to gain actionable steps, templates, and mindset shifts - to both secure meaningful mentorship and build a sustainable mentoring practice - in your tech career.' 
-dateadded: '2021-02-23' + +description: "Discover practical mentoring strategies for tech careers: find mentors, master cold outreach, run effective sessions, start paid mentorship & boost leadership." +topics: +- mentoring +- career development +- career transition +- leadership +- data engineering +intro: "Struggling to find a mentor — or wondering how to become one — in a fast-moving tech career? In this episode, Rahul Jain, a senior solutions engineer and data/AI leader with 15+ years driving enterprise data transformations and a career arc from mining engineering to data engineering and leadership, walks through practical mentoring strategies for tech professionals. We define mentoring (purpose, scope, types), explore early models like Thoughtworks’ sponsorship, and show how to find mentors through networks, platforms, and cold outreach — with concrete outreach best practices: specificity, background, and follow-up. Rahul covers preparing mentoring sessions (goals, agendas), mentoring formats (one-off advice vs long-term relationships), and how to start as a mentor using simple first steps and platforms. Topics include benefits of mentoring, transferable workplace guidance, developing people skills (empathy, listening), balancing technical work and leadership, tackling imposter syndrome, coaching vs managing, setting boundaries and paid mentorship, and maintaining development plans. Listen to gain actionable steps, templates, and mindset shifts to both secure meaningful mentorship and build a sustainable mentoring practice in your tech career." 
+dateadded: 2021-02-23 + + quotableClips: - name: Episode Introduction startOffset: 0 @@ -57,7 +47,7 @@ quotableClips: startOffset: 770 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=770 endOffset: 990 -- name: 'Cold Outreach Best Practices: Specificity, Background, and Follow‑up' +- name: 'Cold Outreach Best Practices: Specificity, Background, and Follow-up' startOffset: 990 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=990 endOffset: 1180 @@ -65,7 +55,7 @@ quotableClips: startOffset: 1180 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1180 endOffset: 1350 -- name: 'Mentoring Formats: One‑Off Advice vs. Long‑Term Relationships' +- name: 'Mentoring Formats: One-Off Advice vs. Long-Term Relationships' startOffset: 1350 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1350 endOffset: 1510 @@ -77,7 +67,7 @@ quotableClips: startOffset: 1680 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1680 endOffset: 1840 -- name: 'Developing People Skills: Empathy, Listening, and Avoiding the "Advice Monster"' +- name: 'Developing People Skills: Empathy, Listening, and Avoiding the "Advice Monster"' startOffset: 1840 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=1840 endOffset: 2010 @@ -85,7 +75,7 @@ quotableClips: startOffset: 2010 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=2010 endOffset: 2200 -- name: 'Common Mentee Challenges: Imposter Syndrome & Tech‑vs‑Management Choices' +- name: 'Common Mentee Challenges: Imposter Syndrome & Tech-vs-Management Choices' startOffset: 2200 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=2200 endOffset: 2390 @@ -109,7 +99,7 @@ quotableClips: startOffset: 3020 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=3020 endOffset: 3160 -- name: 'Maintaining Development Plans: Review, Visibility, and Follow‑through' +- name: 'Maintaining Development Plans: Review, Visibility, and Follow-through' startOffset: 3160 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=3160 endOffset: 3290 @@ -125,6 +115,8 @@ quotableClips: 
startOffset: 3480 url: https://www.youtube.com/watch?v=LQvwTNQbPg4&t=3480 endOffset: 3480 + +keywords: mentoring, career development, tech mentorship, finding a mentor, becoming a mentor, imposter syndrome, tech leadership, career advice, professional development, data engineering --- Today we're discussing mentoring with [Rahul Jain](/people/rahuljain.html), a technical leader with about 20 years of experience building and running software products. He currently leads the Business Intelligence and Data Engineering units at Omio, a ticket-booking company, and mentors engineers and managers through The Mentoring Club. diff --git a/_podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md b/_podcast/mindful-data-strategy-for-business-impact.md similarity index 94% rename from _podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md rename to _podcast/mindful-data-strategy-for-business-impact.md index 5fbb9020..49beb8ec 100644 --- a/_podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.md +++ b/_podcast/mindful-data-strategy-for-business-impact.md @@ -1,20 +1,142 @@ --- +title: "Mindful Data Strategy for Business Impact: Wabi-Sabi Approach, Data Trust & Maintenance-Innovation Balance" +short: "Mindful Data Strategy: From Pipelines to Business Impact" +season: 21 episode: 2 guests: - liorbarak +image: images/podcast/mindful-data-strategy-for-business-impact.jpg ids: anchor: datatalksclub/episodes/How-to-Rebuild-Data-Trust--Mindful-Data-Strategy-and-Maintenance-vs-Innovation---Lior-Barak-e36obcs youtube: B76J4QkZPWs -image: images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/How-to-Rebuild-Data-Trust--Mindful-Data-Strategy-and-Maintenance-vs-Innovation---Lior-Barak-e36obcs apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: 
https://open.spotify.com/episode/54B0xvUI1eQjXW0s1eqgbI youtube: https://www.youtube.com/watch?v=B76J4QkZPWs -season: 21 -short: 'Mindful Data Strategy: From Pipelines to Business Impact' -title: 'Restore Data Trust: Practical Data Quality, Prioritization & Generative AI - Readiness' +description: "Discover a mindful data strategy to build data trust and balance maintenance-innovation with a Wabi-Sabi approach: practical tactics to boost business impact." +topics: +- data strategy +- data governance +- data engineering +- product management +- career transition +intro: "How do you build a data strategy that drives business impact without chasing perfection? In this episode Lior Barak — author of Data Is Like a Plate of Hummus, co-host of the WHAT the Data?! podcast, and founder of Tale About Data — explores a mindful data strategy that accepts imperfection, prioritizes data trust, and balances maintenance with innovation.

Lior draws on 12+ years building data teams and helping organizations use data for growth, with a particular focus on practical strategies for non-business functions. Key topics include the Wabi-Sabi approach to data (valuing usable, imperfect datasets), establishing data trust and governance, and how to allocate resources between ongoing data maintenance and forward-looking innovation. The conversation also touches on setting realistic expectations, reducing technical debt, and aligning data work to measurable business outcomes.

If you’re responsible for data strategy, analytics, or data product decisions, this episode provides concrete perspectives on building resilient, impact-driven data practices—helping you prioritize work that increases trust, lowers risk, and creates sustained business value." +dateadded: 2025-08-18 +duration: PT01H06M05S +quotableClips: +- name: Podcast Introduction and Episode Overview (mindful data strategy) + startOffset: 0 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=0 + endOffset: 144 +- name: 'Lior Barak: Background and shift from engineering to product' + startOffset: 144 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=144 + endOffset: 246 +- name: 'Startup and platform experience: automating data infrastructure' + startOffset: 246 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=246 + endOffset: 385 +- name: Product management learning paths for engineers and data scientists + startOffset: 385 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=385 + endOffset: 500 +- name: 'Wabi-sabi applied to data: accepting imperfection and communicating it' + startOffset: 500 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=500 + endOffset: 588 +- name: 'Data trust crisis: industry stats and common trust failures' + startOffset: 588 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=588 + endOffset: 707 +- name: 'Generative AI and hallucinations: managing expectations for models' + startOffset: 707 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=707 + endOffset: 849 +- name: 'Data quality metaphor: Lego bricks and pragmatic trade-offs' + startOffset: 849 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=849 + endOffset: 1052 +- name: 'Prioritization vs. 
tooling: translating data work into business impact' + startOffset: 1052 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1052 + endOffset: 1250 +- name: 'Core KPI diagnosis: investigating dashboard inaccuracies' + startOffset: 1250 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1250 + endOffset: 1322 +- name: 'Pipeline failure points: ingestion, SQL logic, and lineage checks' + startOffset: 1322 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1322 + endOffset: 1406 +- name: 'Process failures over tool fixes: focusing on root causes' + startOffset: 1406 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1406 + endOffset: 1692 +- name: 'Trust restoration framework: maintenance, rollouts, and innovation' + startOffset: 1692 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1692 + endOffset: 1756 +- name: 'Incident analysis: using incidents to identify recurring problems' + startOffset: 1756 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1756 + endOffset: 1847 +- name: Dashboard traffic-light system for data reliability (green/yellow/red) + startOffset: 1847 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1847 + endOffset: 1998 +- name: 'Analyst feedback and automation: closing the communication loop' + startOffset: 1998 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1998 + endOffset: 2299 +- name: 'Work allocation: tracking maintenance, rollout, and innovation time' + startOffset: 2299 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2299 + endOffset: 2481 +- name: 'Team stress index and guideline: ~45% maintenance as healthy baseline' + startOffset: 2481 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2481 + endOffset: 2592 +- name: 'Data product lifecycle: development, rollout, maturity, and decline' + startOffset: 2592 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2592 + endOffset: 2747 +- name: 'Zen practices for data teams: mindfulness, acceptance, and planning' + startOffset: 2747 + url: 
https://www.youtube.com/watch?v=B76J4QkZPWs&t=2747 + endOffset: 3014 +- name: 'Generative AI demand: why data readiness matters now' + startOffset: 3014 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3014 + endOffset: 3101 +- name: 'Measuring readiness by impact: ROI and product success signals' + startOffset: 3101 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3101 + endOffset: 3379 +- name: 'Legacy systems strategy: minimal maintenance and planned replacement' + startOffset: 3379 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3379 + endOffset: 3551 +- name: 'Replacing legacy: selling the change through user impact' + startOffset: 3551 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3551 + endOffset: 3623 +- name: 'Executive ad-hoc requests: elicit intent and quantify expected impact' + startOffset: 3623 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3623 + endOffset: 3753 +- name: 'Career guidance: choosing analytics, engineering, or product paths' + startOffset: 3753 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3753 + endOffset: 3876 +- name: Closing reflections, resources, and suggested next steps + startOffset: 3876 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3876 + endOffset: 3965 +- name: Episode Outro and Hummus Banter + startOffset: 3965 + url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3965 + endOffset: 3965 transcript: - header: Podcast Introduction and Episode Overview (mindful data strategy) - line: This week we'll talk about mindful data strategy and how teams can shift from @@ -166,7 +288,7 @@ transcript: sec: 494 time: '8:14' who: Alexey -- header: 'Wabi‑sabi applied to data: accepting imperfection and communicating it' +- header: 'Wabi-sabi applied to data: accepting imperfection and communicating it' - line: Wabi Sabi Your Data which is basically a Japanese concept about accepting the imperfections, the perfect imperfections basically and this is basically what I think about data. 
This is a lot of my philosophy about how to handle data. It's @@ -300,7 +422,7 @@ transcript: sec: 811 time: '13:31' who: Alexey -- header: 'Data quality metaphor: Lego bricks and pragmatic trade‑offs' +- header: 'Data quality metaphor: Lego bricks and pragmatic trade-offs' - line: Correct and you know I always explain it and I say also data is like Lego bricks. We can connect and we can build a lot of buildings inside it. sec: 849 @@ -671,7 +793,7 @@ transcript: sec: 1810 time: '30:10' who: Lior -- header: Dashboard traffic‑light system for data reliability (green/yellow/red) +- header: Dashboard traffic-light system for data reliability (green/yellow/red) - line: 'Second, for the CEO, we can add a traffic light indicator on the dashboard: green, yellow, red.' sec: 1847 @@ -1158,7 +1280,7 @@ transcript: sec: 3599 time: '59:59' who: Lior -- header: 'Executive ad‑hoc requests: elicit intent and quantify expected impact' +- header: 'Executive ad-hoc requests: elicit intent and quantify expected impact' - line: 'Thanks. Another question: how do you handle ad hoc requests from executives?' sec: 3623 time: '1:00:23' @@ -1269,142 +1391,19 @@ transcript: sec: 3965 time: '1:06:05' who: Alexey -description: Discover how to restore data trust with data quality fixes, prioritization - and generative AI readiness—KPI diagnosis, incident-driven roadmaps and rollout - tips. -intro: How do you restore data trust and make your organization ready for generative - AI without drowning in tools or endless cleanup? In this episode, Lior Barak — author - of Data is Like a Plate of Hummus, co‑host of WHAT the Data?!, and founder of Tale - About Data — walks through practical approaches to data quality, prioritization, - and generative AI readiness from his 12+ years building data teams.

We - cover a mindful data strategy that accepts imperfection (Wabi‑sabi), the shift from - engineering to product thinking, and automating data infrastructure. Lior explains - common data trust failures and hallucination risks with generative models, offers - diagnostic tactics for core KPI and dashboard inaccuracies, and pinpoints pipeline - failure modes (ingestion, SQL logic, lineage). You’ll hear a trust‑restoration framework - focused on maintenance, rollouts, and innovation, a traffic‑light dashboard for - reliability, incident analysis to find recurring causes, and practical work allocation - and team stress benchmarks (≈45% maintenance).

Listen to learn concrete - steps to prioritize data work by business impact, measure readiness for AI by ROI - and product signals, and manage legacy systems and executive ad‑hoc requests with - intent and impact in mind. -dateadded: '2025-08-18' -duration: PT01H06M05S -quotableClips: -- name: Podcast Introduction and Episode Overview (mindful data strategy) - startOffset: 0 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=0 - endOffset: 144 -- name: 'Lior Barak: Background and shift from engineering to product' - startOffset: 144 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=144 - endOffset: 246 -- name: 'Startup and platform experience: automating data infrastructure' - startOffset: 246 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=246 - endOffset: 385 -- name: Product management learning paths for engineers and data scientists - startOffset: 385 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=385 - endOffset: 500 -- name: 'Wabi‑sabi applied to data: accepting imperfection and communicating it' - startOffset: 500 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=500 - endOffset: 588 -- name: 'Data trust crisis: industry stats and common trust failures' - startOffset: 588 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=588 - endOffset: 707 -- name: 'Generative AI and hallucinations: managing expectations for models' - startOffset: 707 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=707 - endOffset: 849 -- name: 'Data quality metaphor: Lego bricks and pragmatic trade‑offs' - startOffset: 849 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=849 - endOffset: 1052 -- name: 'Prioritization vs. 
tooling: translating data work into business impact' - startOffset: 1052 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1052 - endOffset: 1250 -- name: 'Core KPI diagnosis: investigating dashboard inaccuracies' - startOffset: 1250 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1250 - endOffset: 1322 -- name: 'Pipeline failure points: ingestion, SQL logic, and lineage checks' - startOffset: 1322 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1322 - endOffset: 1406 -- name: 'Process failures over tool fixes: focusing on root causes' - startOffset: 1406 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1406 - endOffset: 1692 -- name: 'Trust restoration framework: maintenance, rollouts, and innovation' - startOffset: 1692 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1692 - endOffset: 1756 -- name: 'Incident analysis: using incidents to identify recurring problems' - startOffset: 1756 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1756 - endOffset: 1847 -- name: Dashboard traffic‑light system for data reliability (green/yellow/red) - startOffset: 1847 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1847 - endOffset: 1998 -- name: 'Analyst feedback and automation: closing the communication loop' - startOffset: 1998 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=1998 - endOffset: 2299 -- name: 'Work allocation: tracking maintenance, rollout, and innovation time' - startOffset: 2299 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2299 - endOffset: 2481 -- name: 'Team stress index and guideline: ~45% maintenance as healthy baseline' - startOffset: 2481 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2481 - endOffset: 2592 -- name: 'Data product lifecycle: development, rollout, maturity, and decline' - startOffset: 2592 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=2592 - endOffset: 2747 -- name: 'Zen practices for data teams: mindfulness, acceptance, and planning' - startOffset: 2747 - url: 
https://www.youtube.com/watch?v=B76J4QkZPWs&t=2747 - endOffset: 3014 -- name: 'Generative AI demand: why data readiness matters now' - startOffset: 3014 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3014 - endOffset: 3101 -- name: 'Measuring readiness by impact: ROI and product success signals' - startOffset: 3101 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3101 - endOffset: 3379 -- name: 'Legacy systems strategy: minimal maintenance and planned replacement' - startOffset: 3379 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3379 - endOffset: 3551 -- name: 'Replacing legacy: selling the change through user impact' - startOffset: 3551 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3551 - endOffset: 3623 -- name: 'Executive ad‑hoc requests: elicit intent and quantify expected impact' - startOffset: 3623 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3623 - endOffset: 3753 -- name: 'Career guidance: choosing analytics, engineering, or product paths' - startOffset: 3753 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3753 - endOffset: 3876 -- name: Closing reflections, resources, and suggested next steps - startOffset: 3876 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3876 - endOffset: 3965 -- name: Episode Outro and Hummus Banter - startOffset: 3965 - url: https://www.youtube.com/watch?v=B76J4QkZPWs&t=3965 - endOffset: 3965 ---- +context: 'Context: The episode examines a practical approach to data work—moving from + engineering to product thinking, accepting imperfect data (wabi-sabi), diagnosing + trust failures, prioritizing maintenance/rollout/innovation, using simple reliability + signals and feedback loops, and aligning team time and processes to measurable business + impact—especially as generative AI raises readiness demands and legacy systems require + pragmatic replacement. 
+ Core: Adopt a mindful, impact-first data strategy that accepts and communicates + inevitable imperfection, prioritizes process and measurable business outcomes over + perfect tooling, and restores trust through clear signals, closed feedback loops, + and disciplined allocation of maintenance, rollout, and innovation effort so data + products remain reliable, scalable, and ready to deliver real ROI.' +--- Links: * [LinkedIn](https://www.linkedin.com/in/liorbarak/){:target="_blank"} diff --git a/_podcast/s05e03-metrics-and-kpis.md b/_podcast/ml-engineering-kpis-and-metrics-strategy.md similarity index 97% rename from _podcast/s05e03-metrics-and-kpis.md rename to _podcast/ml-engineering-kpis-and-metrics-strategy.md index ba675718..b3396021 100644 --- a/_podcast/s05e03-metrics-and-kpis.md +++ b/_podcast/ml-engineering-kpis-and-metrics-strategy.md @@ -1,12 +1,11 @@ --- -title: 'KPI Design & Metrics Strategy: Prioritize Impact, Avoid Vanity Metrics, and - Prove ROI' -short: 'Defining Success: Metrics and KPIs' -guests: -- adamsroka -image: images/podcast/s05e03-metrics-and-kpis.jpg +title: "KPI Design & Metrics Strategy: Prioritize Impact, Avoid Vanity Metrics, and Prove ROI" +short: "Defining Success: Metrics and KPIs" season: 5 episode: 3 +guests: +- adamsroka +image: images/podcast/ml-engineering-kpis-and-metrics-strategy.jpg ids: youtube: H4P2RfKvXGs anchor: Defining-Success-Metrics-and-KPIs---Adam-Sroka-e17gfp0 @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Defining-Success-Metrics-and-KPIs---Adam-Sroka-e17gfp0 spotify: https://open.spotify.com/episode/5kTD7LjoXos1fm2LPD7nJc apple: https://podcasts.apple.com/us/podcast/defining-success-metrics-and-kpis-adam-sroka/id1541710331?i=1000535667935 + +description: "Discover KPI design, metrics strategy & ROI proof - avoid vanity metrics, build dashboards, prioritize impact, and measure experiments to prove value" +intro: "How do you design KPIs that prioritize real impact, avoid vanity 
metrics, and actually prove ROI? In this episode, Dr. Adam Sroka — Head of Machine Learning Engineering at Origami Energy, with a background from a Physics PhD to data science, reinforcement learning, and consultancy — walks through a practical metrics strategy for data and product teams.

We cover why metrics matter (Drucker, merit functions), how to make metrics comparable (units), and concrete examples like weighted revenue for sales pipelines and burn-down/maintainability metrics for professional services. Adam explains top-down KPI alignment, avoiding vanity metrics and KPI gaming, and using derived/composite KPIs to capture margin trade-offs. You’ll hear a workshop case for grocery retail, guidance on KPI prioritization and review cadence, and tips for operationalizing metrics through dashboards, executive communication, and a North Star metric. We also dig into threshold, health & hygiene metrics, translating model performance into £/time saved, and robust experiment and model validation (A/B, randomization, backtesting, uplift).

If you’re responsible for KPI design, metrics strategy, or proving ROI from data work, this episode gives actionable frameworks to measure impact and reduce measurement risk." +topics: +- machine learning +- leadership +- data science +- product management +- strategy +- metrics +- communication +dateadded: 2021-09-19 + +duration: PT01H02M30S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=0 + endOffset: 90 +- name: Guest Introduction & Career Path + startOffset: 90 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=90 + endOffset: 142 +- name: From Physics PhD to Data Science and Reinforcement Learning + startOffset: 142 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=142 + endOffset: 392 +- name: 'Moving into Consultancy: BI, Dashboards, and Client Workshops' + startOffset: 392 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=392 + endOffset: 540 +- name: Laser Research, Ray-Tracing Tools, and Early RL Experiments + startOffset: 540 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=540 + endOffset: 726 +- name: 'Why Metrics Matter: Drucker, Measurement, and Merit Functions' + startOffset: 726 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=726 + endOffset: 911 +- name: Merit Functions & Project Prioritization (Impact vs Cost) + startOffset: 911 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=911 + endOffset: 1011 +- name: Units & Comparability in Metric Design + startOffset: 1011 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1011 + endOffset: 1042 +- name: 'Sales Pipeline Metrics: Weighted Revenue and Lead Qualification' + startOffset: 1042 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1042 + endOffset: 1246 +- name: 'Professional Services Metrics: Burn-Down Rate & Maintainability of Earnings' + startOffset: 1246 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1246 + endOffset: 1361 +- name: 'KPIs Defined: Top-Down Alignment and Executive Decision 
Metrics' + startOffset: 1361 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1361 + endOffset: 1567 +- name: 'Avoiding Vanity Metrics: Make the Important Measurable' + startOffset: 1567 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1567 + endOffset: 1684 +- name: KPI Gaming Risks & Designing Competing KPIs + startOffset: 1684 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1684 + endOffset: 1830 +- name: 'Derived KPIs: Composite Metrics to Capture Margin and Trade-offs' + startOffset: 1830 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1830 + endOffset: 1964 +- name: 'Workshop Process: Designing Metrics for Grocery Retail' + startOffset: 1964 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1964 + endOffset: 2239 +- name: KPI Prioritization, Review Cadence, and Iteration Best Practices + startOffset: 2239 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2239 + endOffset: 2467 +- name: 'Operationalizing KPIs: Dashboards, Visibility, and Executive Communication' + startOffset: 2467 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2467 + endOffset: 2699 +- name: 'North Star Metric: Single Guiding Indicator for Strategy' + startOffset: 2699 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2699 + endOffset: 2794 +- name: 'Threshold Metrics: Alerts, Limits, and Safety Conditions' + startOffset: 2794 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2794 + endOffset: 2928 +- name: 'Health & Hygiene Metrics: Downtime and Service Reliability' + startOffset: 2928 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2928 + endOffset: 3072 +- name: 'Data Team Metrics: Translate Model Performance into £ / Time Saved' + startOffset: 3072 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3072 + endOffset: 3342 +- name: 'Experimentation & Measurement: A/B Testing and Champion–Challenger' + startOffset: 3342 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3342 + endOffset: 3395 +- name: 'Model Validation Techniques: Randomization, 
Backtesting, and Uplift' + startOffset: 3395 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3395 + endOffset: 3602 +- name: 'Timeboxing Data Work: Two-Week Spikes and Accelerate Metrics' + startOffset: 3602 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3602 + endOffset: 3795 +- name: Episode Wrap-Up, Contact Info, and Further Reading + startOffset: 3795 + url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3795 + endOffset: 3750 + transcript: - header: Podcast Introduction - header: Guest Introduction & Career Path @@ -1039,127 +1155,6 @@ transcript: sec: 3840 time: '1:04:00' who: Alexey -description: Discover KPI design, metrics strategy & ROI proof - avoid vanity metrics, - build dashboards, prioritize impact, and measure experiments to prove value. -intro: How do you design KPIs that prioritize real impact, avoid vanity metrics, and - actually prove ROI? In this episode, Dr. Adam Sroka — Head of Machine Learning Engineering - at Origami Energy, with a background from a Physics PhD to data science, reinforcement - learning, and consultancy — walks through a practical metrics strategy for data - and product teams.

We cover why metrics matter (Drucker, merit functions), - how to make metrics comparable (units), and concrete examples like weighted revenue - for sales pipelines and burn-down/maintainability metrics for professional services. - Adam explains top-down KPI alignment, avoiding vanity metrics and KPI gaming, and - using derived/composite KPIs to capture margin trade-offs. You’ll hear a workshop - case for grocery retail, guidance on KPI prioritization and review cadence, and - tips for operationalizing metrics through dashboards, executive communication, and - a North Star metric. We also dig into threshold, health & hygiene metrics, translating - model performance into £/time saved, and robust experiment and model validation - (A/B, randomization, backtesting, uplift).

If you’re responsible for KPI - design, metrics strategy, or proving ROI from data work, this episode gives actionable - frameworks to measure impact and reduce measurement risk. -dateadded: '2021-09-19' -duration: PT01H02M30S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=0 - endOffset: 90 -- name: Guest Introduction & Career Path - startOffset: 90 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=90 - endOffset: 142 -- name: From Physics PhD to Data Science and Reinforcement Learning - startOffset: 142 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=142 - endOffset: 392 -- name: 'Moving into Consultancy: BI, Dashboards, and Client Workshops' - startOffset: 392 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=392 - endOffset: 540 -- name: Laser Research, Ray-Tracing Tools, and Early RL Experiments - startOffset: 540 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=540 - endOffset: 726 -- name: 'Why Metrics Matter: Drucker, Measurement, and Merit Functions' - startOffset: 726 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=726 - endOffset: 911 -- name: Merit Functions & Project Prioritization (Impact vs Cost) - startOffset: 911 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=911 - endOffset: 1011 -- name: Units & Comparability in Metric Design - startOffset: 1011 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1011 - endOffset: 1042 -- name: 'Sales Pipeline Metrics: Weighted Revenue and Lead Qualification' - startOffset: 1042 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1042 - endOffset: 1246 -- name: 'Professional Services Metrics: Burn-Down Rate & Maintainability of Earnings' - startOffset: 1246 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1246 - endOffset: 1361 -- name: 'KPIs Defined: Top-Down Alignment and Executive Decision Metrics' - startOffset: 1361 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1361 - endOffset: 1567 -- name: 'Avoiding 
Vanity Metrics: Make the Important Measurable' - startOffset: 1567 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1567 - endOffset: 1684 -- name: KPI Gaming Risks & Designing Competing KPIs - startOffset: 1684 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1684 - endOffset: 1830 -- name: 'Derived KPIs: Composite Metrics to Capture Margin and Trade-offs' - startOffset: 1830 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1830 - endOffset: 1964 -- name: 'Workshop Process: Designing Metrics for Grocery Retail' - startOffset: 1964 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=1964 - endOffset: 2239 -- name: KPI Prioritization, Review Cadence, and Iteration Best Practices - startOffset: 2239 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2239 - endOffset: 2467 -- name: 'Operationalizing KPIs: Dashboards, Visibility, and Executive Communication' - startOffset: 2467 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2467 - endOffset: 2699 -- name: 'North Star Metric: Single Guiding Indicator for Strategy' - startOffset: 2699 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2699 - endOffset: 2794 -- name: 'Threshold Metrics: Alerts, Limits, and Safety Conditions' - startOffset: 2794 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2794 - endOffset: 2928 -- name: 'Health & Hygiene Metrics: Downtime and Service Reliability' - startOffset: 2928 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=2928 - endOffset: 3072 -- name: 'Data Team Metrics: Translate Model Performance into £ / Time Saved' - startOffset: 3072 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3072 - endOffset: 3342 -- name: 'Experimentation & Measurement: A/B Testing and Champion–Challenger' - startOffset: 3342 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3342 - endOffset: 3395 -- name: 'Model Validation Techniques: Randomization, Backtesting, and Uplift' - startOffset: 3395 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3395 - endOffset: 3602 -- 
name: 'Timeboxing Data Work: Two-Week Spikes and Accelerate Metrics' - startOffset: 3602 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3602 - endOffset: 3795 -- name: Episode Wrap-Up, Contact Info, and Further Reading - startOffset: 3795 - url: https://www.youtube.com/watch?v=H4P2RfKvXGs&t=3795 - endOffset: 3750 --- diff --git a/_podcast/s06e07-product-management-for-machine-learning.md b/_podcast/ml-product-manager-and-mlops-platform-strategy.md similarity index 97% rename from _podcast/s06e07-product-management-for-machine-learning.md rename to _podcast/ml-product-manager-and-mlops-platform-strategy.md index 10ce5f48..2107a686 100644 --- a/_podcast/s06e07-product-management-for-machine-learning.md +++ b/_podcast/ml-product-manager-and-mlops-platform-strategy.md @@ -1,11 +1,11 @@ --- -title: 'Become an ML Product Manager: MLOps Platforms, Observability & Adoption' -short: Product Management for Machine Learning -guests: -- geojolly -image: images/podcast/s06e07-product-management-for-machine-learning.jpg +title: "Become an ML Product Manager: MLOps Platforms, Observability & Adoption" +short: "Product Management for Machine Learning" season: 6 episode: 7 +guests: +- geojolly +image: images/podcast/ml-product-manager-and-mlops-platform-strategy.jpg ids: youtube: PjqjPvHliqg anchor: Product-Management-for-Machine-Learning---Geo-Jolly-e1brpvm @@ -14,6 +14,141 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Product-Management-for-Machine-Learning---Geo-Jolly-e1brpvm spotify: https://open.spotify.com/episode/7zfH4hagZKwoIWmee0AXBd apple: https://podcasts.apple.com/us/podcast/product-management-for-machine-learning-geo-jolly/id1541710331?i=1000545301034 + +description: "Learn ML Product Manager tactics: MLOps platform strategy, observability KPIs & adoption playbooks to own roadmaps, governance, and stakeholder ROI." +intro: "How do you become an ML product manager and build MLOps platforms that teams actually use? 
In this episode, Geo Jolly, a Technical PM at Glovo with a background from web/dev to data science and product management, walks through the practical skills and decisions that define the role.

We cover MLOps platform strategy and vendor evaluation, treating internal platform users as customers, and the real productivity costs of poor tooling UX. Geo outlines product manager responsibilities—roadmaps, specs, backlog prioritization—and explains outcome-driven problem definition, avoiding solution bias, and running workshops and interviews to break down complex problems. You’ll hear about ML observability and KPIs to measure platform impact, release governance and rollout timing, adoption strategy for internal stakeholders, and engineering roles needed for platform delivery (CI/CD, K8s, syseng). Practical topics also include model validation and ML quality assurance, embedded data scientists as power users, Agile approaches for data science, and concrete transition paths from data scientist or Scrum Master into technical ML product roles.

Listen to gain actionable guidance on MLOps platforms, observability, adoption strategy, and the technical literacy required to succeed as an ML product manager" +topics: +- product management +- machine learning +- MLOps +- leadership +- career growth +dateadded: 2021-12-17 + +duration: PT01H02M46S + +quotableClips: +- name: 'Episode Introduction: Product Management for Machine Learning' + startOffset: 0 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=0 + endOffset: 73 +- name: 'Guest Overview: Geo and episode focus on AI Product Manager role' + startOffset: 73 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=73 + endOffset: 116 +- name: 'Career Journey: From web/dev to data science to product management' + startOffset: 116 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=116 + endOffset: 388 +- name: 'Glovo Role: Leading ML platform strategy and team responsibilities' + startOffset: 388 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=388 + endOffset: 521 +- name: In-house MLOps Platform Strategy & Vendor Evaluation + startOffset: 521 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=521 + endOffset: 590 +- name: 'Product Manager Responsibilities: Roadmap, specs, and stakeholder balance' + startOffset: 590 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=590 + endOffset: 684 +- name: 'Internal Platform Users as Customers: ROI and adoption considerations' + startOffset: 684 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=684 + endOffset: 824 +- name: 'Platform Usability Costs: Productivity losses from poor tooling UX' + startOffset: 824 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=824 + endOffset: 919 +- name: 'Backlog Prioritization: Grooming with engineering and Agile practices' + startOffset: 919 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=919 + endOffset: 1004 +- name: 'Outcome-Driven Problem Definition: Metrics over immediate solutions' + startOffset: 1004 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1004 
+ endOffset: 1105 +- name: 'ML Observability: KPIs and measuring platform impact' + startOffset: 1105 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1105 + endOffset: 1169 +- name: 'Avoiding Solution Bias: Techniques to resist jumping into solutions' + startOffset: 1169 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1169 + endOffset: 1266 +- name: 'Collaborative Problem Breakdown: Workshops, interviews, and user input' + startOffset: 1266 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1266 + endOffset: 1335 +- name: 'Core PM Skills: Communication, prioritization, and continuous learning' + startOffset: 1335 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1335 + endOffset: 1408 +- name: 'Technical Literacy: Model architectures, data infra, and cloud concepts' + startOffset: 1408 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1408 + endOffset: 1531 +- name: 'Technical Background Value: Why platform PMs need tooling familiarity' + startOffset: 1531 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1531 + endOffset: 1717 +- name: 'Role Differences: Data Science Lead / Staff vs Technical ML Product Manager' + startOffset: 1717 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1717 + endOffset: 1888 +- name: 'Release Governance & Rollout Strategy: Approvals, compliance, and timing' + startOffset: 1888 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1888 + endOffset: 2118 +- name: 'Adoption Strategy: "Time to stakeholders" and internal rollouts' + startOffset: 2118 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2118 + endOffset: 2268 +- name: 'Engineering Roles in ML Platforms: Backend, syseng, CI/CD, and K8s' + startOffset: 2268 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2268 + endOffset: 2414 +- name: 'Embedded Data Scientists: Power users, developer advocates, and demos' + startOffset: 2414 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2414 + endOffset: 2534 +- name: 'Agile for Data Science: Kanban, Scrum, 
and adapting to research work' + startOffset: 2534 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2534 + endOffset: 2696 +- name: 'Transition Path: Moving from Data Scientist to Technical Product Manager' + startOffset: 2696 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2696 + endOffset: 2983 +- name: 'Recommended Resources: Books and communities for PM transition' + startOffset: 2983 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2983 + endOffset: 3165 +- name: 'Non-Technical Transitions: Feasibility of moving into ML product roles' + startOffset: 3165 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3165 + endOffset: 3344 +- name: 'User Research for Internal Platforms: Surveys and Happiness Reports' + startOffset: 3344 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3344 + endOffset: 3440 +- name: 'ML Quality Assurance: Model validation, shadowing, and release checklists' + startOffset: 3440 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3440 + endOffset: 3592 +- name: 'Scrum Master to PM Advice: Leverage Agile skills and learn ML basics' + startOffset: 3592 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3592 + endOffset: 3711 +- name: 'Final Thoughts: PM demands, scope, and career realities' + startOffset: 3711 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3711 + endOffset: 3815 +- name: 'Contact & Hiring: Geo on LinkedIn and Glovo opportunities' + startOffset: 3815 + url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3815 + endOffset: 3766 + transcript: - header: 'Episode Introduction: Product Management for Machine Learning' - header: 'Guest Overview: Geo and episode focus on AI Product Manager role' @@ -885,7 +1020,7 @@ transcript: sec: 3150 time: '52:30' who: Alexey -- header: 'Non‑Technical Transitions: Feasibility of moving into ML product roles' +- header: 'Non-Technical Transitions: Feasibility of moving into ML product roles' - line: It's easier for them to become a traditional software engineering PMs, I 
would say. Because in this role, you go into writing specifications, understanding the requirements, etc.. If they have some understanding of machine learning philosophy, @@ -1095,147 +1230,6 @@ transcript: sec: 3839 time: '1:03:59' who: Geo -description: 'Learn ML Product Manager tactics: MLOps platform strategy, observability - KPIs & adoption playbooks to own roadmaps, governance, and stakeholder ROI.' -intro: How do you become an ML product manager and build MLOps platforms that teams - actually use? In this episode, Geo Jolly, a Technical PM at Glovo with a background - from web/dev to data science and product management, walks through the practical - skills and decisions that define the role.

We cover MLOps platform strategy - and vendor evaluation, treating internal platform users as customers, and the real - productivity costs of poor tooling UX. Geo outlines product manager responsibilities—roadmaps, - specs, backlog prioritization—and explains outcome-driven problem definition, avoiding - solution bias, and running workshops and interviews to break down complex problems. - You’ll hear about ML observability and KPIs to measure platform impact, release - governance and rollout timing, adoption strategy for internal stakeholders, and - engineering roles needed for platform delivery (CI/CD, K8s, syseng). Practical topics - also include model validation and ML quality assurance, embedded data scientists - as power users, Agile approaches for data science, and concrete transition paths - from data scientist or Scrum Master into technical ML product roles.

Listen - to gain actionable guidance on MLOps platforms, observability, adoption strategy, - and the technical literacy required to succeed as an ML product manager. -dateadded: '2021-12-17' -duration: PT01H02M46S -quotableClips: -- name: 'Episode Introduction: Product Management for Machine Learning' - startOffset: 0 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=0 - endOffset: 73 -- name: 'Guest Overview: Geo and episode focus on AI Product Manager role' - startOffset: 73 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=73 - endOffset: 116 -- name: 'Career Journey: From web/dev to data science to product management' - startOffset: 116 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=116 - endOffset: 388 -- name: 'Glovo Role: Leading ML platform strategy and team responsibilities' - startOffset: 388 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=388 - endOffset: 521 -- name: In-house MLOps Platform Strategy & Vendor Evaluation - startOffset: 521 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=521 - endOffset: 590 -- name: 'Product Manager Responsibilities: Roadmap, specs, and stakeholder balance' - startOffset: 590 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=590 - endOffset: 684 -- name: 'Internal Platform Users as Customers: ROI and adoption considerations' - startOffset: 684 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=684 - endOffset: 824 -- name: 'Platform Usability Costs: Productivity losses from poor tooling UX' - startOffset: 824 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=824 - endOffset: 919 -- name: 'Backlog Prioritization: Grooming with engineering and Agile practices' - startOffset: 919 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=919 - endOffset: 1004 -- name: 'Outcome-Driven Problem Definition: Metrics over immediate solutions' - startOffset: 1004 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1004 - endOffset: 1105 -- name: 'ML Observability: KPIs and measuring platform impact' - 
startOffset: 1105 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1105 - endOffset: 1169 -- name: 'Avoiding Solution Bias: Techniques to resist jumping into solutions' - startOffset: 1169 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1169 - endOffset: 1266 -- name: 'Collaborative Problem Breakdown: Workshops, interviews, and user input' - startOffset: 1266 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1266 - endOffset: 1335 -- name: 'Core PM Skills: Communication, prioritization, and continuous learning' - startOffset: 1335 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1335 - endOffset: 1408 -- name: 'Technical Literacy: Model architectures, data infra, and cloud concepts' - startOffset: 1408 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1408 - endOffset: 1531 -- name: 'Technical Background Value: Why platform PMs need tooling familiarity' - startOffset: 1531 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1531 - endOffset: 1717 -- name: 'Role Differences: Data Science Lead / Staff vs Technical ML Product Manager' - startOffset: 1717 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1717 - endOffset: 1888 -- name: 'Release Governance & Rollout Strategy: Approvals, compliance, and timing' - startOffset: 1888 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=1888 - endOffset: 2118 -- name: 'Adoption Strategy: "Time to stakeholders" and internal rollouts' - startOffset: 2118 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2118 - endOffset: 2268 -- name: 'Engineering Roles in ML Platforms: Backend, syseng, CI/CD, and K8s' - startOffset: 2268 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2268 - endOffset: 2414 -- name: 'Embedded Data Scientists: Power users, developer advocates, and demos' - startOffset: 2414 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2414 - endOffset: 2534 -- name: 'Agile for Data Science: Kanban, Scrum, and adapting to research work' - startOffset: 2534 - url: 
https://www.youtube.com/watch?v=PjqjPvHliqg&t=2534 - endOffset: 2696 -- name: 'Transition Path: Moving from Data Scientist to Technical Product Manager' - startOffset: 2696 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2696 - endOffset: 2983 -- name: 'Recommended Resources: Books and communities for PM transition' - startOffset: 2983 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=2983 - endOffset: 3165 -- name: 'Non‑Technical Transitions: Feasibility of moving into ML product roles' - startOffset: 3165 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3165 - endOffset: 3344 -- name: 'User Research for Internal Platforms: Surveys and Happiness Reports' - startOffset: 3344 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3344 - endOffset: 3440 -- name: 'ML Quality Assurance: Model validation, shadowing, and release checklists' - startOffset: 3440 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3440 - endOffset: 3592 -- name: 'Scrum Master to PM Advice: Leverage Agile skills and learn ML basics' - startOffset: 3592 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3592 - endOffset: 3711 -- name: 'Final Thoughts: PM demands, scope, and career realities' - startOffset: 3711 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3711 - endOffset: 3815 -- name: 'Contact & Hiring: Geo on LinkedIn and Glovo opportunities' - startOffset: 3815 - url: https://www.youtube.com/watch?v=PjqjPvHliqg&t=3815 - endOffset: 3766 --- Links: diff --git a/_podcast/s15e01-why-machine-learning-design-broken.md b/_podcast/ml-system-design.md similarity index 96% rename from _podcast/s15e01-why-machine-learning-design-broken.md rename to _podcast/ml-system-design.md index 72d276fd..197624a1 100644 --- a/_podcast/s15e01-why-machine-learning-design-broken.md +++ b/_podcast/ml-system-design.md @@ -1,20 +1,107 @@ --- +title: "ML System Design Playbook: Fail-Fast Design Docs, Modular Architecture & Data Drift Monitoring" +short: "Why Machine Learning Design is Broken" 
+season: 15 episode: 1 guests: - valeriybabushkin +image: images/podcast/ml-system-design.jpg ids: - anchor: atatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o + anchor: datatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o youtube: 6YBMU6475KQ -image: images/podcast/s15e01-why-machine-learning-design-broken.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Why-Machine-Learning-Design-is-Broken---Valerii-Babushkin-e26rv8o apple: https://podcasts.apple.com/us/podcast/why-machine-learning-design-is-broken-valerii-babushkin/id1541710331?i=1000621176183 spotify: https://open.spotify.com/episode/3KfKptkWIa1hW1hSOvBQaO youtube: https://www.youtube.com/watch?v=6YBMU6475KQ -season: 15 -short: Why Machine Learning Design is Broken -title: 'ML System Design Playbook: Fail-Fast Design Docs, Modular Architecture & Data - Drift Monitoring' + +description: "Master ML system design: fail-fast design docs, modular architecture & data drift monitoring to cut risk, assign ownership, speed experiments." +intro: "How do you design ML systems that fail fast, scale with modular architecture, and survive data drift in production? In this episode, Valerii Babushkin — Senior Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and author of Machine Learning System Design — walks through a practical playbook for ML system design.

We cover why fail-fast design docs act like blueprints to prevent wasted work, how shared and chapter-based design docs enable alignment and versioning, and the maintenance challenges of treating design docs as living artifacts. Valerii explains assigning ownership and mapping the bus factor for risk assessment, incentivizing documentation, and using a 16-chapter ML design template to standardize architecture. On the operational side we dig into monitoring strategies for data drift, concept drift, and prediction drift, plus fallback strategies — redundancy, simple baselines, and serving reliability — to keep models robust. He also points to tools and resources including Evidently AI, templates, and the book.

Listen to gain concrete tactics for fail-fast design docs, modular architecture, data drift monitoring, and baseline solutions you can apply to reduce risk and accelerate ML delivery" +topics: +- machine learning +- system design +dateadded: 2023-07-16 + +duration: PT00H59M38S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=0 + endOffset: 126 +- name: 'Guest Introduction: Valerii Babushkin background' + startOffset: 126 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=126 + endOffset: 240 +- name: 'Book Announcement: Machine Learning System Design' + startOffset: 240 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=240 + endOffset: 426 +- name: 'Design Document Purpose: Fail-Fast Principle' + startOffset: 426 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=426 + endOffset: 519 +- name: 'Blueprint Analogy: Preventing Waste with Early Design' + startOffset: 519 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=519 + endOffset: 669 +- name: Prevalence of Missing Documentation in ML Projects + startOffset: 669 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=669 + endOffset: 876 +- name: 'Shared Design Docs: Alignment, Feedback, and Simplicity' + startOffset: 876 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=876 + endOffset: 1141 +- name: 'Design Doc as a Living Artifact: Maintenance Challenges' + startOffset: 1141 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1141 + endOffset: 1477 +- name: 'Accountability & Ownership: Assigning Areas of Responsibility' + startOffset: 1477 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1477 + endOffset: 1919 +- name: 'Bus Factor & Risk Assessment: People Dependency Mapping' + startOffset: 1919 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1919 + endOffset: 2210 +- name: 'Modularity: Chapter-Based Design Docs and Versioning Signals' + startOffset: 2210 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2210 + 
endOffset: 2461 +- name: 'Incentivizing Documentation: Metrics and Performance Reviews' + startOffset: 2461 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2461 + endOffset: 2633 +- name: 'ML Design Template Overview: 16-Chapter Book Outline' + startOffset: 2633 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2633 + endOffset: 2866 +- name: 'Monitoring: Detecting Data Drift, Concept Drift, and Prediction Drift' + startOffset: 2866 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2866 + endOffset: 3119 +- name: 'Fallback Strategies: Redundancy, Simple Baselines, and Serving Reliability' + startOffset: 3119 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3119 + endOffset: 3313 +- name: 'Baseline Solutions: Start Simple to Validate Hypotheses Quickly' + startOffset: 3313 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3313 + endOffset: 3495 +- name: 'Resources & Tools: Book Discount, Evidently AI, and Templates' + startOffset: 3495 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3495 + endOffset: 3644 +- name: 'Contact: Connect with Valerii on LinkedIn' + startOffset: 3644 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3644 + endOffset: 3704 +- name: Episode Close and Final Takeaways + startOffset: 3704 + url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3704 + endOffset: 3578 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Valerii Babushkin background' @@ -936,103 +1023,6 @@ transcript: sec: 3704 time: '1:01:44' who: Alexey -description: 'Master ML system design: fail-fast design docs, modular architecture - & data drift monitoring to cut risk, assign ownership, speed experiments.' -intro: How do you design ML systems that fail fast, scale with modular architecture, - and survive data drift in production? 
In this episode, Valerii Babushkin — Senior - Director of Data, Analytics, and AI at BP, Kaggle Competitions Grandmaster, and - author of Machine Learning System Design — walks through a practical playbook for - ML system design.

We cover why fail-fast design docs act like blueprints - to prevent wasted work, how shared and chapter-based design docs enable alignment - and versioning, and the maintenance challenges of treating design docs as living - artifacts. Valerii explains assigning ownership and mapping the bus factor for risk - assessment, incentivizing documentation, and using a 16-chapter ML design template - to standardize architecture. On the operational side we dig into monitoring strategies - for data drift, concept drift, and prediction drift, plus fallback strategies — - redundancy, simple baselines, and serving reliability — to keep models robust. He - also points to tools and resources including Evidently AI, templates, and the book. -

Listen to gain concrete tactics for fail-fast design docs, modular architecture, - data drift monitoring, and baseline solutions you can apply to reduce risk and accelerate - ML delivery. -dateadded: '2023-07-16' -duration: PT00H59M38S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=0 - endOffset: 126 -- name: 'Guest Introduction: Valerii Babushkin background' - startOffset: 126 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=126 - endOffset: 240 -- name: 'Book Announcement: Machine Learning System Design' - startOffset: 240 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=240 - endOffset: 426 -- name: 'Design Document Purpose: Fail-Fast Principle' - startOffset: 426 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=426 - endOffset: 519 -- name: 'Blueprint Analogy: Preventing Waste with Early Design' - startOffset: 519 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=519 - endOffset: 669 -- name: Prevalence of Missing Documentation in ML Projects - startOffset: 669 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=669 - endOffset: 876 -- name: 'Shared Design Docs: Alignment, Feedback, and Simplicity' - startOffset: 876 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=876 - endOffset: 1141 -- name: 'Design Doc as a Living Artifact: Maintenance Challenges' - startOffset: 1141 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1141 - endOffset: 1477 -- name: 'Accountability & Ownership: Assigning Areas of Responsibility' - startOffset: 1477 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1477 - endOffset: 1919 -- name: 'Bus Factor & Risk Assessment: People Dependency Mapping' - startOffset: 1919 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=1919 - endOffset: 2210 -- name: 'Modularity: Chapter-Based Design Docs and Versioning Signals' - startOffset: 2210 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2210 - endOffset: 2461 -- name: 'Incentivizing 
Documentation: Metrics and Performance Reviews' - startOffset: 2461 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2461 - endOffset: 2633 -- name: 'ML Design Template Overview: 16-Chapter Book Outline' - startOffset: 2633 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2633 - endOffset: 2866 -- name: 'Monitoring: Detecting Data Drift, Concept Drift, and Prediction Drift' - startOffset: 2866 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=2866 - endOffset: 3119 -- name: 'Fallback Strategies: Redundancy, Simple Baselines, and Serving Reliability' - startOffset: 3119 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3119 - endOffset: 3313 -- name: 'Baseline Solutions: Start Simple to Validate Hypotheses Quickly' - startOffset: 3313 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3313 - endOffset: 3495 -- name: 'Resources & Tools: Book Discount, Evidently AI, and Templates' - startOffset: 3495 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3495 - endOffset: 3644 -- name: 'Contact: Connect with Valerii on LinkedIn' - startOffset: 3644 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3644 - endOffset: 3704 -- name: Episode Close and Final Takeaways - startOffset: 3704 - url: https://www.youtube.com/watch?v=6YBMU6475KQ&t=3704 - endOffset: 3578 --- Links: diff --git a/_podcast/s17e05-machine-learning-engineering-in-finance.md b/_podcast/mlops-and-ml-engineering-in-finance.md similarity index 95% rename from _podcast/s17e05-machine-learning-engineering-in-finance.md rename to _podcast/mlops-and-ml-engineering-in-finance.md index 5e36d977..9c88de08 100644 --- a/_podcast/s17e05-machine-learning-engineering-in-finance.md +++ b/_podcast/mlops-and-ml-engineering-in-finance.md @@ -1,19 +1,127 @@ --- +title: "MLOps in Finance: Regulated Deployment, CI/CD and Model Governance" +short: "Machine Learning Engineering in Finance" +season: 17 episode: 5 guests: - nemanjaradojkovic +image: images/podcast/mlops-and-ml-engineering-in-finance.jpg ids: - 
anchor: atatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 + anchor: datatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 youtube: Nl4aibeFwiI -image: images/podcast/s17e05-machine-learning-engineering-in-finance.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Machine-Learning-Engineering-in-Finance---Nemanja-Radojkovic-e2evai8 apple: https://podcasts.apple.com/us/podcast/machine-learning-engineering-in-finance-nemanja-radojkovic/id1541710331?i=1000643322929 spotify: https://open.spotify.com/episode/3yQtA8EAndau1yhCFPfwtj?si=ZutO4mLlRfOz_Zgw4GujiQ youtube: https://www.youtube.com/watch?v=Nl4aibeFwiI -season: 17 -short: Machine Learning Engineering in Finance -title: 'Practical MLOps for Finance: CI/CD, On-Prem Deployment & Minimal Viable ML' +description: "Learn MLOps for finance: model governance, compliant deployments, monitoring, and MVP ML Ops tactics to build production-ready, auditable models." +topics: +- MLOps +- machine learning +- data engineering +- production +- career transition +intro: "How do you deploy machine learning in heavily regulated finance environments while keeping CI/CD pipelines, model governance, and operational risk under control? In this episode Nemanja Radojkovic—an electrical engineer turned data scientist and MLOps practitioner who moved from Belgrade to Leuven—walks through real-world constraints and pragmatic solutions for MLOps in finance.

Drawing on his PhD background, consulting experience, and teaching, Nemanja covers finance use cases such as compliance, AML, fraud detection, and document/email automation, then drills into ML engineering responsibilities: deployment choices, CI/CD, release management, and building trust with governance and approvals. We examine legacy and regulatory constraints, on-premises platforms (Hadoop, OpenShift), and low-cost MLOps strategies: minimal viable setups (dev/test/prod, monitoring, model registry, data versioning, reproducible pipelines) and tactical workarounds like S3-based registries.

Listeners will gain actionable guidance on adapting ML workflows to corporate DevOps, prioritizing MLOps on a shoestring, standardizing deployment patterns and platform reuse (FastAPI, internal libraries), and the core skills needed for ML engineering and production readiness. Ideal for ML engineers and data teams tackling regulated deployment, CI/CD, and model governance in finance." +dateadded: 2024-01-29 +duration: PT00H58M04S +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=0 + endOffset: 95 +- name: 'Guest Introduction: Nemanja’s journey from Belgrade to ML Ops in Europe' + startOffset: 95 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=95 + endOffset: 172 +- name: 'Guest Background: Electrical engineering, PhD experience, and early career + moves' + startOffset: 172 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=172 + endOffset: 498 +- name: 'Early Data Roles: PhD, Deloitte, and first paid Python work' + startOffset: 498 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=498 + endOffset: 635 +- name: 'Finance Use Cases: Compliance, AML, fraud, and smart automation (document + & email processing)' + startOffset: 635 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=635 + endOffset: 897 +- name: 'Role Overview: ML engineering / ML Ops responsibilities in finance (CI/CD, + deployment choices)' + startOffset: 897 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=897 + endOffset: 1132 +- name: 'Regulatory & Legacy Constraints: Slow change, legacy systems, and governance + impact' + startOffset: 1132 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1132 + endOffset: 1345 +- name: 'DevOps Governance: Release management, approvals, and building trust' + startOffset: 1345 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1345 + endOffset: 1419 +- name: 'Integrating ML with DevOps: Adapting ML workflows to existing corporate processes' + startOffset: 1419 + url: 
https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1419 + endOffset: 1671 +- name: 'On-Premises Infrastructure: Hadoop, OpenShift, hardware requests, and platform + teams' + startOffset: 1671 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1671 + endOffset: 1862 +- name: 'ML Ops on a Shoestring: Prioritization and minimal viable ML Ops strategy' + startOffset: 1862 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1862 + endOffset: 1917 +- name: 'Minimal ML Ops Components: Dev/test/prod environments, monitoring, model + registry, data versioning, reproducible pipelines' + startOffset: 1917 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1917 + endOffset: 2157 +- name: 'Tactical Solutions: Using S3 and simple approaches as interim model registry/data + versioning' + startOffset: 2157 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2157 + endOffset: 2328 +- name: 'Project Approach: Prototyping, Agile limits for ML, and iterative groundwork' + startOffset: 2328 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2328 + endOffset: 2474 +- name: 'Team Structure: Multiple data scientists per ML engineer and standardized + deployment patterns' + startOffset: 2474 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2474 + endOffset: 2619 +- name: 'Platform & Reuse: Internal libraries, FastAPI framework, and maintaining + production apps' + startOffset: 2619 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2619 + endOffset: 2704 +- name: 'Skills for ML Engineers: Python, Linux, networking, cloud basics, and stakeholder + evangelism' + startOffset: 2704 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2704 + endOffset: 2935 +- name: 'Career Transition Challenges: Moving from electrical engineering and sales + into ML — probabilistic thinking' + startOffset: 2935 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2935 + endOffset: 3171 +- name: 'Beginner Tech Stack: Python, SQL, Pandas/Polars, cloud basics, and job-market + driven learning' + 
startOffset: 3171 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3171 + endOffset: 3379 +- name: 'Learn by Building: End-to-end projects, web apps, and scraping job postings + to discover in-demand skills' + startOffset: 3379 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3379 + endOffset: 3544 +- name: Closing Remarks and Links to Talk/Resources + startOffset: 3544 + url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3544 + endOffset: 3484 transcript: - header: Episode Introduction - header: 'Guest Introduction: Nemanja’s journey from Belgrade to ML Ops in Europe' @@ -1167,125 +1275,20 @@ transcript: sec: 3579 time: '59:39' who: Nemanja -description: 'Learn MLOps for finance: CI/CD & on-prem deployment with minimal viable - ML - build reproducible pipelines, model registry and monitoring to ensure compliance' -intro: 'How do you deliver machine learning in highly regulated, legacy finance environments - where CI/CD, on‑prem deployment, and governance constrain every decision? In this - episode Nemanja Radojkovic — an electrical engineer turned data scientist and MLOps - practitioner who now teaches Data Science and contributes courses to DataCamp — - walks through pragmatic MLOps for finance.

We cover concrete finance use - cases (AML, fraud, compliance, automated document and email processing) and the - ML engineering responsibilities that matter most: CI/CD, deployment choices, and - integrating ML workflows with existing DevOps and release governance. Nemanja explains - working with on‑prem platforms like Hadoop and OpenShift, how to prioritize a minimal - viable ML Ops stack on a shoestring (dev/test/prod environments, monitoring, model - registry, data versioning, reproducible pipelines), and tactical interim solutions - such as using S3 for registry/versioning.

Listeners will get actionable - guidance on prototyping under regulatory constraints, team structures and reusable - platform patterns, and the practical skills and beginner tech stack (Python, SQL, - Pandas/Polars, cloud basics) to move models from experiment to production in finance.' -dateadded: '2024-01-29' -duration: PT00H58M04S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=0 - endOffset: 95 -- name: 'Guest Introduction: Nemanja’s journey from Belgrade to ML Ops in Europe' - startOffset: 95 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=95 - endOffset: 172 -- name: 'Guest Background: Electrical engineering, PhD experience, and early career - moves' - startOffset: 172 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=172 - endOffset: 498 -- name: 'Early Data Roles: PhD, Deloitte, and first paid Python work' - startOffset: 498 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=498 - endOffset: 635 -- name: 'Finance Use Cases: Compliance, AML, fraud, and smart automation (document - & email processing)' - startOffset: 635 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=635 - endOffset: 897 -- name: 'Role Overview: ML engineering / ML Ops responsibilities in finance (CI/CD, - deployment choices)' - startOffset: 897 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=897 - endOffset: 1132 -- name: 'Regulatory & Legacy Constraints: Slow change, legacy systems, and governance - impact' - startOffset: 1132 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1132 - endOffset: 1345 -- name: 'DevOps Governance: Release management, approvals, and building trust' - startOffset: 1345 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1345 - endOffset: 1419 -- name: 'Integrating ML with DevOps: Adapting ML workflows to existing corporate processes' - startOffset: 1419 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1419 - endOffset: 1671 -- name: 'On-Premises Infrastructure: 
Hadoop, OpenShift, hardware requests, and platform - teams' - startOffset: 1671 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1671 - endOffset: 1862 -- name: 'ML Ops on a Shoestring: Prioritization and minimal viable ML Ops strategy' - startOffset: 1862 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1862 - endOffset: 1917 -- name: 'Minimal ML Ops Components: Dev/test/prod environments, monitoring, model - registry, data versioning, reproducible pipelines' - startOffset: 1917 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=1917 - endOffset: 2157 -- name: 'Tactical Solutions: Using S3 and simple approaches as interim model registry/data - versioning' - startOffset: 2157 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2157 - endOffset: 2328 -- name: 'Project Approach: Prototyping, Agile limits for ML, and iterative groundwork' - startOffset: 2328 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2328 - endOffset: 2474 -- name: 'Team Structure: Multiple data scientists per ML engineer and standardized - deployment patterns' - startOffset: 2474 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2474 - endOffset: 2619 -- name: 'Platform & Reuse: Internal libraries, FastAPI framework, and maintaining - production apps' - startOffset: 2619 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2619 - endOffset: 2704 -- name: 'Skills for ML Engineers: Python, Linux, networking, cloud basics, and stakeholder - evangelism' - startOffset: 2704 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2704 - endOffset: 2935 -- name: 'Career Transition Challenges: Moving from electrical engineering and sales - into ML — probabilistic thinking' - startOffset: 2935 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=2935 - endOffset: 3171 -- name: 'Beginner Tech Stack: Python, SQL, Pandas/Polars, cloud basics, and job-market - driven learning' - startOffset: 3171 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3171 - endOffset: 3379 -- name: 'Learn by 
Building: End-to-end projects, web apps, and scraping job postings - to discover in-demand skills' - startOffset: 3379 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3379 - endOffset: 3544 -- name: Closing Remarks and Links to Talk/Resources - startOffset: 3544 - url: https://www.youtube.com/watch?v=Nl4aibeFwiI&t=3544 - endOffset: 3484 ---- +context: 'Context: Nemanja’s story and the episode’s segments trace practical ML work + in regulated finance—moving from research to ML engineering in legacy, governance-heavy + environments—covering real constraints (on-prem infra, approvals), concrete ML Ops + responsibilities (CI/CD, deployment, monitoring, model/data versioning), tactical + shortcuts, team and platform patterns, and the skills and career moves that enable + this work. + Core: The unifying idea is that bringing ML into production in conservative, regulated + organizations succeeds not through ideal tools or big rewrites but through a pragmatic, + engineering-first, incremental approach—building minimal viable ML Ops (reproducible + pipelines, environments, monitoring, simple registries), integrating with existing + DevOps/governance, reusing platform patterns, and focusing on practical skills and + iterative delivery to earn trust and scale ML responsibly.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/radojkovic/){:target="_blank"} diff --git a/_podcast/s19e04-mlops-as-team.md b/_podcast/mlops-at-scale-reproducibility-adoption.md similarity index 93% rename from _podcast/s19e04-mlops-as-team.md rename to _podcast/mlops-at-scale-reproducibility-adoption.md index 7fab47b4..683bfa50 100644 --- a/_podcast/s19e04-mlops-as-team.md +++ b/_podcast/mlops-at-scale-reproducibility-adoption.md @@ -1,38 +1,130 @@ --- +title: "MLOps at Scale: CI/CD, Reproducibility, Model Monitoring & Adoption Strategies" +short: "MLOps as a Team" +season: 19 episode: 4 guests: - raphaelhoogvliets -description: 'Master MLOps: CI/CD, reproducibility, and delivery strategies to accelerate - ML delivery, boost reliability, improve team efficiency, and measure business impact.' -intro: How do you keep machine learning models deployed, monitored, and maintained - in production? In this episode, Raphaël Hoogvliets from Eneco — whose career journey - spans agriculture to data science and MLOps — tackles that core MLOps challenge. - We trace practical design choices and long‑term trade‑offs between speed and robustness, - and why team coordination, evangelists, tech translators and technical leads matter - when scaling ML.

Key topics include centralized MLOps as an enabling platform - team, support models for product teams and ML engineers, adoption strategies centered - on iteration and developer experience, and tactics for building trust through quick - wins and pain‑point collection. We also cover measurable KPIs like deployment frequency - and impact tracking, core practices such as CI/CD, repo structure, parameterization, - testing, and reproducibility with data versioning and experiment capture. The episode - reviews essential tools—experiment tracking, model registry, serving and monitoring—plus - dependency and container strategies (Docker, Kubernetes, Databricks) and signals - for when to introduce governance.

If you’re responsible for operationalizing - machine learning, this discussion offers concrete guidance on prioritizing CI/CD, - staffing the right skill mix, and choosing tools and processes to keep models reliable - in production. +image: images/podcast/mlops-at-scale-reproducibility-adoption.jpg ids: - anchor: atalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr + anchor: datatalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr youtube: rMq63r3zi4c -image: images/podcast/s19e04-mlops-as-team.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/MLOps-as-a-Team---Raphal-Hoogvliets-e2qnnu5/a-abkcdlr apple: https://podcasts.apple.com/us/podcast/mlops-as-a-team-rapha%C3%ABl-hoogvliets/id1541710331?i=1000676238840 spotify: https://open.spotify.com/episode/0Dl372MFGvN0zDa1YQx7oe?si=eCy-a4fkRtOaEe21-KDHXQ youtube: https://youtube.com/watch?v=rMq63r3zi4c -season: 19 -short: MLOps as a Team -title: 'MLOps at Scale: CI/CD, Reproducibility, Model Monitoring & Team Adoption Strategies' +description: "Learn MLOps CI/CD and model monitoring to scale reliable deployments, accelerate delivery, ensure reproducibility, and drive model adoption in production." +topics: +- MLOps +- data science +- machine learning +- tools +- data governance +intro: "How do you run MLOps at scale so models stay deployed, reproducible, and actually adopted? In this episode Raphaël Hoogvliets—who leads a 12-engineer team at Eneco and brings a career arc from agriculture into data science and MLOps—walks through practical approaches for CI/CD for ML, reproducibility, model monitoring, and adoption strategy.

We cover the core trade-offs between speed and robustness, design choices for long-term maintainability, and the team coordination needed to scale ML: evangelists, tech translators, and technical leads. Raphaël explains why a centralized MLOps platform team often works as an enabling layer, how MLOps should support product teams, and how to drive adoption through iteration, feedback loops, and developer experience. You’ll hear concrete practices—CI, repo structure, parameterization, testing—plus reproducibility tactics like data versioning, traceability, and experiment capture. We also discuss KPIs (deployment frequency and impact tracking), skill mix, dependency management, container strategies, and real success and failure stories.

Listen to learn actionable priorities for getting started (start with CI/CD and solve tangible pain points), and how to measure and sustain model value through monitoring and operational processes." +dateadded: 2024-11-16 +duration: PT01H04M07S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=0 + endOffset: 81 +- name: 'Guest Overview: Raphaël Hoogvliets and Eneco role' + startOffset: 81 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=81 + endOffset: 154 +- name: 'Career Path: From agriculture to data science and MLOps' + startOffset: 154 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=154 + endOffset: 521 +- name: Agriculture technology, scale, and sustainability trade-offs + startOffset: 521 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=521 + endOffset: 636 +- name: Design Choices and Long-Term Tradeoffs in ML projects + startOffset: 636 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=636 + endOffset: 817 +- name: 'Speed vs. 
Robustness: trade-offs in MLOps delivery' + startOffset: 817 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=817 + endOffset: 845 +- name: 'Team Coordination: why collaboration matters for ML at scale' + startOffset: 845 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=845 + endOffset: 1018 +- name: 'Key Team Roles: evangelists, tech translators, and technical leads' + startOffset: 1018 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1018 + endOffset: 1381 +- name: Centralized MLOps as an enabling platform team + startOffset: 1381 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1381 + endOffset: 1520 +- name: 'Support Model: how MLOps assists product teams and ML engineers' + startOffset: 1520 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1520 + endOffset: 1676 +- name: 'Adoption Strategy: iteration, feedback loops, and developer experience' + startOffset: 1676 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1676 + endOffset: 1966 +- name: 'Building Trust: collecting pain points and delivering quick wins' + startOffset: 1966 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=1966 + endOffset: 2215 +- name: 'Measuring Value: KPIs, deployment frequency, and impact tracking' + startOffset: 2215 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2215 + endOffset: 2346 +- name: 'Core Practices: CI, repo structure, parameterization, and testing' + startOffset: 2346 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2346 + endOffset: 2551 +- name: 'Reproducibility: data versioning, traceability, and experiment capture' + startOffset: 2551 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2551 + endOffset: 2662 +- name: 'Maturity Signals: when to introduce data versioning and governance' + startOffset: 2662 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2662 + endOffset: 2710 +- name: 'Skill Mix: combining data science, SRE/devops, and platform engineering' + startOffset: 2710 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2710 + endOffset: 2921 +- name: 'Getting 
Started: prioritize CI/CD and solve tangible pain points' + startOffset: 2921 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=2921 + endOffset: 3081 +- name: 'MLOps Toolset: experiment tracking, model registry, serving, and monitoring' + startOffset: 3081 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3081 + endOffset: 3188 +- name: 'Dependency Management: package registries for reproducible deployments' + startOffset: 3188 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3188 + endOffset: 3410 +- name: 'Container Strategy: Docker, Kubernetes, Databricks trade-offs' + startOffset: 3410 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3410 + endOffset: 3476 +- name: 'Success & Failure Stories: deployment wins and integration freezes' + startOffset: 3476 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3476 + endOffset: 3654 +- name: 'MLOps Defined: operationalizing machine learning in business' + startOffset: 3654 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3654 + endOffset: 3718 +- name: 'Core Challenge: keeping models deployed, monitored, and maintained' + startOffset: 3718 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3718 + endOffset: 3762 +- name: Closing Remarks and next steps + startOffset: 3762 + url: https://youtube.com/watch?v=rMq63r3zi4c&t=3762 + endOffset: 3847 transcript: - header: Podcast Introduction - line: Hi, everyone! Welcome to our event. This is brought to you by DataTalks.Club, @@ -165,7 +257,7 @@ transcript: sec: 553 time: '9:13' who: Raphaël -- header: Design Choices and Long‑Term Tradeoffs in ML projects +- header: Design Choices and Long-Term Tradeoffs in ML projects - line: 'Back to MLOps — your LinkedIn profile has an interesting tagline: “Creating the future’s technical debt today.” What does that mean?' 
sec: 636 @@ -832,107 +924,15 @@ transcript: sec: 3847 time: '1:04:07' who: Raphaël -dateadded: '2024-11-16' -duration: PT01H04M07S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=0 - endOffset: 81 -- name: 'Guest Overview: Raphaël Hoogvliets and Eneco role' - startOffset: 81 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=81 - endOffset: 154 -- name: 'Career Path: From agriculture to data science and MLOps' - startOffset: 154 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=154 - endOffset: 521 -- name: Agriculture technology, scale, and sustainability trade-offs - startOffset: 521 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=521 - endOffset: 636 -- name: Design Choices and Long‑Term Tradeoffs in ML projects - startOffset: 636 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=636 - endOffset: 817 -- name: 'Speed vs. Robustness: trade-offs in MLOps delivery' - startOffset: 817 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=817 - endOffset: 845 -- name: 'Team Coordination: why collaboration matters for ML at scale' - startOffset: 845 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=845 - endOffset: 1018 -- name: 'Key Team Roles: evangelists, tech translators, and technical leads' - startOffset: 1018 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1018 - endOffset: 1381 -- name: Centralized MLOps as an enabling platform team - startOffset: 1381 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1381 - endOffset: 1520 -- name: 'Support Model: how MLOps assists product teams and ML engineers' - startOffset: 1520 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1520 - endOffset: 1676 -- name: 'Adoption Strategy: iteration, feedback loops, and developer experience' - startOffset: 1676 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1676 - endOffset: 1966 -- name: 'Building Trust: collecting pain points and delivering quick wins' - startOffset: 1966 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=1966 - 
endOffset: 2215 -- name: 'Measuring Value: KPIs, deployment frequency, and impact tracking' - startOffset: 2215 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2215 - endOffset: 2346 -- name: 'Core Practices: CI, repo structure, parameterization, and testing' - startOffset: 2346 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2346 - endOffset: 2551 -- name: 'Reproducibility: data versioning, traceability, and experiment capture' - startOffset: 2551 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2551 - endOffset: 2662 -- name: 'Maturity Signals: when to introduce data versioning and governance' - startOffset: 2662 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2662 - endOffset: 2710 -- name: 'Skill Mix: combining data science, SRE/devops, and platform engineering' - startOffset: 2710 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2710 - endOffset: 2921 -- name: 'Getting Started: prioritize CI/CD and solve tangible pain points' - startOffset: 2921 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=2921 - endOffset: 3081 -- name: 'MLOps Toolset: experiment tracking, model registry, serving, and monitoring' - startOffset: 3081 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3081 - endOffset: 3188 -- name: 'Dependency Management: package registries for reproducible deployments' - startOffset: 3188 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3188 - endOffset: 3410 -- name: 'Container Strategy: Docker, Kubernetes, Databricks trade-offs' - startOffset: 3410 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3410 - endOffset: 3476 -- name: 'Success & Failure Stories: deployment wins and integration freezes' - startOffset: 3476 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3476 - endOffset: 3654 -- name: 'MLOps Defined: operationalizing machine learning in business' - startOffset: 3654 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3654 - endOffset: 3718 -- name: 'Core Challenge: keeping models deployed, monitored, and maintained' - startOffset: 3718 - url: 
https://youtube.com/watch?v=rMq63r3zi4c&t=3718 - endOffset: 3762 -- name: Closing Remarks and next steps - startOffset: 3762 - url: https://youtube.com/watch?v=rMq63r3zi4c&t=3762 - endOffset: 3847 +context: 'Context: Raphaël Hoogvliets (Eneco) walks through his journey from agriculture + to data science and MLOps, illustrating real-world tradeoffs in design, team structure, + tooling, and delivery while sharing concrete practices, stories, and metrics for + operationalizing ML. + + Core narrative: MLOps is fundamentally about operationalizing machine learning as + sustainable product engineering—building an enabling, platform-led way of working + that brings cross-functional teams, pragmatic engineering practices (CI/CD, reproducibility, + testing, dependency management), and iterative adoption together so organizations + can balance speed versus robustness, build trust with quick wins and measured KPIs, + and keep models reliably deployed and delivering business impact.' --- diff --git a/_podcast/s02e12-communities.md b/_podcast/mlops-community-building-and-meetups.md similarity index 97% rename from _podcast/s02e12-communities.md rename to _podcast/mlops-community-building-and-meetups.md index 7fc4194e..3b196360 100644 --- a/_podcast/s02e12-communities.md +++ b/_podcast/mlops-community-building-and-meetups.md @@ -1,11 +1,11 @@ --- -title: 'MLOps Community Playbook: Launch, Grow & Retain Meetups, Members, and Contributors' -short: Building Online Tech Communities -guests: -- demetriosbrinkmann -image: images/podcast/s02e12-communities.jpg +title: "MLOps Community Playbook: Launch, Grow & Retain Meetups, Members, and Contributors" +short: "Building Online Tech Communities" season: 2 episode: 12 +guests: +- demetriosbrinkmann +image: images/podcast/mlops-community-building-and-meetups.jpg ids: youtube: ByCE1vSrIr8 anchor: Building-Online-Tech-Communities---Demetrios-Brinkmann-eu35fo @@ -14,6 +14,118 @@ links: anchor: 
https://anchor.fm/datatalksclub/episodes/Building-Online-Tech-Communities---Demetrios-Brinkmann-eu35fo spotify: https://open.spotify.com/episode/58Xe9PCfdz26CVuYKtZWUE apple: https://podcasts.apple.com/us/podcast/building-online-tech-communities-demetrios-brinkmann/id1541710331?i=1000515510103 + +description: "Master MLOps meetups: launch communities, recruit contributors, and boost member retention with LinkedIn outreach, content strategy, and practical checklists." +intro: "How do you launch, grow, and retain an MLOps community that moves from meetups to a sustainable, contributor-led ecosystem? In this episode, Demetrios Brinkmann — who has led the MLOps community since April 2020 and now runs the largest active group with 2,500+ Slack members and 25k YouTube views — walks through a practical community playbook for MLOps meetups, members, and contributors.

We trace his origin story and pivot to meetups and podcasting, then dive into concrete tactics: recruiting speakers with sales techniques, LinkedIn outreach and cold DMs, weekly meetup and content strategies, editing and YouTube clips, and milestone growth from 500 to 3k members. Demetrios also addresses moderation challenges, evolving from founder-led to peer-to-peer governance, cultivating core contributors and advisory groups, and building belonging through Q&A, social channels, and non-technical spaces.

Listeners will get actionable retention strategies (giveaways, multi-format content, avoiding gamification), practical checklists for platform, purpose, audience, and content, and tips for member connections like Random Coffee and sprints. If you’re building an MLOps community or scaling technical meetups, this episode offers a focused, tactical roadmap. Find next steps at mlops.community." +topics: +- MLOps +- community building +dateadded: 2021-04-02 + +duration: PT01H13M56S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=0 + endOffset: 88 +- name: 'Origin Story: Launching the MLOps community' + startOffset: 88 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=88 + endOffset: 126 +- name: Pivot to meetups and turning events into a podcast + startOffset: 126 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=126 + endOffset: 397 +- name: Early hosting lessons and interview craft + startOffset: 397 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=397 + endOffset: 501 +- name: 'Founder Background: teaching, sales, and career pivot' + startOffset: 501 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=501 + endOffset: 641 +- name: Sales techniques for recruiting speakers and guests + startOffset: 641 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=641 + endOffset: 789 +- name: 'Early growth tactics: LinkedIn outreach and cold DMs' + startOffset: 789 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=789 + endOffset: 842 +- name: 'Content Strategy: weekly meetups, editing, and YouTube clips' + startOffset: 842 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=842 + endOffset: 1101 +- name: 'Growth Milestones: hitting 500, 1k, 2k, 3k members' + startOffset: 1101 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1101 + endOffset: 1250 +- name: 'Moderation Challenges: vendors, spam, and code of conduct' + startOffset: 1250 + url: 
https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1250 + endOffset: 1497 +- name: 'Community Evolution: moving from founder-led to peer-to-peer' + startOffset: 1497 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1497 + endOffset: 1645 +- name: Cultivating Core Contributors and advisory groups + startOffset: 1645 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1645 + endOffset: 1751 +- name: 'Fostering Belonging: Q&A, social channels, and non-technical spaces' + startOffset: 1751 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1751 + endOffset: 2064 +- name: 'Introvert Founders: starting and sustaining communities as an introvert' + startOffset: 2064 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2064 + endOffset: 2436 +- name: 'Retention Strategies: giveaways, multi-format content, and avoiding gamification' + startOffset: 2436 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2436 + endOffset: 2745 +- name: 'Customer Development (custdev): surveys, incentives, and feedback cadence' + startOffset: 2745 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2745 + endOffset: 3051 +- name: 'Member Connections: Random Coffee, Donut, and one-on-ones' + startOffset: 3051 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3051 + endOffset: 3304 +- name: 'Organizing Initiatives: sprints, autonomy, and many-to-many engagement' + startOffset: 3304 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3304 + endOffset: 3617 +- name: 'Team Structure: core volunteers vs. 
broader contributors' + startOffset: 3617 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3617 + endOffset: 3652 +- name: 'Community Economics: treating a community as an educational business' + startOffset: 3652 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3652 + endOffset: 3851 +- name: 'Practical Checklist: platform, purpose, audience, and content plan' + startOffset: 3851 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3851 + endOffset: 4046 +- name: 'Sourcing Members: LinkedIn, Reddit, YouTube, and platform-specific channels' + startOffset: 4046 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4046 + endOffset: 4178 +- name: 'Final Advice: actionable takeaways for new community builders' + startOffset: 4178 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4178 + endOffset: 4348 +- name: 'Get Involved: where to find mlops.community and next steps' + startOffset: 4348 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4348 + endOffset: 4436 +- name: Podcast Closing Remarks + startOffset: 4436 + url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4436 + endOffset: 4436 + transcript: - header: Podcast Introduction - line: Should I start or you want to start? @@ -1093,127 +1205,6 @@ transcript: sec: 4436 time: '1:13:56' who: Alexey -description: 'Master MLOps meetups: launch communities, recruit contributors, and - boost member retention with LinkedIn outreach, content strategy, and practical checklists.' -intro: 'How do you launch, grow, and retain an MLOps community that moves from meetups - to a sustainable, contributor-led ecosystem? In this episode, Demetrios Brinkmann - — who has led the MLOps community since April 2020 and now runs the largest active - group with 2,500+ Slack members and 25k YouTube views — walks through a practical - community playbook for MLOps meetups, members, and contributors.

We trace - his origin story and pivot to meetups and podcasting, then dive into concrete tactics: - recruiting speakers with sales techniques, LinkedIn outreach and cold DMs, weekly - meetup and content strategies, editing and YouTube clips, and milestone growth from - 500 to 3k members. Demetrios also addresses moderation challenges, evolving from - founder-led to peer-to-peer governance, cultivating core contributors and advisory - groups, and building belonging through Q&A, social channels, and non-technical spaces. -

Listeners will get actionable retention strategies (giveaways, multi-format - content, avoiding gamification), practical checklists for platform, purpose, audience, - and content, and tips for member connections like Random Coffee and sprints. If - you’re building an MLOps community or scaling technical meetups, this episode offers - a focused, tactical roadmap. Find next steps at mlops.community.' -dateadded: '2021-04-02' -duration: PT01H13M56S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=0 - endOffset: 88 -- name: 'Origin Story: Launching the MLOps community' - startOffset: 88 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=88 - endOffset: 126 -- name: Pivot to meetups and turning events into a podcast - startOffset: 126 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=126 - endOffset: 397 -- name: Early hosting lessons and interview craft - startOffset: 397 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=397 - endOffset: 501 -- name: 'Founder Background: teaching, sales, and career pivot' - startOffset: 501 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=501 - endOffset: 641 -- name: Sales techniques for recruiting speakers and guests - startOffset: 641 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=641 - endOffset: 789 -- name: 'Early growth tactics: LinkedIn outreach and cold DMs' - startOffset: 789 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=789 - endOffset: 842 -- name: 'Content Strategy: weekly meetups, editing, and YouTube clips' - startOffset: 842 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=842 - endOffset: 1101 -- name: 'Growth Milestones: hitting 500, 1k, 2k, 3k members' - startOffset: 1101 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1101 - endOffset: 1250 -- name: 'Moderation Challenges: vendors, spam, and code of conduct' - startOffset: 1250 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1250 - endOffset: 1497 -- name: 
'Community Evolution: moving from founder-led to peer-to-peer' - startOffset: 1497 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1497 - endOffset: 1645 -- name: Cultivating Core Contributors and advisory groups - startOffset: 1645 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1645 - endOffset: 1751 -- name: 'Fostering Belonging: Q&A, social channels, and non-technical spaces' - startOffset: 1751 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=1751 - endOffset: 2064 -- name: 'Introvert Founders: starting and sustaining communities as an introvert' - startOffset: 2064 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2064 - endOffset: 2436 -- name: 'Retention Strategies: giveaways, multi-format content, and avoiding gamification' - startOffset: 2436 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2436 - endOffset: 2745 -- name: 'Customer Development (custdev): surveys, incentives, and feedback cadence' - startOffset: 2745 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=2745 - endOffset: 3051 -- name: 'Member Connections: Random Coffee, Donut, and one-on-ones' - startOffset: 3051 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3051 - endOffset: 3304 -- name: 'Organizing Initiatives: sprints, autonomy, and many-to-many engagement' - startOffset: 3304 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3304 - endOffset: 3617 -- name: 'Team Structure: core volunteers vs. 
broader contributors' - startOffset: 3617 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3617 - endOffset: 3652 -- name: 'Community Economics: treating a community as an educational business' - startOffset: 3652 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3652 - endOffset: 3851 -- name: 'Practical Checklist: platform, purpose, audience, and content plan' - startOffset: 3851 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=3851 - endOffset: 4046 -- name: 'Sourcing Members: LinkedIn, Reddit, YouTube, and platform-specific channels' - startOffset: 4046 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4046 - endOffset: 4178 -- name: 'Final Advice: actionable takeaways for new community builders' - startOffset: 4178 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4178 - endOffset: 4348 -- name: 'Get Involved: where to find mlops.community and next steps' - startOffset: 4348 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4348 - endOffset: 4436 -- name: Podcast Closing Remarks - startOffset: 4436 - url: https://www.youtube.com/watch?v=ByCE1vSrIr8&t=4436 - endOffset: 4436 --- We talked about: diff --git a/_podcast/s02e05-feature-stores.md b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md similarity index 91% rename from _podcast/s02e05-feature-stores.md rename to _podcast/mlops-feature-stores-feature-stores-feast-tecton.md index 3fbdea0c..2a0a3d99 100644 --- a/_podcast/s02e05-feature-stores.md +++ b/_podcast/mlops-feature-stores-feature-stores-feast-tecton.md @@ -1,24 +1,11 @@ --- -title: 'Feature Stores for MLOps: Real-Time Feature Engineering, Feast & Tecton Guide' -short: Feature Stores in MLOps Explained -description: Discover feature store use cases, real-time features with Feast & Tecton, - build scalable MLOps to speed production, cut duplication and detect drift. 
-tags: -- feature-stores -- feast -- tecton -- mlops -- machine-learning -- data-science -- ml-platform -- ml-architecture -- willem-pienaar -- gojek -guests: -- willempienaar -image: images/podcast/s02e05-feature-stores.jpg +title: "Feature Stores for MLOps: Real-Time Feature Engineering, Feast & Tecton Guide" +short: "Feature Stores in MLOps Explained" season: 2 episode: 5 +guests: +- willempienaar +image: images/podcast/mlops-feature-stores-feature-stores-feast-tecton.jpg ids: youtube: FQYTb4uWljQ anchor: Feature-Stores-Cutting-through-the-Hype---Willem-Pienaar-ept6m8/a-a4hlg3r @@ -27,23 +14,17 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Feature-Stores-Cutting-through-the-Hype---Willem-Pienaar-ept6m8/a-a4hlg3r spotify: https://open.spotify.com/episode/05YnfTWbplXwOwicR2doy3 apple: https://podcasts.apple.com/us/podcast/feature-stores-cutting-through-the-hype-willem-pienaar/id1541710331?i=1000508782957 -intro: How do you reliably build and serve real‑time features for production ML without - rework, duplication, or training/serving skew? In this episode, Willem Pienaar — - engineering lead at Tecton and creator of Feast — walks through what feature stores - solve in MLOps and how they enable real‑time feature engineering. We define feature - stores, compare feature creation vs retrieval (SQL, Python, APIs, on‑demand transforms), - and illustrate a production real‑time fraud detection lookup. Willem separates hype - from value, explains organizational challenges like team silos and speed to production, - and outlines the platform role across materialization, serving, and validation. -

You’ll get practical coverage of Feast (open‑source) and Tecton (enterprise), - architecture components (transform engine, storage, serving, registry, monitoring), - and when online tabular use cases require a feature store versus when it’s overkill. - The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch - (Flink, Spark), validation and monitoring (drift detection, Great Expectations, - TFDV), backfilling strategies, ownership and governance, and getting started resources - (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete - next steps for productionizing features in your MLOps stack. -dateadded: '2021-02-23' + +description: "Discover feature store use cases, real-time features with Feast & Tecton, build scalable MLOps to speed production, cut duplication and detect drift" +intro: "How do you reliably build and serve real-time features for production ML without rework, duplication, or training/serving skew? In this episode, Willem Pienaar — engineering lead at Tecton and creator of Feast — walks through what feature stores solve in MLOps and how they enable real-time feature engineering. We define feature stores, compare feature creation vs retrieval (SQL, Python, APIs, on-demand transforms), and illustrate a production real-time fraud detection lookup. Willem separates hype from value, explains organizational challenges like team silos and speed to production, and outlines the platform role across materialization, serving, and validation.

You’ll get practical coverage of Feast (open-source) and Tecton (enterprise), architecture components (transform engine, storage, serving, registry, monitoring), and when online tabular use cases require a feature store versus when it’s overkill. The episode also covers integrations (dbt, Kubeflow, Airflow), streaming vs batch (Flink, Spark), validation and monitoring (drift detection, Great Expectations, TFDV), backfilling strategies, ownership and governance, and getting started resources (feast.dev, Docker). Listen to learn when to adopt a feature store and concrete next steps for productionizing features in your MLOps stack" +topics: +- machine learning +- MLOps +- feature stores +- tools +dateadded: 2021-02-23 + + quotableClips: - name: 'Episode Introduction: Feature Stores in MLOps' startOffset: 0 @@ -61,11 +42,11 @@ quotableClips: startOffset: 660 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=660 endOffset: 870 -- name: 'Feature Creation vs Retrieval: SQL, Python, APIs, and On‑Demand Transforms' +- name: 'Feature Creation vs Retrieval: SQL, Python, APIs, and On-Demand Transforms' startOffset: 870 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=870 endOffset: 990 -- name: 'Production Example: Real‑Time Fraud Detection Feature Lookup' +- name: 'Production Example: Real-Time Fraud Detection Feature Lookup' startOffset: 990 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=990 endOffset: 1110 @@ -85,7 +66,7 @@ quotableClips: startOffset: 1680 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=1680 endOffset: 1890 -- name: 'Feast Overview: Open‑Source Feature Store Design and Use Cases' +- name: 'Feast Overview: Open-Source Feature Store Design and Use Cases' startOffset: 1890 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=1890 endOffset: 2040 @@ -105,7 +86,7 @@ quotableClips: startOffset: 2550 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=2550 endOffset: 2700 -- name: 'Streaming vs Batch: Flink, Spark, and Real‑Time Feature Engineering' +- 
name: 'Streaming vs Batch: Flink, Spark, and Real-Time Feature Engineering' startOffset: 2700 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=2700 endOffset: 2850 @@ -133,6 +114,7 @@ quotableClips: startOffset: 3450 url: https://www.youtube.com/watch?v=FQYTb4uWljQ&t=3450 endOffset: 3450 + --- In this episode, we dive deeper into feature stores with Willem, creator of Feast (an open-source feature store). Previously, Willem led the Data Science Platform team at Gojek and now works at Tecton, which develops feature store technology. diff --git a/_podcast/s02e04-mlops.md b/_podcast/mlops-kubeflow-model-monitoring.md similarity index 96% rename from _podcast/s02e04-mlops.md rename to _podcast/mlops-kubeflow-model-monitoring.md index d2ec47b3..16a88b5a 100644 --- a/_podcast/s02e04-mlops.md +++ b/_podcast/mlops-kubeflow-model-monitoring.md @@ -1,11 +1,11 @@ --- -title: 'Mastering MLOps: Kubeflow Pipelines, Model Monitoring & Automated Retraining' -short: The Rise of MLOps -guests: -- theofilospapapanagiotou -image: images/podcast/s02e04-mlops.jpg +title: "Mastering MLOps: Kubeflow Pipelines, Model Monitoring & Automated Retraining" +short: "The Rise of MLOps" season: 2 episode: 4 +guests: +- theofilospapapanagiotou +image: images/podcast/mlops-kubeflow-model-monitoring.jpg ids: youtube: -i0fVp0ntYA anchor: The-Rise-of-MLOps---Theofilos-Papapanagiotou-ept67o @@ -14,6 +14,152 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-Rise-of-MLOps---Theofilos-Papapanagiotou-ept67o spotify: https://open.spotify.com/episode/3YPvzGQnfxl7Mo1VKE0l1K apple: https://podcasts.apple.com/us/podcast/the-rise-of-mlops-theofilos-papapanagiotou/id1541710331?i=1000507907719 + +description: "Master MLOps with Kubeflow pipelines and automated retraining—detect drift, accelerate deployment, and boost production model reliability for faster iteration" +intro: "How do you build reliable, production-ready ML pipelines that detect model drift, monitor fairness, and trigger automated 
retraining? In this episode, Theofilos Papapanagiotou — a systems engineer with 20 years’ experience (from Unix engineering to ML engineering) now helping companies run ML workloads and a Kubeflow enthusiast — walks through practical MLOps strategies and tooling.

We define MLOps as culture, process, and technology and contrast it with DevOps across the model lifecycle. Key topics include model monitoring for drift and fairness, inference sensors and a Prometheus/Grafana monitoring stack, commoditizing inference monitoring for faster iteration, and maturity levels from manual training to automated retraining. Theofilos dives into the Kubeflow ecosystem — Pipelines, KFServing, Feast, Katib, and integrations with TFX — plus hyperparameter search, notebook→pipeline workflows, MLMD metadata for data and model versioning, and tradeoffs for small teams and edge/mobile deployments.

If you’re implementing Kubeflow pipelines, setting up model monitoring, or planning automated retraining, this episode offers practical guidance, maturity roadmaps, and resources to help you move from prototypes to reproducible, production ML" +topics: +- MLOps +- machine learning +- production +- tools +dateadded: 2021-02-23 + +duration: PT01H02M48S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=0 + endOffset: 154 +- name: Episode Kickoff & Guest Overview + startOffset: 154 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=154 + endOffset: 210 +- name: 'Guest Background: From Unix Engineer to ML Engineering' + startOffset: 210 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=210 + endOffset: 314 +- name: 'Defining MLOps: Culture, Process, and Technology' + startOffset: 314 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=314 + endOffset: 448 +- name: 'DevOps vs MLOps: Model Lifecycle and Data Drift' + startOffset: 448 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=448 + endOffset: 677 +- name: 'Monitoring for MLOps: Drift, Fairness, and Retraining Triggers' + startOffset: 677 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=677 + endOffset: 784 +- name: 'Monitoring Stack: Prometheus/Grafana and Inference Sensors' + startOffset: 784 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=784 + endOffset: 884 +- name: Commoditizing Inference Monitoring for Faster Iteration + startOffset: 884 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=884 + endOffset: 929 +- name: 'Role Distinction: ML Engineer as Practitioner, MLOps as Practice' + startOffset: 929 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=929 + endOffset: 997 +- name: 'Team Composition: Developer, Operator, and Product in MLOps' + startOffset: 997 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=997 + endOffset: 1208 +- name: 'The "MLOps Engineer" Debate: Title vs. 
Cross-Functional Teams' + startOffset: 1208 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1208 + endOffset: 1427 +- name: MLOps Job Signals & Maturity Models (Google and Microsoft) + startOffset: 1427 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1427 + endOffset: 1621 +- name: 'Maturity Levels: Manual Training → Pipeline Automation' + startOffset: 1621 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1621 + endOffset: 1808 +- name: 'Advanced Maturity: Data-Driven Triggers and Automated Retraining' + startOffset: 1808 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1808 + endOffset: 2007 +- name: 'Cultural Shift: Monitoring as a Source of New Training Data' + startOffset: 2007 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2007 + endOffset: 2065 +- name: 'Tooling Landscape: Vendors, Open Source, and Kubernetes' + startOffset: 2065 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2065 + endOffset: 2226 +- name: 'Kubeflow Ecosystem: Pipelines, KFServing, Feast, and Katib' + startOffset: 2226 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2226 + endOffset: 2412 +- name: Hyperparameter Search with Katib and Notebook→Pipeline Workflows + startOffset: 2412 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2412 + endOffset: 2548 +- name: 'Kubeflow & TFX: ML Orchestration and Production Patterns' + startOffset: 2548 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2548 + endOffset: 2608 +- name: 'Learning Kubeflow: Docs, Workshops, and Community Resources' + startOffset: 2608 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2608 + endOffset: 2761 +- name: 'Getting Started: Cloud-Managed Pipelines and Simple Projects' + startOffset: 2761 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2761 + endOffset: 2818 +- name: 'Data & Model Versioning: MLMD, Metadata, and Traceability' + startOffset: 2818 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2818 + endOffset: 3035 +- name: 'Relationship to DataOps: Continuation and 
Divergence' + startOffset: 3035 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3035 + endOffset: 3104 +- name: 'Edge & Mobile Deployment: Offline Models and Edge Kubernetes' + startOffset: 3104 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3104 + endOffset: 3258 +- name: 'MLOps Guidance: Maturity Roadmaps and Manifesto Alternatives' + startOffset: 3258 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3258 + endOffset: 3313 +- name: 'Why Learn Kubeflow: Community Contribution and Career Value' + startOffset: 3313 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3313 + endOffset: 3424 +- name: 'MLOps Benefits: Automation, Productization, and Productivity' + startOffset: 3424 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3424 + endOffset: 3505 +- name: 'AutoML & Katib: Commoditization vs. Empowering Data Scientists' + startOffset: 3505 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3505 + endOffset: 3589 +- name: 'Simplified Serving: KFServing and Faster Model Endpoints' + startOffset: 3589 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3589 + endOffset: 3684 +- name: 'Small Teams Adopting MLOps: Practical Examples and Tradeoffs' + startOffset: 3684 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3684 + endOffset: 3776 +- name: 'Breaking Silos: Language-Agnostic Pipelines and Collaboration' + startOffset: 3776 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3776 + endOffset: 3899 +- name: Closing Remarks & Resource Links + startOffset: 3899 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3899 + endOffset: 3922 +- name: Episode End + startOffset: 3922 + url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3922 + endOffset: 3768 + transcript: - header: Podcast Introduction - header: Episode Kickoff & Guest Overview @@ -333,7 +479,7 @@ transcript: sec: 1191 time: '19:51' who: 'Alexey:' -- header: 'The "MLOps Engineer" Debate: Title vs. Cross‑Functional Teams' +- header: 'The "MLOps Engineer" Debate: Title vs. 
Cross-Functional Teams' - line: I don't think that it will become a title. Maybe people will have it because it's a fancy word now, like data scientist was 10 years ago. But I don't think this will become a role of a department. If it does, that would be a shame because @@ -506,7 +652,7 @@ transcript: sec: 1805 time: '30:05' who: 'Alexey:' -- header: 'Advanced Maturity: Data‑Driven Triggers and Automated Retraining' +- header: 'Advanced Maturity: Data-Driven Triggers and Automated Retraining' - line: Yes, it’s manual. But maybe it automatically adds your new features from your feature store, or if you have some data versioning system, and etc. So, that's the maturity level one. Then there is the visual – the ultimate goal that we should @@ -737,7 +883,7 @@ transcript: sec: 2712 time: '45:12' who: 'Alexey:' -- header: 'Getting Started: Cloud‑Managed Pipelines and Simple Projects' +- header: 'Getting Started: Cloud-Managed Pipelines and Simple Projects' - line: Yeah. sec: 2793 time: '46:33' @@ -1062,7 +1208,7 @@ transcript: sec: 3753 time: '1:02:33' who: 'Alexey:' -- header: 'Breaking Silos: Language‑Agnostic Pipelines and Collaboration' +- header: 'Breaking Silos: Language-Agnostic Pipelines and Collaboration' - line: Well, the good news is that these things are language-agnostic. Of course, there shouldn't be silos in the organization, especially based on language. At least the development departments should be working together. But the tooling @@ -1117,158 +1263,4 @@ transcript: sec: 3922 time: '1:05:22' who: 'Theo:' -description: Master MLOps with Kubeflow pipelines and automated retraining—detect - drift, accelerate deployment, and boost production model reliability for faster - iteration. -intro: How do you build reliable, production-ready ML pipelines that detect model - drift, monitor fairness, and trigger automated retraining? 
In this episode, Theofilos - Papapanagiotou — a systems engineer with 20 years’ experience (from Unix engineering - to ML engineering) now helping companies run ML workloads and a Kubeflow enthusiast - — walks through practical MLOps strategies and tooling.

We define MLOps - as culture, process, and technology and contrast it with DevOps across the model - lifecycle. Key topics include model monitoring for drift and fairness, inference - sensors and a Prometheus/Grafana monitoring stack, commoditizing inference monitoring - for faster iteration, and maturity levels from manual training to automated retraining. - Theofilos dives into the Kubeflow ecosystem — Pipelines, KFServing, Feast, Katib, - and integrations with TFX — plus hyperparameter search, notebook→pipeline workflows, - MLMD metadata for data and model versioning, and tradeoffs for small teams and edge/mobile - deployments.

If you’re implementing Kubeflow pipelines, setting up model - monitoring, or planning automated retraining, this episode offers practical guidance, - maturity roadmaps, and resources to help you move from prototypes to reproducible, - production ML. -dateadded: '2021-02-23' -duration: PT01H02M48S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=0 - endOffset: 154 -- name: Episode Kickoff & Guest Overview - startOffset: 154 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=154 - endOffset: 210 -- name: 'Guest Background: From Unix Engineer to ML Engineering' - startOffset: 210 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=210 - endOffset: 314 -- name: 'Defining MLOps: Culture, Process, and Technology' - startOffset: 314 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=314 - endOffset: 448 -- name: 'DevOps vs MLOps: Model Lifecycle and Data Drift' - startOffset: 448 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=448 - endOffset: 677 -- name: 'Monitoring for MLOps: Drift, Fairness, and Retraining Triggers' - startOffset: 677 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=677 - endOffset: 784 -- name: 'Monitoring Stack: Prometheus/Grafana and Inference Sensors' - startOffset: 784 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=784 - endOffset: 884 -- name: Commoditizing Inference Monitoring for Faster Iteration - startOffset: 884 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=884 - endOffset: 929 -- name: 'Role Distinction: ML Engineer as Practitioner, MLOps as Practice' - startOffset: 929 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=929 - endOffset: 997 -- name: 'Team Composition: Developer, Operator, and Product in MLOps' - startOffset: 997 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=997 - endOffset: 1208 -- name: 'The "MLOps Engineer" Debate: Title vs. 
Cross‑Functional Teams' - startOffset: 1208 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1208 - endOffset: 1427 -- name: MLOps Job Signals & Maturity Models (Google and Microsoft) - startOffset: 1427 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1427 - endOffset: 1621 -- name: 'Maturity Levels: Manual Training → Pipeline Automation' - startOffset: 1621 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1621 - endOffset: 1808 -- name: 'Advanced Maturity: Data‑Driven Triggers and Automated Retraining' - startOffset: 1808 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=1808 - endOffset: 2007 -- name: 'Cultural Shift: Monitoring as a Source of New Training Data' - startOffset: 2007 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2007 - endOffset: 2065 -- name: 'Tooling Landscape: Vendors, Open Source, and Kubernetes' - startOffset: 2065 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2065 - endOffset: 2226 -- name: 'Kubeflow Ecosystem: Pipelines, KFServing, Feast, and Katib' - startOffset: 2226 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2226 - endOffset: 2412 -- name: Hyperparameter Search with Katib and Notebook→Pipeline Workflows - startOffset: 2412 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2412 - endOffset: 2548 -- name: 'Kubeflow & TFX: ML Orchestration and Production Patterns' - startOffset: 2548 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2548 - endOffset: 2608 -- name: 'Learning Kubeflow: Docs, Workshops, and Community Resources' - startOffset: 2608 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2608 - endOffset: 2761 -- name: 'Getting Started: Cloud‑Managed Pipelines and Simple Projects' - startOffset: 2761 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2761 - endOffset: 2818 -- name: 'Data & Model Versioning: MLMD, Metadata, and Traceability' - startOffset: 2818 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=2818 - endOffset: 3035 -- name: 'Relationship to DataOps: Continuation and 
Divergence' - startOffset: 3035 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3035 - endOffset: 3104 -- name: 'Edge & Mobile Deployment: Offline Models and Edge Kubernetes' - startOffset: 3104 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3104 - endOffset: 3258 -- name: 'MLOps Guidance: Maturity Roadmaps and Manifesto Alternatives' - startOffset: 3258 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3258 - endOffset: 3313 -- name: 'Why Learn Kubeflow: Community Contribution and Career Value' - startOffset: 3313 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3313 - endOffset: 3424 -- name: 'MLOps Benefits: Automation, Productization, and Productivity' - startOffset: 3424 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3424 - endOffset: 3505 -- name: 'AutoML & Katib: Commoditization vs. Empowering Data Scientists' - startOffset: 3505 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3505 - endOffset: 3589 -- name: 'Simplified Serving: KFServing and Faster Model Endpoints' - startOffset: 3589 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3589 - endOffset: 3684 -- name: 'Small Teams Adopting MLOps: Practical Examples and Tradeoffs' - startOffset: 3684 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3684 - endOffset: 3776 -- name: 'Breaking Silos: Language‑Agnostic Pipelines and Collaboration' - startOffset: 3776 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3776 - endOffset: 3899 -- name: Closing Remarks & Resource Links - startOffset: 3899 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3899 - endOffset: 3922 -- name: Episode End - startOffset: 3922 - url: https://www.youtube.com/watch?v=-i0fVp0ntYA&t=3922 - endOffset: 3768 --- diff --git a/_podcast/s10e03-mlops-architect.md b/_podcast/mlops-model-monitoring-data-observability.md similarity index 97% rename from _podcast/s10e03-mlops-architect.md rename to _podcast/mlops-model-monitoring-data-observability.md index b946de87..ca7c1132 100644 --- 
a/_podcast/s10e03-mlops-architect.md +++ b/_podcast/mlops-model-monitoring-data-observability.md @@ -1,19 +1,136 @@ --- +title: "MLOps Architect Guide: Production Model Monitoring, Data Observability & Tooling" +short: "MLOps Architect" +season: 10 episode: 3 guests: - dannyleybzon +image: images/podcast/mlops-model-monitoring-data-observability.jpg ids: anchor: MLOps-Architect---Danny-Leybzon-e1m81iu youtube: p1gVaS4Zx5M -image: images/podcast/s10e03-mlops-architect.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/MLOps-Architect---Danny-Leybzon-e1m81iu apple: https://podcasts.apple.com/us/podcast/mlops-architect-danny-leybzon/id1541710331?i=1000575901051 spotify: https://open.spotify.com/episode/5gz5lnS7onwRUtbcmpOSuU?si=8cbe799f284c4623 youtube: https://www.youtube.com/watch?v=p1gVaS4Zx5M -season: 10 -short: MLOps Architect -title: 'MLOps Architect Guide: Production Model Monitoring, Data Observability & Tooling' + +description: "Master MLOps, model monitoring & data observability with guidance on production observability, ETL root causes, tooling trade-offs, ONNX, build vs buy" +intro: "How do you keep machine learning models reliable in production — what should you monitor, where do upstream failures originate, and which tooling decisions actually matter? In this episode, Danny Leybzon, MLOps Architect at WhyLabs and computational statistics alum of UCLA, walks through the practical challenges of production model monitoring, data observability, and tooling trade-offs. Drawing on his path from analyst and product roles at Qubole to field engineering at Imply and now advising customers on observability, Danny defines the MLOps Architect as a technical-business bridge and explains how to prioritize production-first monitoring efforts.

Topics covered include scope of observability across ETL and data pipelines, data profiling architecture (WhyLogs, profiles, Apache Druid), build vs buy decisions, platform-agnostic integrations and ONNX interoperability, and trends around cloud-native stacks and vendor lock-in. He also offers hiring and career perspectives for MLOps roles and research priorities like fairness and segmentation. Listen to get concrete guidance on designing model monitoring, choosing observability tooling, and identifying upstream root causes so you can reduce incidents and improve model reliability in production" +topics: +- MLOps +- tools +- data engineering +dateadded: 2022-08-13 + +duration: PT00H57M51S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=0 + endOffset: 116 +- name: 'Guest Overview: Danny Leybzon, MLOps Architect at WhyLabs' + startOffset: 116 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=116 + endOffset: 192 +- name: 'Career Journey: From paralegal ambitions to statistics and machine learning' + startOffset: 192 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=192 + endOffset: 446 +- name: 'Prior Role: Field Engineer / Solutions Engineer experience' + startOffset: 446 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=446 + endOffset: 491 +- name: 'Role Definition: MLOps Architect as technical-business bridge' + startOffset: 491 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=491 + endOffset: 632 +- name: 'Architecture Advising: Tooling trade-offs and landscape navigation' + startOffset: 632 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=632 + endOffset: 770 +- name: 'Role Popularity: Uniqueness of the "MLOps Architect" title' + startOffset: 770 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=770 + endOffset: 830 +- name: 'Startup Reality: Wearing many hats in early-stage companies' + startOffset: 830 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=830 + endOffset: 
935 +- name: 'Demonstrating Versatility: Convincing employers you can do it all' + startOffset: 935 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=935 + endOffset: 1101 +- name: 'Hiring Story: Cross-functional interview process at WhyLabs' + startOffset: 1101 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1101 + endOffset: 1324 +- name: 'Career Decision: Choosing startup risk for growth and learning' + startOffset: 1324 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1324 + endOffset: 1504 +- name: 'Prioritization Strategy: Focusing on production and model monitoring' + startOffset: 1504 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1504 + endOffset: 1655 +- name: 'Observability Scope: ETL, data pipelines, and upstream root causes' + startOffset: 1655 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1655 + endOffset: 1739 +- name: 'Customer Profiles: Production-first vs pre-deployment teams' + startOffset: 1739 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1739 + endOffset: 1839 +- name: 'Market Education: Shift from "why monitor" to "how to monitor"' + startOffset: 1839 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1839 + endOffset: 1910 +- name: 'Data Profiling Architecture: WhyLogs, profiles, and Apache Druid backend' + startOffset: 1910 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1910 + endOffset: 2065 +- name: 'Build vs Buy: Guiding customers on tooling and procurement decisions' + startOffset: 2065 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2065 + endOffset: 2207 +- name: 'Platform Agnostic Integrations: Serving and inference tooling realities' + startOffset: 2207 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2207 + endOffset: 2281 +- name: 'ONNX Adoption: Interoperability use cases and industry uptake' + startOffset: 2281 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2281 + endOffset: 2350 +- name: 'Tooling Trends: Cloud-native stacks, heterogeneity, and vendor lock-in' + startOffset: 
2350 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2350 + endOffset: 2460 +- name: 'Research Focus: Fairness, bias, segmentation over explainability' + startOffset: 2460 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2460 + endOffset: 2587 +- name: 'Productivity Habits: Inbox zero, workspace windows, and task management' + startOffset: 2587 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2587 + endOffset: 2749 +- name: 'Career Strategy: Exploration vs exploitation and Thompson sampling analogy' + startOffset: 2749 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2749 + endOffset: 3023 +- name: 'Skillset Advice: Coding, communication, and being an effective Googler' + startOffset: 3023 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3023 + endOffset: 3350 +- name: 'WhyLogs vs WhyLabs: Open-source profiling vs SaaS observability' + startOffset: 3350 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3350 + endOffset: 3487 +- name: 'Closing Remarks: Final thoughts, contact info, and upcoming workshops' + startOffset: 3487 + url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3487 + endOffset: 3471 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Danny Leybzon, MLOps Architect at WhyLabs' @@ -652,7 +769,7 @@ transcript: sec: 1816 time: '30:16' who: Danny -- header: 'Market Education: Shift from "why monitor" to "how to monitor"' +- header: 'Market Education: Shift from "why monitor" to "how to monitor"' - line: Okay. You said you’re trying to be pre-emptive and this is also part of your role, right? You are trying to spread awareness about this problem by talking on different podcasts about model monitoring, why it’s important, what can go @@ -1271,131 +1388,6 @@ transcript: sec: 3587 time: '59:47' who: Danny -description: Master MLOps, model monitoring & data observability with guidance on - production observability, ETL root causes, tooling trade-offs, ONNX, build vs buy. 
-intro: How do you keep machine learning models reliable in production — what should - you monitor, where do upstream failures originate, and which tooling decisions actually - matter? In this episode, Danny Leybzon, MLOps Architect at WhyLabs and computational - statistics alum of UCLA, walks through the practical challenges of production model - monitoring, data observability, and tooling trade-offs. Drawing on his path from - analyst and product roles at Qubole to field engineering at Imply and now advising - customers on observability, Danny defines the MLOps Architect as a technical-business - bridge and explains how to prioritize production-first monitoring efforts.

- Topics covered include scope of observability across ETL and data pipelines, data - profiling architecture (WhyLogs, profiles, Apache Druid), build vs buy decisions, - platform-agnostic integrations and ONNX interoperability, and trends around cloud-native - stacks and vendor lock-in. He also offers hiring and career perspectives for MLOps - roles and research priorities like fairness and segmentation. Listen to get concrete - guidance on designing model monitoring, choosing observability tooling, and identifying - upstream root causes so you can reduce incidents and improve model reliability in - production. -dateadded: '2022-08-13' -duration: PT00H57M51S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=0 - endOffset: 116 -- name: 'Guest Overview: Danny Leybzon, MLOps Architect at WhyLabs' - startOffset: 116 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=116 - endOffset: 192 -- name: 'Career Journey: From paralegal ambitions to statistics and machine learning' - startOffset: 192 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=192 - endOffset: 446 -- name: 'Prior Role: Field Engineer / Solutions Engineer experience' - startOffset: 446 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=446 - endOffset: 491 -- name: 'Role Definition: MLOps Architect as technical-business bridge' - startOffset: 491 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=491 - endOffset: 632 -- name: 'Architecture Advising: Tooling trade-offs and landscape navigation' - startOffset: 632 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=632 - endOffset: 770 -- name: 'Role Popularity: Uniqueness of the "MLOps Architect" title' - startOffset: 770 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=770 - endOffset: 830 -- name: 'Startup Reality: Wearing many hats in early-stage companies' - startOffset: 830 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=830 - endOffset: 935 -- name: 'Demonstrating 
Versatility: Convincing employers you can do it all' - startOffset: 935 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=935 - endOffset: 1101 -- name: 'Hiring Story: Cross-functional interview process at WhyLabs' - startOffset: 1101 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1101 - endOffset: 1324 -- name: 'Career Decision: Choosing startup risk for growth and learning' - startOffset: 1324 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1324 - endOffset: 1504 -- name: 'Prioritization Strategy: Focusing on production and model monitoring' - startOffset: 1504 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1504 - endOffset: 1655 -- name: 'Observability Scope: ETL, data pipelines, and upstream root causes' - startOffset: 1655 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1655 - endOffset: 1739 -- name: 'Customer Profiles: Production-first vs pre-deployment teams' - startOffset: 1739 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1739 - endOffset: 1839 -- name: 'Market Education: Shift from "why monitor" to "how to monitor"' - startOffset: 1839 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1839 - endOffset: 1910 -- name: 'Data Profiling Architecture: WhyLogs, profiles, and Apache Druid backend' - startOffset: 1910 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=1910 - endOffset: 2065 -- name: 'Build vs Buy: Guiding customers on tooling and procurement decisions' - startOffset: 2065 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2065 - endOffset: 2207 -- name: 'Platform Agnostic Integrations: Serving and inference tooling realities' - startOffset: 2207 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2207 - endOffset: 2281 -- name: 'ONNX Adoption: Interoperability use cases and industry uptake' - startOffset: 2281 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2281 - endOffset: 2350 -- name: 'Tooling Trends: Cloud-native stacks, heterogeneity, and vendor lock-in' - startOffset: 2350 - url: 
https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2350 - endOffset: 2460 -- name: 'Research Focus: Fairness, bias, segmentation over explainability' - startOffset: 2460 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2460 - endOffset: 2587 -- name: 'Productivity Habits: Inbox zero, workspace windows, and task management' - startOffset: 2587 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2587 - endOffset: 2749 -- name: 'Career Strategy: Exploration vs exploitation and Thompson sampling analogy' - startOffset: 2749 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=2749 - endOffset: 3023 -- name: 'Skillset Advice: Coding, communication, and being an effective Googler' - startOffset: 3023 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3023 - endOffset: 3350 -- name: 'WhyLogs vs WhyLabs: Open-source profiling vs SaaS observability' - startOffset: 3350 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3350 - endOffset: 3487 -- name: 'Closing Remarks: Final thoughts, contact info, and upcoming workshops' - startOffset: 3487 - url: https://www.youtube.com/watch?v=p1gVaS4Zx5M&t=3487 - endOffset: 3471 --- Links: diff --git a/_podcast/s14e07-from-mlops-to-dataops.md b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md similarity index 97% rename from _podcast/s14e07-from-mlops-to-dataops.md rename to _podcast/modern-data-pipelines-orchestration-ingestion-modeling.md index 5d662040..c8cf07ad 100644 --- a/_podcast/s14e07-from-mlops-to-dataops.md +++ b/_podcast/modern-data-pipelines-orchestration-ingestion-modeling.md @@ -1,20 +1,115 @@ --- +title: "Modern Data Pipeline Architecture: Ingestion, Orchestration, Transformation & MLOps Systems" +short: "Modern Data Pipelines" +season: 14 episode: 7 guests: - santonatuli +image: images/podcast/modern-data-pipelines-orchestration-ingestion-modeling.jpg ids: anchor: ow/datatalksclub/episodes/From-MLOps-to-DataOps---Santona-Tuli-e25vb0q youtube: kSTfhQ_SZgc -image: 
images/podcast/s14e07-from-mlops-to-dataops.jpg links: anchor: https://podcasters.spotify.com/pod/pod/show/datatalksclub/episodes/From-MLOps-to-DataOps---Santona-Tuli-e25vb0q apple: https://podcasts.apple.com/us/podcast/from-mlops-to-dataops-santona-tuli/id1541710331?i=1000618121008 spotify: https://open.spotify.com/episode/0inhE28kLI4T1AsSjgwnL8?si=WeFES7dXRxqSK_SKonBejw youtube: https://www.youtube.com/watch?v=kSTfhQ_SZgc -season: 14 -short: From MLOps to DataOps -title: 'Build Modern Data Pipelines: Ingestion, dbt Transformations, Airflow Orchestration - & MLOps' + +description: "Master modern data pipelines with dbt transforms and Airflow orchestration—streamline ingestion, speed feature engineering and analytics delivery" +intro: "How do you build a modern data pipeline that reliably moves raw events through ingestion, dbt transformations, Airflow orchestration and into production ML and analytics? In this episode, Santona Tuli — a former CERN researcher turned ML and data engineering lead at Upsolver — walks through practical patterns and trade-offs for end-to-end pipelines. Drawing on experience from particle-physics event analysis to NLP and workflow authoring with Airflow, Santona explains where ingestion engines and declarative SQL frameworks fit, and when dbt belongs in the stack.

Topics include Upsolver vs dbt (pipeline authoring, execution engine and ingestion focus), differences between ML pipelines and analytics pipelines, MLOps vs DataOps, and dbt’s role in analytics engineering. We cover tooling (orchestrators, Spark, Kafka/Kinesis, feature stores, vector DBs), modern data stack choices like Snowflake and Databricks, lakehouse and staging patterns, and ingestion pre-processing needs such as deduplication, ordering guarantees and PII masking. You’ll also hear about transformation and data modeling (entities, foreign keys, business mappings), marts and dashboards, feature engineering and model serving, persona-driven pipeline design, and career-learning recommendations. Listen to gain concrete design guidance, tooling trade-offs, and resources to build scalable data and MLOps pipelines" +topics: +- data engineering +- MLOps +- tools +dateadded: 2023-06-24 + +duration: PT00H59M43S + +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=0 + endOffset: 90 +- name: 'Career journey: CERN researcher → NLP, ML engineering, Python, Astronomer, + Upsolver' + startOffset: 90 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=90 + endOffset: 428 +- name: Transition to workflow authoring and orchestration (Airflow, Astronomer) + startOffset: 428 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=428 + endOffset: 648 +- name: 'Upsolver vs DBT: pipeline authoring, execution engine, and ingestion focus' + startOffset: 648 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=648 + endOffset: 805 +- name: Comparing ML pipelines and analytics data pipelines + startOffset: 805 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=805 + endOffset: 1124 +- name: 'MLOps vs DataOps: operationalizing models vs business data' + startOffset: 1124 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1124 + endOffset: 1497 +- name: Analytics engineering and DBT's role in the modern data workflow + 
startOffset: 1497 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1497 + endOffset: 1603 +- name: 'Tooling landscape: orchestrators, Spark, Kafka/Kinesis, feature stores, vector + DBs' + startOffset: 1603 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1603 + endOffset: 1756 +- name: 'Modern data stack choices: Upsolver, Snowflake, Databricks, build vs buy' + startOffset: 1756 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1756 + endOffset: 1977 +- name: Data staging and lakehouse patterns; managed ingestion hiding the stage + startOffset: 1977 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1977 + endOffset: 2230 +- name: 'Ingestion pre-processing: deduplication, ordering guarantees, PII masking' + startOffset: 2230 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2230 + endOffset: 2363 +- name: 'Transformation and data modeling: entities, foreign keys, and business mappings' + startOffset: 2363 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2363 + endOffset: 2585 +- name: Marts, dashboards and translating business questions into metrics + startOffset: 2585 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2585 + endOffset: 2697 +- name: 'ML pipeline specifics: feature engineering, model training, and serving' + startOffset: 2697 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2697 + endOffset: 2877 +- name: Translating academic data/physics skills to industry pipelines + startOffset: 2877 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2877 + endOffset: 3174 +- name: Persona-driven pipeline design and real use-case examples + startOffset: 3174 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3174 + endOffset: 3356 +- name: 'Career advice: value of being a generalist and closing skill gaps' + startOffset: 3356 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3356 + endOffset: 3409 +- name: 'Learning strategy: vetting sources, networking, and engineering blogs' + startOffset: 3409 + url: 
https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3409 + endOffset: 3556 +- name: 'Recommended resources: Fundamentals of Data Engineering, Airflow guides, + whitepapers' + startOffset: 3556 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3556 + endOffset: 3673 +- name: Episode Closing and links + startOffset: 3673 + url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3673 + endOffset: 3583 + transcript: - header: Episode Introduction - header: 'Career journey: CERN researcher → NLP, ML engineering, Python, Astronomer, @@ -1001,111 +1096,6 @@ transcript: sec: 3673 time: '1:01:13' who: Santona -description: Master modern data pipelines with dbt transforms and Airflow orchestration—streamline - ingestion, speed feature engineering and analytics delivery. -intro: How do you build a modern data pipeline that reliably moves raw events through - ingestion, dbt transformations, Airflow orchestration and into production ML and - analytics? In this episode, Santona Tuli — a former CERN researcher turned ML and - data engineering lead at Upsolver — walks through practical patterns and trade-offs - for end-to-end pipelines. Drawing on experience from particle-physics event analysis - to NLP and workflow authoring with Airflow, Santona explains where ingestion engines - and declarative SQL frameworks fit, and when dbt belongs in the stack.

- Topics include Upsolver vs dbt (pipeline authoring, execution engine and ingestion - focus), differences between ML pipelines and analytics pipelines, MLOps vs DataOps, - and dbt’s role in analytics engineering. We cover tooling (orchestrators, Spark, - Kafka/Kinesis, feature stores, vector DBs), modern data stack choices like Snowflake - and Databricks, lakehouse and staging patterns, and ingestion pre-processing needs - such as deduplication, ordering guarantees and PII masking. You’ll also hear about - transformation and data modeling (entities, foreign keys, business mappings), marts - and dashboards, feature engineering and model serving, persona-driven pipeline design, - and career-learning recommendations. Listen to gain concrete design guidance, tooling - trade-offs, and resources to build scalable data and MLOps pipelines. -dateadded: '2023-06-24' -duration: PT00H59M43S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=0 - endOffset: 90 -- name: 'Career journey: CERN researcher → NLP, ML engineering, Python, Astronomer, - Upsolver' - startOffset: 90 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=90 - endOffset: 428 -- name: Transition to workflow authoring and orchestration (Airflow, Astronomer) - startOffset: 428 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=428 - endOffset: 648 -- name: 'Upsolver vs DBT: pipeline authoring, execution engine, and ingestion focus' - startOffset: 648 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=648 - endOffset: 805 -- name: Comparing ML pipelines and analytics data pipelines - startOffset: 805 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=805 - endOffset: 1124 -- name: 'MLOps vs DataOps: operationalizing models vs business data' - startOffset: 1124 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1124 - endOffset: 1497 -- name: Analytics engineering and DBT's role in the modern data workflow - startOffset: 1497 - url: 
https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1497 - endOffset: 1603 -- name: 'Tooling landscape: orchestrators, Spark, Kafka/Kinesis, feature stores, vector - DBs' - startOffset: 1603 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1603 - endOffset: 1756 -- name: 'Modern data stack choices: Upsolver, Snowflake, Databricks, build vs buy' - startOffset: 1756 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1756 - endOffset: 1977 -- name: Data staging and lakehouse patterns; managed ingestion hiding the stage - startOffset: 1977 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=1977 - endOffset: 2230 -- name: 'Ingestion pre-processing: deduplication, ordering guarantees, PII masking' - startOffset: 2230 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2230 - endOffset: 2363 -- name: 'Transformation and data modeling: entities, foreign keys, and business mappings' - startOffset: 2363 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2363 - endOffset: 2585 -- name: Marts, dashboards and translating business questions into metrics - startOffset: 2585 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2585 - endOffset: 2697 -- name: 'ML pipeline specifics: feature engineering, model training, and serving' - startOffset: 2697 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2697 - endOffset: 2877 -- name: Translating academic data/physics skills to industry pipelines - startOffset: 2877 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=2877 - endOffset: 3174 -- name: Persona-driven pipeline design and real use-case examples - startOffset: 3174 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3174 - endOffset: 3356 -- name: 'Career advice: value of being a generalist and closing skill gaps' - startOffset: 3356 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3356 - endOffset: 3409 -- name: 'Learning strategy: vetting sources, networking, and engineering blogs' - startOffset: 3409 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3409 
- endOffset: 3556 -- name: 'Recommended resources: Fundamentals of Data Engineering, Airflow guides, - whitepapers' - startOffset: 3556 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3556 - endOffset: 3673 -- name: Episode Closing and links - startOffset: 3673 - url: https://www.youtube.com/watch?v=kSTfhQ_SZgc&t=3673 - endOffset: 3583 --- Links: diff --git a/_podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md similarity index 94% rename from _podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md rename to _podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md index 14bd51f3..82dafce6 100644 --- a/_podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.md +++ b/_podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.md @@ -1,38 +1,107 @@ --- -description: 'Discover RAG and vector DBs strategies for search: build podcast chatbots, - optimize embeddings, reduce LLM hallucinations and boost personalization.' -intro: 'How do you modernize search systems with vector search and retrieval‑augmented - generation (RAG) without trading away relevance or inviting hallucinations? In this - episode we talk with a search practitioner rooted in information retrieval who has - worked with Solr, Lucene and the Semantic Web era and later in search consulting - and teaching at Lucidworks and OpenSource Connections.

We cover the arc - from classic keyword search to NLP, embeddings and vector databases (including Qdrant - and plug‑and‑play vector search), and practical migration decisions: when to add - vectors to an existing stack versus adopting a standalone vector DB. You’ll hear - concrete guidance on RAG concepts to reduce LLM hallucinations, building a chatbot - from podcast transcripts using Whisper, ingest strategies (chunking, overlap, embedding - models), and orchestration with tools like LangChain. The episode also digs into - prompt design, citation strategies, multi‑level RAG evaluation with human‑in‑the‑loop - testing, and personalization approaches such as session‑based recommendations and - re‑ranking.

Listen to gain actionable techniques for vector search, embeddings, - RAG pipelines, evaluation metrics, and resources to deepen your knowledge.' +title: "Modern Search Systems: Vector Databases, LLMs and Semantic Retrieval" +short: "Searching Beyond the Surface: Navigating Challenges and Innovations in Search Technologies" +season: 17 episode: 2 guests: - atitaarora +image: images/podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.jpg ids: - anchor: atatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps + anchor: datatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps youtube: _fbe1QyJ1PY -image: images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Navigating-Challenges-and-Innovations-in-Search-Technologies---Atita-Arora-e2d7rps apple: https://podcasts.apple.com/us/podcast/navigating-challenges-and-innovations-in-search/id1541710331?i=1000639476594 spotify: https://open.spotify.com/episode/7mUMvxP4Efyeh0lhF5CvT6?si=7qqKrsMfQxaZy435s3XIEA youtube: https://www.youtube.com/watch?v=_fbe1QyJ1PY -season: 17 -short: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search - Technologies' -title: 'Searching Beyond the Surface: Navigating Challenges and Innovations in Search - Technologies' +description: "Learn vector databases, LLMs & semantic retrieval: RAG, embeddings and vector search tactics to build accurate chatbots, personalized search and better ranking." +topics: +- NLP +- LLMs +- MLOps +- machine learning +- data engineering +intro: "How do modern search systems combine vector databases, LLMs, and semantic retrieval to deliver relevant, reliable results—and when should you adopt each component? 
In this episode Atita Arora walks through that question from both historical and practical angles. A long-time contributor to information retrieval projects (including Apache OpenNLP and Quepid) and author of posts on vectors in e-commerce and the open-source Chorus implementation, Atita brings hands-on experience plus ongoing research into evaluating RAG systems and a commitment to user-centric metrics and inclusivity.

We cover the evolution from Solr/Lucene and the Semantic Web era to NLP for query-content matching; practical vector topics such as Qdrant, plug-and-play vector search, and migration tradeoffs; and end-to-end RAG pipelines—Whisper transcripts, chunking and embedding strategies, LangChain orchestration, prompt design, citations, and multi-level evaluation with human-in-the-loop testing. You’ll also hear about session-based recommendations, personalization approaches, and curated learning resources like Intro to Information Retrieval and Vector Hub. Listen to gain actionable guidance on building and evaluating vector search and retrieval-augmented generation systems while avoiding common pitfalls like LLM hallucinations." +dateadded: 2024-01-07 +duration: PT00H59M13S +quotableClips: +- name: 'Episode Introduction: search focus and guest overview' + startOffset: 115 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=115 + endOffset: 158 +- name: Background & career beginnings in information retrieval + startOffset: 158 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=158 + endOffset: 282 +- name: 'Early search stack: Solr, Lucene and the Semantic Web era' + startOffset: 282 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=282 + endOffset: 558 +- name: 'NLP and search: matching queries to content' + startOffset: 558 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=558 + endOffset: 689 +- name: 'Search consulting & teaching: Lucidworks and OpenSource Connections' + startOffset: 689 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=689 + endOffset: 1021 +- name: 'Vector databases overview: Qdrant and plug-and-play vector search' + startOffset: 1021 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1021 + endOffset: 1227 +- name: 'Migration decisions: vectors in existing search vs. 
standalone DBs' + startOffset: 1227 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1227 + endOffset: 1380 +- name: 'Evolution of search: NLP, personalization, learning-to-rank and LLMs' + startOffset: 1380 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1380 + endOffset: 1838 +- name: 'RAG concepts: retrieval plus generation to reduce LLM hallucinations' + startOffset: 1838 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1838 + endOffset: 2149 +- name: Building a chatbot from podcast transcripts and Whisper + startOffset: 2149 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2149 + endOffset: 2304 +- name: 'Ingest strategy: chunking, overlap, embedding models and vectorization' + startOffset: 2304 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2304 + endOffset: 2492 +- name: 'Orchestration tools: Langchain’s role in RAG pipelines' + startOffset: 2492 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2492 + endOffset: 2569 +- name: 'Retrieval → augmentation → generation: prompt design and citations' + startOffset: 2569 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2569 + endOffset: 2889 +- name: 'RAG evaluation: multi-level metrics, offline tests and human-in-the-loop' + startOffset: 2889 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2889 + endOffset: 3052 +- name: 'Evaluation reading: Human-in-the-Loop and practical methodologies' + startOffset: 3052 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3052 + endOffset: 3127 +- name: 'Vector databases for ML: session-based recommendations and re-ranking' + startOffset: 3127 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3127 + endOffset: 3294 +- name: 'Personalization approaches: session-based vs collaborative filtering' + startOffset: 3294 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3294 + endOffset: 3470 +- name: 'Learning resources: Intro to Information Retrieval, Relevant Search, Vector + Hub' + startOffset: 3470 + url: 
https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3470 + endOffset: 3624 +- name: Episode wrap-up, links and next steps + startOffset: 3624 + url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3624 + endOffset: 3553 transcript: - header: 'Episode Introduction: search focus and guest overview' - line: This week, we'll talk about search. We have a very special guest today, Atita. @@ -302,7 +371,7 @@ transcript: sec: 813 time: '13:33' who: Atita -- header: 'Vector databases overview: Qdrant and plug‑and‑play vector search' +- header: 'Vector databases overview: Qdrant and plug-and-play vector search' - line: Qdrant is a vector database, right? sec: 1021 time: '17:01' @@ -423,7 +492,7 @@ transcript: sec: 1244 time: '20:44' who: Atita -- header: 'Evolution of search: NLP, personalization, learning‑to‑rank and LLMs' +- header: 'Evolution of search: NLP, personalization, learning-to-rank and LLMs' - line: As somebody who has worked for 15 years in this area, you probably started with creating indices for Lucene in something similar to MapReduce without Hadoop in there. Now it has changed significantly since then. So now we’re talking about @@ -867,7 +936,7 @@ transcript: sec: 2887 time: '48:07' who: Atita -- header: 'RAG evaluation: multi‑level metrics, offline tests and human‑in‑the‑loop' +- header: 'RAG evaluation: multi-level metrics, offline tests and human-in-the-loop' - line: And then we were talking about evaluation because, right now, I have this RAG system with all the podcasts transcripts, but now I want to see if it's working fine. I can, of course, go ahead and test it – make 3, 4, 5 queries and then see, @@ -904,7 +973,7 @@ transcript: sec: 2954 time: '49:14' who: Atita -- header: 'Evaluation reading: Human‑in‑the‑Loop and practical methodologies' +- header: 'Evaluation reading: Human-in-the-Loop and practical methodologies' - line: So there's a book you mentioned, Human in the Loop. It's a book from Manning, right? [Atita agrees] It’s by Robert Monarch. 
sec: 3052 @@ -942,7 +1011,7 @@ transcript: sec: 3122 time: '52:02' who: Alexey -- header: 'Vector databases for ML: session‑based recommendations and re‑ranking' +- header: 'Vector databases for ML: session-based recommendations and re-ranking' - line: I noticed that we have a question. The question is from Taras. Taras is asking, “Is there any application of vector databases for machine learning? For instance, could it be used for making the training of deep learning models faster? Maybe @@ -989,7 +1058,7 @@ transcript: sec: 3288 time: '54:48' who: Atita -- header: 'Personalization approaches: session‑based vs collaborative filtering' +- header: 'Personalization approaches: session-based vs collaborative filtering' - line: What you mentioned is… With collaborative filtering, we would need to re-do the whole thing, right? Then the vectors we do from another training will be super different from the first training. What you mentioned right now with clicks updating @@ -1122,7 +1191,7 @@ transcript: sec: 3621 time: '1:00:21' who: Atita -- header: Episode wrap‑up, links and next steps +- header: Episode wrap-up, links and next steps - line: Yes. So please make a post when you publish that evaluation article. With that, I think, that's all we have time for today. Atita, thanks a lot for joining us today. 
@@ -1155,88 +1224,13 @@ transcript: sec: 3668 time: '1:01:08' who: Atita -dateadded: '2024-01-07' -duration: PT00H59M13S -quotableClips: -- name: 'Episode Introduction: search focus and guest overview' - startOffset: 115 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=115 - endOffset: 158 -- name: Background & career beginnings in information retrieval - startOffset: 158 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=158 - endOffset: 282 -- name: 'Early search stack: Solr, Lucene and the Semantic Web era' - startOffset: 282 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=282 - endOffset: 558 -- name: 'NLP and search: matching queries to content' - startOffset: 558 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=558 - endOffset: 689 -- name: 'Search consulting & teaching: Lucidworks and OpenSource Connections' - startOffset: 689 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=689 - endOffset: 1021 -- name: 'Vector databases overview: Qdrant and plug‑and‑play vector search' - startOffset: 1021 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1021 - endOffset: 1227 -- name: 'Migration decisions: vectors in existing search vs. 
standalone DBs' - startOffset: 1227 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1227 - endOffset: 1380 -- name: 'Evolution of search: NLP, personalization, learning‑to‑rank and LLMs' - startOffset: 1380 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1380 - endOffset: 1838 -- name: 'RAG concepts: retrieval plus generation to reduce LLM hallucinations' - startOffset: 1838 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=1838 - endOffset: 2149 -- name: Building a chatbot from podcast transcripts and Whisper - startOffset: 2149 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2149 - endOffset: 2304 -- name: 'Ingest strategy: chunking, overlap, embedding models and vectorization' - startOffset: 2304 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2304 - endOffset: 2492 -- name: 'Orchestration tools: Langchain’s role in RAG pipelines' - startOffset: 2492 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2492 - endOffset: 2569 -- name: 'Retrieval → augmentation → generation: prompt design and citations' - startOffset: 2569 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2569 - endOffset: 2889 -- name: 'RAG evaluation: multi‑level metrics, offline tests and human‑in‑the‑loop' - startOffset: 2889 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=2889 - endOffset: 3052 -- name: 'Evaluation reading: Human‑in‑the‑Loop and practical methodologies' - startOffset: 3052 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3052 - endOffset: 3127 -- name: 'Vector databases for ML: session‑based recommendations and re‑ranking' - startOffset: 3127 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3127 - endOffset: 3294 -- name: 'Personalization approaches: session‑based vs collaborative filtering' - startOffset: 3294 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3294 - endOffset: 3470 -- name: 'Learning resources: Intro to Information Retrieval, Relevant Search, Vector - Hub' - startOffset: 3470 - url: 
https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3470 - endOffset: 3624 -- name: Episode wrap‑up, links and next steps - startOffset: 3624 - url: https://www.youtube.com/watch?v=_fbe1QyJ1PY&t=3624 - endOffset: 3553 +context: 'Search today is less about keywords and more about constructing a reliable + retrieval-plus-generation system: the core through-line is that effective modern + search combines classical IR principles (indexing, ranking, evaluation) with semantic + vector representations, embedding stores or vector databases, and LLMs—stitched + together by careful ingestion, orchestration, prompt design, and human-in-the-loop + evaluation—to deliver accurate, contextualized, and personalized answers.' --- - Links: * [LinkedIn](https://www.linkedin.com/in/atitaarora/){:target="_blank"} diff --git a/_podcast/s10e07-dataset-creation-and-curation.md b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md similarity index 96% rename from _podcast/s10e07-dataset-creation-and-curation.md rename to _podcast/nlp-dataset-creation-annotation-tools-workflows.md index c7523333..8e78edc2 100644 --- a/_podcast/s10e07-dataset-creation-and-curation.md +++ b/_podcast/nlp-dataset-creation-annotation-tools-workflows.md @@ -1,20 +1,115 @@ --- +title: "Practical Guide to Dataset Creation & Annotation for NLP: Active Learning, Weak Supervision, Tools" +short: "Dataset Creation and Curation" +season: 10 episode: 7 guests: - christiannswart +image: images/podcast/nlp-dataset-creation-annotation-tools-workflows.jpg ids: anchor: Dataset-Creation-and-Curation---Christiaan-Swart-e1nd1f6 youtube: QggWydGrWoo -image: images/podcast/s10e07-dataset-creation-and-curation.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Dataset-Creation-and-Curation---Christiaan-Swart-e1nd1f6 apple: https://podcasts.apple.com/us/podcast/dataset-creation-and-curation-christiaan-swart/id1541710331?i=1000578975804 spotify: https://open.spotify.com/episode/26K8JrQXKwLpQelo4n4Kdi?si=e2ad35c4941446c4 
youtube: https://www.youtube.com/watch?v=QggWydGrWoo -season: 10 -short: Dataset Creation and Curation -title: 'Practical Guide to Dataset Creation & Annotation for NLP: Active Learning, - Weak Supervision, Tools' + +description: "Discover dataset creation, annotation & active learning: practical annotation UX, quality metrics, prototyping tips and tooling to accelerate NLP models." +intro: "How do you create high-quality NLP datasets without breaking the budget? In this episode Christiaan Swart — an NLP practitioner with six years’ experience across email, complaints, pharma, and sales who cofounded Comtura (born from sales call transcription and CRM integration) — walks through practical methods for dataset creation and annotation.

We cover automated, manual, and hybrid pipelines; stakeholder alignment to de-risk projects; in-house vs. crowdsourcing trade-offs; and building a living annotation guidebook for ambiguous cases. Chris explains model-assisted annotation (pre-labeling and interpretability layers), capturing expert knowledge, establishing human baselines, and improving annotation UX and productivity. You’ll also hear about annotation quality metrics (inter-annotator agreement, throughput, fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling functions), programmatic heuristics, and tooling recommendations like Prodigy, Doccano, Label Studio, Snorkel, and Rubrix. Quick-start tips using IPython widgets and Fast.ai, plus privacy and multilingual considerations (GDPR, anonymization), round out the conversation.

Listen to learn actionable strategies for cost-effective dataset creation, annotation workflows, and tool choices that speed model development and produce reliable training data" +topics: +- NLP +- data +dateadded: 2022-09-09 + +duration: PT01H03M40S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=0 + endOffset: 82 +- name: 'Episode Overview: Dataset creation, curation, and annotation' + startOffset: 82 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=82 + endOffset: 144 +- name: Guest Background & Career in NLP and bio-NLP + startOffset: 144 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=144 + endOffset: 312 +- name: 'Comtura Origin: Sales call transcription and CRM integration' + startOffset: 312 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=312 + endOffset: 411 +- name: 'Dataset Creation Approaches: Automated, manual, and hybrid pipelines' + startOffset: 411 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=411 + endOffset: 542 +- name: 'Stakeholder Alignment: Top-down framing to de-risk projects' + startOffset: 542 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=542 + endOffset: 939 +- name: 'Annotation Strategy: In-house vs. 
crowdsourcing trade-offs' + startOffset: 939 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=939 + endOffset: 1116 +- name: 'Annotation Guidebook: Living documentation and ambiguous cases' + startOffset: 1116 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1116 + endOffset: 1257 +- name: 'Model-Assisted Annotation: Pre-labeling and interpretability layers' + startOffset: 1257 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1257 + endOffset: 1441 +- name: 'Expert Knowledge Capture: Mind maps and task translation for annotators' + startOffset: 1441 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1441 + endOffset: 1768 +- name: 'Human Baseline & Prototyping: Validating feasibility and business value' + startOffset: 1768 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1768 + endOffset: 2102 +- name: 'Annotation UX & Productivity: Hotkeys, interfaces, and iterative gains' + startOffset: 2102 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2102 + endOffset: 2262 +- name: 'Annotation Quality Metrics: Inter-annotator agreement, throughput, fatigue' + startOffset: 2262 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2262 + endOffset: 2571 +- name: 'Active Learning in Practice: Expectations and typical gains' + startOffset: 2571 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2571 + endOffset: 2697 +- name: 'Distance Supervision & Weak Supervision: Labeling functions and Snorkel' + startOffset: 2697 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2697 + endOffset: 2904 +- name: 'Programmatic Heuristics: Entity/verb patterns and weak label design' + startOffset: 2904 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2904 + endOffset: 3037 +- name: 'Tooling Recommendations: Prodigy, Docanno, Label Studio, Snorkel, Rubrics' + startOffset: 3037 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3037 + endOffset: 3154 +- name: 'Portfolio Advice: Building career projects via dataset creation' + startOffset: 3154 + url: 
https://www.youtube.com/watch?v=QggWydGrWoo&t=3154 + endOffset: 3438 +- name: 'Quick-start Collection: IPython widgets and Fast.ai for beginners' + startOffset: 3438 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3438 + endOffset: 3506 +- name: 'Privacy & Multilingual NLP: GDPR, anonymization, and language challenges' + startOffset: 3506 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3506 + endOffset: 3820 +- name: 'Contact & Resources: Blog, company, and social links' + startOffset: 3820 + url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3820 + endOffset: 3820 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Dataset creation, curation, and annotation' @@ -36,7 +131,7 @@ transcript: sec: 139 time: '2:19' who: Christiaan -- header: Guest Background & Career in NLP and bio‑NLP +- header: Guest Background & Career in NLP and bio-NLP - line: Before we go into our main topic of dataset creation and curation, let's start with your background. Can you tell us about your career journey so far? sec: 144 @@ -150,7 +245,7 @@ transcript: sec: 489 time: '8:09' who: Alexey -- header: 'Stakeholder Alignment: Top‑down framing to de‑risk projects' +- header: 'Stakeholder Alignment: Top-down framing to de-risk projects' - line: Yeah, I think this is the bottom-up view. But I think the top-down view is what I think most data scientists struggle with, actually. And I think I've made most of my mistakes from having this kind of bottom-up view rather than a more @@ -250,7 +345,7 @@ transcript: sec: 542 time: '9:02' who: Christiaan -- header: 'Annotation Strategy: In‑house vs. crowdsourcing trade‑offs' +- header: 'Annotation Strategy: In-house vs. crowdsourcing trade-offs' - line: Yeah, that was quite a lot to unpack – a lot of information. Let me try to summarize. I probably missed a few of the very important bits. When it comes to the process of actually collecting data, first of all, we need to have the process. 
@@ -339,7 +434,7 @@ transcript: sec: 1133 time: '18:53' who: Christiaan -- header: 'Model‑Assisted Annotation: Pre‑labeling and interpretability layers' +- header: 'Model-Assisted Annotation: Pre-labeling and interpretability layers' - line: This pre-labeling, I think I saw a tool that does something like this. Correct me if I'm wrong. We present a piece of a document and ask annotators to label it, right? It can be a part from the sales call and we say, “Okay, based on the @@ -613,7 +708,7 @@ transcript: sec: 2197 time: '36:37' who: Alexey -- header: 'Annotation Quality Metrics: Inter‑annotator agreement, throughput, fatigue' +- header: 'Annotation Quality Metrics: Inter-annotator agreement, throughput, fatigue' - line: Yeah, I think annotation user experience is massive and it's also measurable. I'm a huge fan of this whole annotation process. You can have a very quantitative and database approach to how you measure the impact of these things. For example, @@ -907,7 +1002,7 @@ transcript: sec: 3221 time: '53:41' who: Christiaan -- header: 'Quick‑start Collection: IPython widgets and Fast.ai for beginners' +- header: 'Quick-start Collection: IPython widgets and Fast.ai for beginners' - line: In my personal experience, you can just start using IPython widgets, like widgets in Jupyter Notebook. It's super easy to start with. It's not as advanced as Snorkel or Prodigy, but if you need some binary classification case, then you @@ -1048,112 +1143,6 @@ transcript: sec: 3902 time: '1:05:02' who: Alexey -description: 'Discover dataset creation, annotation & active learning: practical annotation - UX, quality metrics, prototyping tips and tooling to accelerate NLP models.' -intro: How do you create high‑quality NLP datasets without breaking the budget? 
In - this episode Christiaan Swart — an NLP practitioner with six years’ experience across - email, complaints, pharma, and sales who cofounded Comtura (born from sales call - transcription and CRM integration) — walks through practical methods for dataset - creation and annotation.

We cover automated, manual, and hybrid pipelines; - stakeholder alignment to de‑risk projects; in‑house vs. crowdsourcing trade‑offs; - and building a living annotation guidebook for ambiguous cases. Chris explains model‑assisted - annotation (pre‑labeling and interpretability layers), capturing expert knowledge, - establishing human baselines, and improving annotation UX and productivity. You’ll - also hear about annotation quality metrics (inter‑annotator agreement, throughput, - fatigue), active learning expectations, distant/weak supervision (Snorkel and labeling - functions), programmatic heuristics, and tooling recommendations like Prodigy, Docanno, - Label Studio, Snorkel, and Rubrics. Quick‑start tips using IPython widgets and Fast.ai, - plus privacy and multilingual considerations (GDPR, anonymization), round out the - conversation.

Listen to learn actionable strategies for cost‑effective - dataset creation, annotation workflows, and tool choices that speed model development - and produce reliable training data. -dateadded: '2022-09-09' -duration: PT01H03M40S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=0 - endOffset: 82 -- name: 'Episode Overview: Dataset creation, curation, and annotation' - startOffset: 82 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=82 - endOffset: 144 -- name: Guest Background & Career in NLP and bio‑NLP - startOffset: 144 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=144 - endOffset: 312 -- name: 'Comtura Origin: Sales call transcription and CRM integration' - startOffset: 312 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=312 - endOffset: 411 -- name: 'Dataset Creation Approaches: Automated, manual, and hybrid pipelines' - startOffset: 411 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=411 - endOffset: 542 -- name: 'Stakeholder Alignment: Top‑down framing to de‑risk projects' - startOffset: 542 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=542 - endOffset: 939 -- name: 'Annotation Strategy: In‑house vs. 
crowdsourcing trade‑offs' - startOffset: 939 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=939 - endOffset: 1116 -- name: 'Annotation Guidebook: Living documentation and ambiguous cases' - startOffset: 1116 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1116 - endOffset: 1257 -- name: 'Model‑Assisted Annotation: Pre‑labeling and interpretability layers' - startOffset: 1257 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1257 - endOffset: 1441 -- name: 'Expert Knowledge Capture: Mind maps and task translation for annotators' - startOffset: 1441 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1441 - endOffset: 1768 -- name: 'Human Baseline & Prototyping: Validating feasibility and business value' - startOffset: 1768 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=1768 - endOffset: 2102 -- name: 'Annotation UX & Productivity: Hotkeys, interfaces, and iterative gains' - startOffset: 2102 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2102 - endOffset: 2262 -- name: 'Annotation Quality Metrics: Inter‑annotator agreement, throughput, fatigue' - startOffset: 2262 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2262 - endOffset: 2571 -- name: 'Active Learning in Practice: Expectations and typical gains' - startOffset: 2571 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2571 - endOffset: 2697 -- name: 'Distance Supervision & Weak Supervision: Labeling functions and Snorkel' - startOffset: 2697 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2697 - endOffset: 2904 -- name: 'Programmatic Heuristics: Entity/verb patterns and weak label design' - startOffset: 2904 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=2904 - endOffset: 3037 -- name: 'Tooling Recommendations: Prodigy, Docanno, Label Studio, Snorkel, Rubrics' - startOffset: 3037 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3037 - endOffset: 3154 -- name: 'Portfolio Advice: Building career projects via dataset creation' - startOffset: 3154 - url: 
https://www.youtube.com/watch?v=QggWydGrWoo&t=3154 - endOffset: 3438 -- name: 'Quick‑start Collection: IPython widgets and Fast.ai for beginners' - startOffset: 3438 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3438 - endOffset: 3506 -- name: 'Privacy & Multilingual NLP: GDPR, anonymization, and language challenges' - startOffset: 3506 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3506 - endOffset: 3820 -- name: 'Contact & Resources: Blog, company, and social links' - startOffset: 3820 - url: https://www.youtube.com/watch?v=QggWydGrWoo&t=3820 - endOffset: 3820 --- Links: diff --git a/_podcast/s06e08-nlp-teams.md b/_podcast/nlp-team-hiring-and-production-mlops.md similarity index 96% rename from _podcast/s06e08-nlp-teams.md rename to _podcast/nlp-team-hiring-and-production-mlops.md index 6c8b527f..ccb0b3ec 100644 --- a/_podcast/s06e08-nlp-teams.md +++ b/_podcast/nlp-team-hiring-and-production-mlops.md @@ -1,12 +1,11 @@ --- -title: 'Lead NLP Teams: Hiring, Production Pipelines, MLOps & LLM Tradeoffs (GPT-3, - spaCy)' -short: Leading NLP Teams -guests: -- ivanbilan -image: images/podcast/s06e08-nlp-teams.jpg +title: "Lead NLP Teams: Hiring, Production Pipelines, MLOps & LLM Tradeoffs (GPT-3, spaCy)" +short: "Leading NLP Teams" season: 6 episode: 8 +guests: +- ivanbilan +image: images/podcast/nlp-team-hiring-and-production-mlops.jpg ids: youtube: RJEf6mzxh1w anchor: Leading-NLP-Teams---Ivan-Bilan-e1c4929 @@ -15,6 +14,127 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Leading-NLP-Teams---Ivan-Bilan-e1c4929 spotify: https://open.spotify.com/episode/0jE1rpmLCYkD3GnUa2E7E3 apple: https://podcasts.apple.com/us/podcast/leading-nlp-teams-ivan-bilan/id1541710331?i=1000546053682 + +description: "Learn practical NLP teams hiring, production pipelines and MLOps tradeoffs—GPT-3 & spaCy tactics to deploy, monitor and scale reliable LLM systems" +intro: "How do you structure an NLP team and build reliable production pipelines while weighing the tradeoffs 
between GPT-3 and in-house models? In this episode, Ivan Bilan, Engineering Manager at Personio working on Identity and Access Management, walks through practical answers from his transition from linguistics to production NLP and MLOps.

We cover hiring and team models (centralized vs cross-disciplinary), what to look for in NLP engineers (tokenization, linguistics, deployment skills), and when to bring in linguists or conversational designers. Ivan breaks down the anatomy of an NLP production pipeline—data annotation, task engineering, testing, deployment, observability—and contrasts using GPT-3 with building in-house pipelines and open-source tools like spaCy and Hugging Face for MVPs. He discusses inference optimization, privacy and bias risks with large language models, benchmarking limits, and practical microservice patterns for data-intensive apps.

Listen to learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production" +topics: +- NLP +- machine learning +- MLOps +- data teams +- LLMs +- leadership +- career growth +- production +dateadded: 2021-12-26 + +duration: PT00H59M09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=0 + endOffset: 114 +- name: 'Episode Overview: Leading NLP Teams & Ivan''s Current Role' + startOffset: 114 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=114 + endOffset: 175 +- name: 'Personio Role: Identity and Access Management Responsibilities' + startOffset: 175 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=175 + endOffset: 279 +- name: 'Career Origins: From Linguistics to Computational NLP' + startOffset: 279 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=279 + endOffset: 442 +- name: 'Early Tech Stack: From Perl to Python and Web Scraping' + startOffset: 442 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=442 + endOffset: 522 +- name: 'Technical Management Study: CDTM, Internships, and Organizational Learning' + startOffset: 522 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=522 + endOffset: 714 +- name: 'Management Transition: From ML Teams to Web Product Engineering & Observability' + startOffset: 714 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=714 + endOffset: 847 +- name: 'Defining NLP Teams: Centralized vs Cross-disciplinary Structures' + startOffset: 847 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=847 + endOffset: 1005 +- name: 'NLP Engineer Role: Skills, Linguistics Background, and Tokenization Expertise' + startOffset: 1005 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1005 + endOffset: 1156 +- name: 'Path to Becoming an NLP Engineer: Practical Resources, spaCy & Hugging Face' + startOffset: 1156 + url: 
https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1156 + endOffset: 1351 +- name: 'Vision vs Text: Comparing Computer Vision and NLP Challenges' + startOffset: 1351 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1351 + endOffset: 1476 +- name: 'NLP Engineer vs ML Engineer: Inference Optimization, Deployment & MLOps' + startOffset: 1476 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1476 + endOffset: 1579 +- name: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non-coding Roles' + startOffset: 1579 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1579 + endOffset: 1718 +- name: 'Linguists in NLP: Parsing, Information Extraction & Multilingual Needs' + startOffset: 1718 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1718 + endOffset: 1811 +- name: 'When to Hire NLP Specialists: Task Complexity, Data Needs & Feature Engineering' + startOffset: 1811 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1811 + endOffset: 1941 +- name: 'Future of NLP: Library Ecosystem, AutoML & Research Velocity' + startOffset: 1941 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1941 + endOffset: 2097 +- name: 'NLP Pipeline Anatomy: Data Annotation, Task Engineering, Testing, Production + & Observability' + startOffset: 2097 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2097 + endOffset: 2335 +- name: 'Large Language Models & Prompting: GPT-3 Capabilities and Simplification' + startOffset: 2335 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2335 + endOffset: 2585 +- name: 'GPT-3 Limitations: Cost, Control, Bias & Privacy Risks' + startOffset: 2585 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2585 + endOffset: 2770 +- name: 'GPT-3 vs In-house Pipelines: MVP Strategy, Control & Open-Source Alternatives' + startOffset: 2770 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2770 + endOffset: 2919 +- name: 'What NLP Really Is: Industry Productization vs Academic Linguistic Research' + startOffset: 2919 + url: 
https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2919 + endOffset: 3177 +- name: 'AI Benchmarking: Human-level Claims, Dataset Limits & Real-world Gaps' + startOffset: 3177 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3177 + endOffset: 3225 +- name: 'Machine Translation State: Google Translate, DeepL, Data Coverage & Language + Pairs' + startOffset: 3225 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3225 + endOffset: 3488 +- name: 'NLP Pandect & Related Projects: GitHub Resources for NLP, Microservices & + Engineering Managers' + startOffset: 3488 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3488 + endOffset: 3641 +- name: 'Contact & Resources: LinkedIn, Presentation Links and Further Reading' + startOffset: 3641 + url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3641 + endOffset: 3549 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Leading NLP Teams & Ivan''s Current Role' @@ -239,7 +359,7 @@ transcript: sec: 816 time: '13:36' who: Alexey -- header: 'Defining NLP Teams: Centralized vs Cross‑disciplinary Structures' +- header: 'Defining NLP Teams: Centralized vs Cross-disciplinary Structures' - line: Yeah, good question. It's more of an industry question. Do we even have separate designation for NLP teams? I think maybe a few years ago, this wasn't the case. You would just have a data science team and everything data science is done there @@ -454,7 +574,7 @@ transcript: sec: 1489 time: '24:49' who: Alexey -- header: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non‑coding Roles' +- header: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non-coding Roles' - line: Yeah, for sure. There are some specific tasks that would really benefit from that. I think that in the last two years, there was a new role forming in the world of data science, called “conversational designer”. 
It's basically a person @@ -656,7 +776,7 @@ transcript: sec: 2325 time: '38:45' who: Alexey -- header: 'Large Language Models & Prompting: GPT‑3 Capabilities and Simplification' +- header: 'Large Language Models & Prompting: GPT-3 Capabilities and Simplification' - line: Yeah, GPT-3 is on a whole different level. You don't need to do anything, really. The idea of GPT-3 is that it’s a smart lookup table. It has seen, I think, like 10% of the whole internet. That's what the data set was used to train it. @@ -748,7 +868,7 @@ transcript: sec: 2577 time: '42:57' who: Alexey -- header: 'GPT‑3 Limitations: Cost, Control, Bias & Privacy Risks' +- header: 'GPT-3 Limitations: Cost, Control, Bias & Privacy Risks' - line: Yeah. I mean, I don't know. They are trying to open source it now or something. I don't know. But I think you still have to pay for tokens in order to be able to use it. @@ -821,7 +941,7 @@ transcript: sec: 2750 time: '45:50' who: Alexey -- header: 'GPT‑3 vs In‑house Pipelines: MVP Strategy, Control & Open‑Source Alternatives' +- header: 'GPT-3 vs In-house Pipelines: MVP Strategy, Control & Open-Source Alternatives' - line: I would say, yes, because GPT-3 still isn't able to solve everything. It is able to solve most of the tasks to a good extent. But the question is, “Can it actually solve everything you need for it to be used in production – for it to @@ -927,7 +1047,7 @@ transcript: sec: 3160 time: '52:40' who: Alexey -- header: 'AI Benchmarking: Human‑level Claims, Dataset Limits & Real‑world Gaps' +- header: 'AI Benchmarking: Human-level Claims, Dataset Limits & Real-world Gaps' - line: Yeah, I don't think so. I don't think there is any problem we have fully solved. There are papers that state something like “AI models are as good as humans” or “better than humans”. But this is all evaluated on a very small subset of data. 
@@ -1083,128 +1203,6 @@ transcript: sec: 3663 time: '1:01:03' who: Ivan -description: Learn practical NLP teams hiring, production pipelines and MLOps tradeoffs—GPT-3 - & spaCy tactics to deploy, monitor and scale reliable LLM systems. -intro: How do you structure an NLP team and build reliable production pipelines while - weighing the tradeoffs between GPT‑3 and in‑house models? In this episode, Ivan Bilan, - Engineering Manager at Personio working on Identity and Access Management, walks - through practical answers from his transition from linguistics to production NLP - and MLOps.

We cover hiring and team models (centralized vs cross‑disciplinary), - what to look for in NLP engineers (tokenization, linguistics, deployment skills), - and when to bring in linguists or conversational designers. Ivan breaks down the - anatomy of an NLP production pipeline—data annotation, task engineering, testing, - deployment, observability—and contrasts using GPT‑3 with building in‑house pipelines - and open‑source tools like spaCy and Hugging Face for MVPs. He discusses inference - optimization, privacy and bias risks with large language models, benchmarking limits, - and practical microservice patterns for data‑intensive apps.

Listen to - learn actionable guidance on hiring NLP talent, designing MLOps workflows, choosing - between LLMs and bespoke models, and the concrete tradeoffs you’ll face in production. -dateadded: '2021-12-26' -duration: PT00H59M09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=0 - endOffset: 114 -- name: 'Episode Overview: Leading NLP Teams & Ivan''s Current Role' - startOffset: 114 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=114 - endOffset: 175 -- name: 'Personio Role: Identity and Access Management Responsibilities' - startOffset: 175 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=175 - endOffset: 279 -- name: 'Career Origins: From Linguistics to Computational NLP' - startOffset: 279 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=279 - endOffset: 442 -- name: 'Early Tech Stack: From Perl to Python and Web Scraping' - startOffset: 442 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=442 - endOffset: 522 -- name: 'Technical Management Study: CDTM, Internships, and Organizational Learning' - startOffset: 522 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=522 - endOffset: 714 -- name: 'Management Transition: From ML Teams to Web Product Engineering & Observability' - startOffset: 714 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=714 - endOffset: 847 -- name: 'Defining NLP Teams: Centralized vs Cross‑disciplinary Structures' - startOffset: 847 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=847 - endOffset: 1005 -- name: 'NLP Engineer Role: Skills, Linguistics Background, and Tokenization Expertise' - startOffset: 1005 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1005 - endOffset: 1156 -- name: 'Path to Becoming an NLP Engineer: Practical Resources, spaCy & Hugging Face' - startOffset: 1156 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1156 - endOffset: 1351 -- name: 'Vision vs Text: Comparing Computer Vision and NLP Challenges' - 
startOffset: 1351 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1351 - endOffset: 1476 -- name: 'NLP Engineer vs ML Engineer: Inference Optimization, Deployment & MLOps' - startOffset: 1476 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1476 - endOffset: 1579 -- name: 'Conversational Designers: Chatbot UX, Dialogue Flow & Non‑coding Roles' - startOffset: 1579 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1579 - endOffset: 1718 -- name: 'Linguists in NLP: Parsing, Information Extraction & Multilingual Needs' - startOffset: 1718 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1718 - endOffset: 1811 -- name: 'When to Hire NLP Specialists: Task Complexity, Data Needs & Feature Engineering' - startOffset: 1811 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1811 - endOffset: 1941 -- name: 'Future of NLP: Library Ecosystem, AutoML & Research Velocity' - startOffset: 1941 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=1941 - endOffset: 2097 -- name: 'NLP Pipeline Anatomy: Data Annotation, Task Engineering, Testing, Production - & Observability' - startOffset: 2097 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2097 - endOffset: 2335 -- name: 'Large Language Models & Prompting: GPT‑3 Capabilities and Simplification' - startOffset: 2335 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2335 - endOffset: 2585 -- name: 'GPT‑3 Limitations: Cost, Control, Bias & Privacy Risks' - startOffset: 2585 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2585 - endOffset: 2770 -- name: 'GPT‑3 vs In‑house Pipelines: MVP Strategy, Control & Open‑Source Alternatives' - startOffset: 2770 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2770 - endOffset: 2919 -- name: 'What NLP Really Is: Industry Productization vs Academic Linguistic Research' - startOffset: 2919 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=2919 - endOffset: 3177 -- name: 'AI Benchmarking: Human‑level Claims, Dataset Limits & Real‑world Gaps' - startOffset: 3177 - 
url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3177 - endOffset: 3225 -- name: 'Machine Translation State: Google Translate, DeepL, Data Coverage & Language - Pairs' - startOffset: 3225 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3225 - endOffset: 3488 -- name: 'NLP Pandect & Related Projects: GitHub Resources for NLP, Microservices & - Engineering Managers' - startOffset: 3488 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3488 - endOffset: 3641 -- name: 'Contact & Resources: LinkedIn, Presentation Links and Further Reading' - startOffset: 3641 - url: https://www.youtube.com/watch?v=RJEf6mzxh1w&t=3641 - endOffset: 3549 --- Links: diff --git a/_podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md similarity index 95% rename from _podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md rename to _podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md index 0ee2b285..71066dbe 100644 --- a/_podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.md +++ b/_podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.md @@ -1,20 +1,108 @@ --- +title: "From Medicine to Machine Learning: Skill Stacking, Public Learning & Freelance-Driven Career Building" +short: "From Medicine to Machine Learning: How Public Learning Turned into a Career" +season: 21 episode: 3 guests: - pastorsoto +image: images/podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.jpg ids: anchor: datatalksclub/episodes/From-Medicine-to-Machine-Learning-How-Public-Learning-Turned-into-a-Career---Pastor-Soto-e376e66 youtube: 5km62e4nDaw -image: images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg links: anchor: 
https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Medicine-to-Machine-Learning-How-Public-Learning-Turned-into-a-Career---Pastor-Soto-e376e66 apple: https://podcasts.apple.com/us/podcast/how-to-rebuild-data-trust-mindful-data-strategy-and/id1541710331?i=1000722107501 spotify: https://open.spotify.com/episode/22Gc1bDecKA33KHAaSF9fx youtube: https://www.youtube.com/watch?v=5km62e4nDaw -season: 21 -short: 'From Medicine to Machine Learning: How Public Learning Turned into a Career' -title: 'Medicine to Machine Learning: Build Healthcare ML Portfolio, Freelance on - Upwork & Deploy to Cloud' +description: "Learn how to build a healthcare ML portfolio, land Upwork freelance gigs and deploy Dockerized models to AWS—practical tips, capstones, and career strategies" +topics: +- machine learning +- data science +- data engineering +- MLOps +- career transition +intro: "How do you go from medical school to shipping production-ready healthcare ML—and get paid for it on platforms like Upwork? In this episode, Pastor Soto, a machine learning engineer and mentor who transitioned from medicine and criminology into production ML, walks through the practical steps he used to build a healthcare ML portfolio and freelance career.

We cover his career trajectory (statistician → data analyst → data engineer), the skill progression from SPSS and R to Python, and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp and public learning—publishing exercises, leaderboards, and focused content—attracted interviews and opportunities. He also breaks down portfolio tactics (Notion notes, capstone projects using healthcare datasets), production topics (Dockerized models, AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and soft skills like English communication and handling critique.

Listeners will come away with concrete, repeatable strategies for building a healthcare machine learning portfolio, landing freelance work, and deploying models to the cloud—plus time-management and mentoring practices that make it sustainable" +dateadded: 2025-08-22 +duration: PT01H01M07S +quotableClips: +- name: Podcast Introduction & Event Announcements + startOffset: 0 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=0 + endOffset: 94 +- name: 'Guest Overview: Transition from Medicine and Criminology to Machine Learning + Career' + startOffset: 94 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=94 + endOffset: 201 +- name: 'Career Trajectory: Statistician → Data Analyst → Data Engineer' + startOffset: 201 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=201 + endOffset: 351 +- name: 'Skill Progression: SPSS, Excel, R, and Transition to Python' + startOffset: 351 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=351 + endOffset: 365 +- name: 'Freelancing Beginnings: First Upwork Gigs and Early Projects' + startOffset: 365 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=365 + endOffset: 548 +- name: 'Learning-by-Doing: Accepting Unknown Projects to Build Skills' + startOffset: 548 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=548 + endOffset: 704 +- name: 'Balancing Dual Paths: Medical School and Data Work' + startOffset: 704 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=704 + endOffset: 828 +- name: 'Medical Reasoning in Data Science: Probability, Reranking, and Triage' + startOffset: 828 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=828 + endOffset: 869 +- name: 'Communication Skills: Improving English for Remote Work' + startOffset: 869 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=869 + endOffset: 1443 +- name: 'Live Cohorts & ML Zoom Camp: Benefits of Structured, Hands-On Learning' + startOffset: 1443 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1443 + endOffset: 1647 +- name: 'Public Learning 
Strategy: Leaderboards, Posting, and Personal Branding' + startOffset: 1647 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1647 + endOffset: 1820 +- name: 'Content Framing: Owning Topics (ROC, Classifier Evaluation)' + startOffset: 1820 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1820 + endOffset: 1970 +- name: 'Recruiter Outreach: LinkedIn Visibility and Meta Interview Experience' + startOffset: 1970 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1970 + endOffset: 2116 +- name: 'Handling Critique: Social Media Feedback and Community Engagement' + startOffset: 2116 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2116 + endOffset: 2463 +- name: 'Portfolio Building: Notes, Notion, and Structured Content Workflow' + startOffset: 2463 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2463 + endOffset: 2868 +- name: 'Capstone Projects: Healthcare Datasets, Dockerized Models, and AWS Deployment' + startOffset: 2868 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2868 + endOffset: 3053 +- name: 'Community Contribution: Mentoring with DeepLearning.AI and Stanford Coding + Place' + startOffset: 3053 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3053 + endOffset: 3420 +- name: 'Time Management: Productivity Strategies for Medical Students and ML Learners' + startOffset: 3420 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3420 + endOffset: 3600 +- name: 'Final Reflections: Consistency, Career Next Steps, and Motivation' + startOffset: 3600 + url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3600 + endOffset: 3667 transcript: - header: Podcast Introduction & Event Announcements - line: Hi everyone, welcome to our event. This event is presented by Redox Club, @@ -859,106 +947,11 @@ transcript: sec: 3667 time: '1:01:07' who: Alexey -description: Learn how to build a healthcare ML portfolio, land Upwork freelance gigs - and deploy Dockerized models to AWS—practical tips, capstones, and career strategies. 
-intro: How do you go from medical school to shipping production-ready healthcare ML—and - get paid for it on platforms like Upwork? In this episode, Pastor Soto, a machine - learning engineer and mentor who transitioned from medicine and criminology into - production ML, walks through the practical steps he used to build a healthcare ML - portfolio and freelance career.

We cover his career trajectory (statistician - → data analyst → data engineer), the skill progression from SPSS and R to Python, - and the first Upwork gigs that taught him by doing. Pastor explains how ML Zoomcamp - and public learning—publishing exercises, leaderboards, and focused content—attracted - interviews and opportunities. He also breaks down portfolio tactics (Notion notes, - capstone projects using healthcare datasets), production topics (Dockerized models, - AWS deployment, wiring APIs, feeding LLMs), recruiter visibility on LinkedIn, and - soft skills like English communication and handling critique.

Listeners - will come away with concrete, repeatable strategies for building a healthcare machine - learning portfolio, landing freelance work, and deploying models to the cloud—plus - time-management and mentoring practices that make it sustainable. -dateadded: '2025-08-22' -duration: PT01H01M07S -quotableClips: -- name: Podcast Introduction & Event Announcements - startOffset: 0 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=0 - endOffset: 94 -- name: 'Guest Overview: Transition from Medicine and Criminology to Machine Learning - Career' - startOffset: 94 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=94 - endOffset: 201 -- name: 'Career Trajectory: Statistician → Data Analyst → Data Engineer' - startOffset: 201 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=201 - endOffset: 351 -- name: 'Skill Progression: SPSS, Excel, R, and Transition to Python' - startOffset: 351 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=351 - endOffset: 365 -- name: 'Freelancing Beginnings: First Upwork Gigs and Early Projects' - startOffset: 365 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=365 - endOffset: 548 -- name: 'Learning-by-Doing: Accepting Unknown Projects to Build Skills' - startOffset: 548 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=548 - endOffset: 704 -- name: 'Balancing Dual Paths: Medical School and Data Work' - startOffset: 704 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=704 - endOffset: 828 -- name: 'Medical Reasoning in Data Science: Probability, Reranking, and Triage' - startOffset: 828 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=828 - endOffset: 869 -- name: 'Communication Skills: Improving English for Remote Work' - startOffset: 869 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=869 - endOffset: 1443 -- name: 'Live Cohorts & ML Zoom Camp: Benefits of Structured, Hands-On Learning' - startOffset: 1443 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1443 - endOffset: 1647 -- name: 'Public Learning 
Strategy: Leaderboards, Posting, and Personal Branding' - startOffset: 1647 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1647 - endOffset: 1820 -- name: 'Content Framing: Owning Topics (ROC, Classifier Evaluation)' - startOffset: 1820 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1820 - endOffset: 1970 -- name: 'Recruiter Outreach: LinkedIn Visibility and Meta Interview Experience' - startOffset: 1970 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=1970 - endOffset: 2116 -- name: 'Handling Critique: Social Media Feedback and Community Engagement' - startOffset: 2116 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2116 - endOffset: 2463 -- name: 'Portfolio Building: Notes, Notion, and Structured Content Workflow' - startOffset: 2463 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2463 - endOffset: 2868 -- name: 'Capstone Projects: Healthcare Datasets, Dockerized Models, and AWS Deployment' - startOffset: 2868 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=2868 - endOffset: 3053 -- name: 'Community Contribution: Mentoring with DeepLearning.AI and Stanford Coding - Place' - startOffset: 3053 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3053 - endOffset: 3420 -- name: 'Time Management: Productivity Strategies for Medical Students and ML Learners' - startOffset: 3420 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3420 - endOffset: 3600 -- name: 'Final Reflections: Consistency, Career Next Steps, and Motivation' - startOffset: 3600 - url: https://www.youtube.com/watch?v=5km62e4nDaw&t=3600 - endOffset: 3667 +context: 'A deliberate, project-first career pivot: leveraging medical and statistical + domain knowledge while learning by doing—through freelance projects, structured + cohorts, public-facing content, and portfolio/dev-ops work—to build practical ML + skills, visibility, and job readiness while balancing time and commitments.' 
--- - Links: * [Twitter](https://x.com/PastorSotoB1){:target="_blank"} diff --git a/_podcast/s17e07-make-impact-through-volunteering-open-source-work.md b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md similarity index 96% rename from _podcast/s17e07-make-impact-through-volunteering-open-source-work.md rename to _podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md index dd9e21bc..e895322b 100644 --- a/_podcast/s17e07-make-impact-through-volunteering-open-source-work.md +++ b/_podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.md @@ -1,20 +1,128 @@ --- +title: "Open Source and Volunteering: Building AI Projects and Career Momentum" +short: "Make an Impact Through Volunteering Open Source Work" +season: 17 episode: 7 guests: - saraelateif +image: images/podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.jpg ids: - anchor: atatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan + anchor: datatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan youtube: aHdaIwOEI8Q -image: images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Make-an-Impact-Through-Volunteering-Open-Source-Work---Sara-EL-ATEIF-e2g4dan apple: https://podcasts.apple.com/us/podcast/make-an-impact-through-volunteering-open-source-work/id1541710331?i=1000646627892 spotify: https://open.spotify.com/episode/7tZSSgv1yAlnoMyB4ggQmb?si=AqDaME2QS26usoZjOEWNtQ youtube: https://www.youtube.com/watch?v=aHdaIwOEI8Q -season: 17 -short: Make an Impact Through Volunteering Open Source Work -title: 'Boost Your AI Career: Volunteer Open-Source Projects, Hackathon Strategy & - Data Sourcing' +description: "Learn open source volunteering tactics for AI projects - data sourcing, hackathon MVP strategy, mentorship and portfolio-building to accelerate career 
momentum." +topics: +- computer vision +- machine learning +- data engineering +- open-source +- career development +- mentorship +- career growth +intro: "How can volunteering in open source AI projects accelerate your career while delivering tangible community impact? In this episode Sara El-Ateif — Google Developer Expert in Machine Learning, Google PhD Fellow, co-founder of AI Wonder Girls and Evercoach-certified business coach — walks through practical ways to build skills and momentum through volunteering and open source work.

We cover Sara's path from early AI interest to PhD research in multimodal learning and medical imaging, plus lessons from winning a Google PhD Fellowship. Hear concrete volunteer project case studies — a PTSD chatbot, trash detection, and cervical spine segmentation — and learn data sourcing tactics using Open Images and creative collection. Sara explains how to find opportunities (LinkedIn, social media, mailing lists, WIML), differences between collaboration platforms like Omdena and Fruit Punch AI, and how women-led groups structure projects.

Listeners will get actionable advice on hackathon strategy, MVP mindset under data/compute constraints, pitching for volunteer roles, building a research network, and the data engineering tasks that matter (pipelines, dashboards, prep). Tune in to discover how open source and volunteering translate into practical experience, referrals, and career traction in machine learning." +dateadded: 2024-02-29 +duration: PT00H59M34S +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=0 + endOffset: 103 +- name: 'Episode Overview: Volunteering, Open Source & Community Impact' + startOffset: 103 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=103 + endOffset: 157 +- name: 'Career Origins: Early AI Interest and Education Path' + startOffset: 157 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=157 + endOffset: 266 +- name: 'Academic Focus: Big Data Specialization and Computer Vision' + startOffset: 266 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=266 + endOffset: 346 +- name: 'PhD Research: Multimodal Learning for COVID-19 & Medical Imaging' + startOffset: 346 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=346 + endOffset: 473 +- name: 'Google PhD Fellowship: Application Strategy and Benefits' + startOffset: 473 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=473 + endOffset: 668 +- name: 'Volunteer Projects Overview: PTSD Chatbot and Trash Detection Cases' + startOffset: 668 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=668 + endOffset: 849 +- name: 'Medical Imaging Project: Cervical Spine Segmentation Work' + startOffset: 849 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=849 + endOffset: 965 +- name: 'Data Sourcing Techniques: Open Images and Creative Collection' + startOffset: 965 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=965 + endOffset: 1068 +- name: 'Opportunity Hunting: LinkedIn, Social Media, and Mailing Lists' + startOffset: 1068 + url: 
https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1068 + endOffset: 1225 +- name: 'Productivity Tips: Curated Feeds and Managing Social Media Time' + startOffset: 1225 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1225 + endOffset: 1424 +- name: 'Platform Differences: Omdena vs. Fruit Punch AI Collaboration Models' + startOffset: 1424 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1424 + endOffset: 1585 +- name: 'Joining Challenges: Beginner Support, Roles, and Team Dynamics' + startOffset: 1585 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1585 + endOffset: 1622 +- name: 'Women-Led AI Groups: Community Formation and Project Workflow' + startOffset: 1622 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1622 + endOffset: 1871 +- name: 'Hackathon Case Study: Medical Imaging Solution, Mentors, and Deliverables' + startOffset: 1871 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1871 + endOffset: 2192 +- name: 'Hackathon Strategy: Understanding Judges, Criteria, and Positioning' + startOffset: 2192 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2192 + endOffset: 2387 +- name: 'MVP Mindset: Overcoming Data and Compute Constraints' + startOffset: 2387 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2387 + endOffset: 2527 +- name: 'Data Creativity: Generative AI, Research, and Team Composition' + startOffset: 2527 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2527 + endOffset: 2724 +- name: 'Building a Research Network: Finding and Following Researchers on Twitter' + startOffset: 2724 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2724 + endOffset: 2922 +- name: 'Applying to Volunteer Projects: Interview Pitching and Relevant Skills' + startOffset: 2922 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2922 + endOffset: 3081 +- name: 'Volunteer Outcomes: Practical Experience, Referrals, and Soft Skills' + startOffset: 3081 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3081 + endOffset: 3365 +- name: 'Roles for 
Data Engineers: Data Preparation, Pipelines, and Dashboards' + startOffset: 3365 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3365 + endOffset: 3497 +- name: 'Opportunity Sources: WIML, Conference Feeds, and Newsletters' + startOffset: 3497 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3497 + endOffset: 3624 +- name: Closing Remarks and How to Connect + startOffset: 3624 + url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3624 + endOffset: 3574 transcript: - header: Podcast Introduction - header: 'Episode Overview: Volunteering, Open Source & Community Impact' @@ -1219,126 +1327,13 @@ transcript: sec: 3677 time: '1:01:17' who: Sara -description: Discover volunteer open-source projects, hackathon strategy and data - sourcing tips to build an AI portfolio, land referrals, and win medical imaging - challenges. -intro: Struggling to break into impactful AI work—what volunteer projects, hackathon - tactics, and data sourcing methods actually move your career forward? In this episode - Sara El‑Ateif, Google Developer Expert in Machine Learning, Google PhD Fellow and - co‑founder of AI Wonder Girls, walks through her path from big data and computer - vision studies to multimodal COVID‑19 research and practical volunteer projects. -

Sara breaks down real examples—PTSD chatbot, trash detection, and cervical - spine segmentation—showing how to source data (Open Images, creative collection, - generative approaches), pitch for volunteer roles, and contribute on platforms like - Omdena and Fruit Punch AI. She explains hackathon strategy—understanding judges, - defining an MVP despite limited data/compute, and building deliverables with mentors—and - outlines opportunity hunting via LinkedIn, social feeds, mailing lists, WIML and - conference channels.

Listeners will get actionable guidance on applying - to projects, roles for data engineers (data prep, pipelines, dashboards), productivity - tips, and how to build a research network. Tune in to learn concrete steps to boost - your AI career through open‑source volunteering, smarter hackathon participation, - and better data sourcing. -dateadded: '2024-02-29' -duration: PT00H59M34S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=0 - endOffset: 103 -- name: 'Episode Overview: Volunteering, Open Source & Community Impact' - startOffset: 103 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=103 - endOffset: 157 -- name: 'Career Origins: Early AI Interest and Education Path' - startOffset: 157 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=157 - endOffset: 266 -- name: 'Academic Focus: Big Data Specialization and Computer Vision' - startOffset: 266 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=266 - endOffset: 346 -- name: 'PhD Research: Multimodal Learning for COVID-19 & Medical Imaging' - startOffset: 346 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=346 - endOffset: 473 -- name: 'Google PhD Fellowship: Application Strategy and Benefits' - startOffset: 473 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=473 - endOffset: 668 -- name: 'Volunteer Projects Overview: PTSD Chatbot and Trash Detection Cases' - startOffset: 668 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=668 - endOffset: 849 -- name: 'Medical Imaging Project: Cervical Spine Segmentation Work' - startOffset: 849 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=849 - endOffset: 965 -- name: 'Data Sourcing Techniques: Open Images and Creative Collection' - startOffset: 965 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=965 - endOffset: 1068 -- name: 'Opportunity Hunting: LinkedIn, Social Media, and Mailing Lists' - startOffset: 1068 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1068 - 
endOffset: 1225 -- name: 'Productivity Tips: Curated Feeds and Managing Social Media Time' - startOffset: 1225 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1225 - endOffset: 1424 -- name: 'Platform Differences: Omdena vs. Fruit Punch AI Collaboration Models' - startOffset: 1424 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1424 - endOffset: 1585 -- name: 'Joining Challenges: Beginner Support, Roles, and Team Dynamics' - startOffset: 1585 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1585 - endOffset: 1622 -- name: 'Women-Led AI Groups: Community Formation and Project Workflow' - startOffset: 1622 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1622 - endOffset: 1871 -- name: 'Hackathon Case Study: Medical Imaging Solution, Mentors, and Deliverables' - startOffset: 1871 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=1871 - endOffset: 2192 -- name: 'Hackathon Strategy: Understanding Judges, Criteria, and Positioning' - startOffset: 2192 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2192 - endOffset: 2387 -- name: 'MVP Mindset: Overcoming Data and Compute Constraints' - startOffset: 2387 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2387 - endOffset: 2527 -- name: 'Data Creativity: Generative AI, Research, and Team Composition' - startOffset: 2527 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2527 - endOffset: 2724 -- name: 'Building a Research Network: Finding and Following Researchers on Twitter' - startOffset: 2724 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2724 - endOffset: 2922 -- name: 'Applying to Volunteer Projects: Interview Pitching and Relevant Skills' - startOffset: 2922 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=2922 - endOffset: 3081 -- name: 'Volunteer Outcomes: Practical Experience, Referrals, and Soft Skills' - startOffset: 3081 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3081 - endOffset: 3365 -- name: 'Roles for Data Engineers: Data Preparation, Pipelines, and 
Dashboards' - startOffset: 3365 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3365 - endOffset: 3497 -- name: 'Opportunity Sources: WIML, Conference Feeds, and Newsletters' - startOffset: 3497 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3497 - endOffset: 3624 -- name: Closing Remarks and How to Connect - startOffset: 3624 - url: https://www.youtube.com/watch?v=aHdaIwOEI8Q&t=3624 - endOffset: 3574 +context: Volunteering and community-driven open-source collaboration are the pivot + that turns academic curiosity and nascent AI skills into real-world impact and career + momentum — by embracing an MVP mindset, creative data sourcing, strategic positioning + (hackathons, platforms, and pitches), and intentional networking you can build practical + projects, gain mentors and referrals, and overcome resource constraints to bridge + research and production. --- - Links: * [Dev and AI hackathons](https://devpost.com/){:target="_blank"} diff --git a/_podcast/s02e03-open-source.md b/_podcast/open-source-ml-contributions.md similarity index 91% rename from _podcast/s02e03-open-source.md rename to _podcast/open-source-ml-contributions.md index 03a7ba43..fd367c86 100644 --- a/_podcast/s02e03-open-source.md +++ b/_podcast/open-source-ml-contributions.md @@ -1,23 +1,11 @@ --- -title: 'Contribute to Open Source ML: scikit-learn Pipelines, PRs, Docs & Rasa Conversational - AI' -short: Getting Started with Open Source -description: 'Learn open source contribution tactics for scikit-learn pipelines and - Rasa: make solid PRs, write docs & tests, boost your OSS skills and career visibility.' 
-guests: -- vincentwarmerdam -tags: -- open-source -- python -- data-science -- career-development -- contributing -- scikit-learn -- machine-learning -category: Data Science Career -image: images/podcast/s02e03-open-source.jpg +title: "Contribute to Open Source ML: scikit-learn Pipelines, PRs, Docs & Rasa Conversational AI" +short: "Getting Started with Open Source" season: 2 episode: 3 +guests: +- vincentwarmerdam +image: images/podcast/open-source-ml-contributions.jpg ids: youtube: IxV9EH-tphQ anchor: Getting-Started-with-Open-Source---Vincent-Warmerdam-epk60j @@ -26,24 +14,19 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Getting-Started-with-Open-Source---Vincent-Warmerdam-epk60j spotify: https://open.spotify.com/episode/1dsbDeVncfsEg3m3cYB927 apple: https://podcasts.apple.com/us/podcast/getting-started-with-open-source-vincent-warmerdam/id1541710331?i=1000507024598 -intro: 'How do you start contributing to open source ML projects like scikit-learn - pipelines—or move from curious user to confident contributor on Rasa’s conversational - AI stack? In this episode, Vincent Warmerdam, Research Advocate at Rasa and creator - of The Algorithm Whiteboard and calmcode.io, walks through practical, hands-on advice - for contributing to open source ML.

Vincent shares his career pivot from - design student to data scientist and highlights projects (evol, clumper, memo, whatlies, - scikit-lego) that illustrate small-tools-to-impact workflows. We deep-dive into - scikit-learn–compatible pipeline components, design principles for low-maintenance - APIs, and common mistakes such as publishing to PyPI too early. You’ll get a documentation - checklist (README, guides, API reference, examples), guidance on filing reproducible - issues, and step-by-step preparation for pull requests: testing, CI, packaging, - and pre-commit hooks.

Listeners will leave with concrete strategies for - finding the right project, balancing large vs. small repositories, community stewardship - and contribution etiquette, and ways OSS work can boost career visibility through - talks, blogs, and meetups. If you want actionable next steps for contributing to - open source ML, scikit-learn pipelines, PRs, docs, or Rasa conversational AI, this - episode maps the path.' -dateadded: '2021-02-23' + +description: "Learn open source contribution tactics for scikit-learn pipelines and Rasa: make solid PRs, write docs & tests, boost your OSS skills and career visibility." +intro: "How do you start contributing to open source ML projects like scikit-learn pipelines—or move from curious user to confident contributor on Rasa’s conversational AI stack? In this episode, Vincent Warmerdam, Research Advocate at Rasa and creator of The Algorithm Whiteboard and calmcode.io, walks through practical, hands-on advice for contributing to open source ML.

Vincent shares his career pivot from design student to data scientist and highlights projects (evol, clumper, memo, whatlies, scikit-lego) that illustrate small-tools-to-impact workflows. We deep-dive into scikit-learn–compatible pipeline components, design principles for low-maintenance APIs, and common mistakes such as publishing to PyPI too early. You’ll get a documentation checklist (README, guides, API reference, examples), guidance on filing reproducible issues, and step-by-step preparation for pull requests: testing, CI, packaging, and pre-commit hooks.

Listeners will leave with concrete strategies for finding the right project, balancing large vs. small repositories, community stewardship and contribution etiquette, and ways OSS work can boost career visibility through talks, blogs, and meetups. If you want actionable next steps for contributing to open source ML, scikit-learn pipelines, PRs, docs, or Rasa conversational AI, this episode maps the path." +topics: +- open-source +- data science +- career development +- contributing +- machine learning +- tools +dateadded: 2021-02-23 + + quotableClips: - name: Podcast Introduction and Episode Overview startOffset: 0 @@ -141,6 +124,16 @@ quotableClips: startOffset: 2280 url: https://www.youtube.com/watch?v=IxV9EH-tphQ&t=2280 endOffset: 2280 + +category: Data Science Career +tags: +- open-source +- python +- data-science +- career-development +- contributing +- scikit-learn +- machine-learning --- Today we're talking open source with our guest, **Vincent Warmerdam**. Vincent is a Research Advocate at Rasa. If you check his LinkedIn, you'll see a lot: he's made Reddit's front page, runs calmcode.io for learning to code, has organized PyData Amsterdam and AI Saturdays Amsterdam, and he's a data evangelist and open-source enthusiast who's created and maintains several open-source packages. And—last but not least—he has over 80 LinkedIn endorsements for "awesomeness." Welcome, Vincent! 
diff --git a/_podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.md b/_podcast/open-source-ml-tools-strategy-and-business-models.md similarity index 93% rename from _podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.md rename to _podcast/open-source-ml-tools-strategy-and-business-models.md index 4925b6ac..00b5e32a 100644 --- a/_podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.md +++ b/_podcast/open-source-ml-tools-strategy-and-business-models.md @@ -1,20 +1,155 @@ --- +title: "Open Source ML Tools: Scikit-Learn Governance, Sustainability and Business Models" +short: "Working in Open Source - Probabl.ai and sklearn" +season: 18 episode: 4 guests: - vincentwarmerdam +image: images/podcast/open-source-ml-tools-strategy-and-business-models.jpg ids: - anchor: atatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs + anchor: datatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs youtube: UPlIETGwTg8 -image: images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Working-in-Open-Source---Probabl-ai-and-sklearn---Vincent-Warmerdam-e2j78fs apple: https://podcasts.apple.com/us/podcast/working-in-open-source-probabl-ai-and-sklearn-vincent/id1541710331?i=1000654481795 spotify: https://open.spotify.com/episode/0HT3IQOaTXTMH0OdEBnw9s?si=HrLtx7QKT_amZyUbZuqRzQ youtube: https://www.youtube.com/watch?v=UPlIETGwTg8 -season: 18 -short: Working in Open Source - Probabl.ai and sklearn -title: 'Build Sustainable Scikit-Learn Ecosystems: scikit-lego, Skrub, GAP Encoder - & DevRel' +description: "Discover Scikit-Learn open source business models—learn maintainer strategies, CI cost optimization and training monetization to build sustainable projects." 
+topics: +- open-source +- machine learning +- data science +- tools +- developer relations + +intro: "How can open source ML tools stay healthy, useful, and financially sustainable while serving both researchers and industry? In this episode Vincent Warmerdam — Research Advocate at Rasa, author of the Koaning blog, creator of the Algorithm Whiteboard playlist, and cofounder of Calm Code — walks through the real-world tradeoffs of scikit-learn governance, sustainability, and business models for ML tooling.

We dig into scikit-learn's history, NumFOCUS relationships, and the plugin-versus-core strategy; practical maintainer issues like transitions, motivating volunteers, and using open source contributions as hiring signals; and the intersection of developer relations and core engineering. Vincent also explores Calm Code's low-pressure teaching philosophy, content and monetization choices, and platform decisions (Django, contributor hiring). Technical operations topics include CI cost optimization with custom runners and sustainable compute examples (Leaf.cloud), plus hands-on projects like Skrub's table vectorizer and GAP encoder for pragmatic tabular defaults.

Listeners will gain actionable insights on governance models, maintaining project health, and realistic business options — training, consulting, and partnerships — for anyone building or stewarding open source ML tools." +dateadded: 2024-05-06 +duration: PT01H15S +quotableClips: +- name: Episode Overview — Open Source Focus + startOffset: 0 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=0 + endOffset: 100 +- name: Guest Reintroduction & Vincent’s Open Source Profile + startOffset: 100 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=100 + endOffset: 240 +- name: Early Community Work & PyLadies Code Sprint + startOffset: 240 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=240 + endOffset: 259 +- name: Scikit Lego Origin, Adoption, and Career Impact + startOffset: 259 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=259 + endOffset: 363 +- name: 'Career Path: Econometrics → DevRel → Core Engineering' + startOffset: 363 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=363 + endOffset: 513 +- name: 'Company Naming: Why :probabl. Is Separate from Scikit-Learn' + startOffset: 513 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=513 + endOffset: 628 +- name: Scikit-Learn Governance, NumFOCUS, and Project History + startOffset: 628 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=628 + endOffset: 841 +- name: 'Ecosystem Strategy: Plugins vs. 
Core Scikit-Learn Features' + startOffset: 841 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=841 + endOffset: 1003 +- name: Scikit Lego in Corporate Training and Contributor Growth + startOffset: 1003 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1003 + endOffset: 1091 +- name: 'Maintainer Transition: Finding Sustainable Project Stewards' + startOffset: 1091 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1091 + endOffset: 1311 +- name: Motivating Volunteer Maintainers and Keeping Projects Fun + startOffset: 1311 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1311 + endOffset: 1409 +- name: 'Demonstrating Quality: Open Source Work as a Hiring Signal' + startOffset: 1409 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1409 + endOffset: 1546 +- name: 'Calm Code Philosophy: Practical, Low-Pressure Learning' + startOffset: 1546 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1546 + endOffset: 1644 +- name: 'Content Production: Videos, Scale, and Communication Practice' + startOffset: 1644 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1644 + endOffset: 1770 +- name: 'Calm Code Platform: Django, Monetization, and Hiring Contributors' + startOffset: 1770 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1770 + endOffset: 1902 +- name: 'CI and Cost Optimization: Custom Runners and GitHub Actions' + startOffset: 1902 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1902 + endOffset: 1946 +- name: 'Sustainable Compute Examples: Leaf.cloud and Environmental Impact' + startOffset: 1946 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1946 + endOffset: 2069 +- name: 'Teaching Fundamentals: Docker, pip, and Git Challenges for Beginners' + startOffset: 2069 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2069 + endOffset: 2136 +- name: 'Conceptual Learning: Mindset Over Commands for Tooling' + startOffset: 2136 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2136 + endOffset: 2302 +- name: Combining DevRel and Core 
Development Responsibilities + startOffset: 2302 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2302 + endOffset: 2481 +- name: 'Role Definition: Developer Relations Engineer at :probabl.' + startOffset: 2481 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2481 + endOffset: 2540 +- name: Enhancing Scikit-Learn with Interactive Content and Videos + startOffset: 2540 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2540 + endOffset: 2670 +- name: 'Deep Dive Example: Why the Standard Scaler Is Complex' + startOffset: 2670 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2670 + endOffset: 2911 +- name: 'Skrub Overview: Table Vectorizer and Pragmatic Tabular Defaults' + startOffset: 2911 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2911 + endOffset: 3027 +- name: 'Skrub GAP Encoder: Clustering Dirty Categories to Avoid One-Hot Explosion' + startOffset: 3027 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3027 + endOffset: 3227 +- name: 'Why Form a Company for Scikit-Learn: Funding and European Tech Goals' + startOffset: 3227 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3227 + endOffset: 3379 +- name: 'Potential Business Models: Training, Consulting, and Partnerships' + startOffset: 3379 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3379 + endOffset: 3454 +- name: 'Upcoming Work: Calm Code Book on Expectations vs. 
Reality in Data' + startOffset: 3454 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3454 + endOffset: 3497 +- name: 'Live Experiments: Converting Tree Models to SQL and Streaming Work' + startOffset: 3497 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3497 + endOffset: 3627 +- name: 'Live Stream Format: Preparation, Live Coding, and Demos' + startOffset: 3627 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3627 + endOffset: 3675 +- name: Episode Closing and Final Remarks + startOffset: 3675 + url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3675 + endOffset: 3615 transcript: - header: Episode Overview — Open Source Focus - header: Guest Reintroduction & Vincent’s Open Source Profile @@ -857,153 +992,19 @@ transcript: sec: 3715 time: '1:01:55' who: Alexey -description: 'Discover scalable scikit-learn ecosystems with scikit-lego and Skrub: - learn GAP Encoder, contributor growth, CI optimization and DevRel sustainability.' -intro: How do you build a sustainable scikit-learn ecosystem that serves both users - and contributors? In this episode, Vincent Warmerdam — Research Advocate at Rasa, - open source contributor and creator of Calm Code and the Koaning blog — walks through - practical decisions that keep ML tooling healthy over time. We cover scikit-lego’s - origins and adoption, governance and NumFOCUS roles, and the trade-offs between - adding features to core scikit-learn versus plugins.

Key topics include - maintaining contributor growth and steward transitions, motivating volunteer maintainers, - DevRel combined with core engineering, and demonstrable open source quality as a - hiring signal. Vincent also explains Skrub’s table vectorizer and the GAP Encoder - approach for clustering dirty categorical values to avoid one-hot explosion, plus - examples of CI and cost optimization (custom runners, GitHub Actions) and sustainable - compute choices. You’ll get actionable guidance on teaching fundamentals (Docker, - pip, Git), producing interactive content, and potential business models around training - and consulting. Tune in to learn concrete strategies for building, funding, and - scaling scikit-learn-compatible tools and communities without sacrificing long-term - sustainability. -dateadded: '2024-05-06' -duration: PT01H15S -quotableClips: -- name: Episode Overview — Open Source Focus - startOffset: 0 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=0 - endOffset: 100 -- name: Guest Reintroduction & Vincent’s Open Source Profile - startOffset: 100 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=100 - endOffset: 240 -- name: Early Community Work & PyLadies Code Sprint - startOffset: 240 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=240 - endOffset: 259 -- name: Scikit Lego Origin, Adoption, and Career Impact - startOffset: 259 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=259 - endOffset: 363 -- name: 'Career Path: Econometrics → DevRel → Core Engineering' - startOffset: 363 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=363 - endOffset: 513 -- name: 'Company Naming: Why :probabl. Is Separate from Scikit-Learn' - startOffset: 513 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=513 - endOffset: 628 -- name: Scikit-Learn Governance, NumFOCUS, and Project History - startOffset: 628 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=628 - endOffset: 841 -- name: 'Ecosystem Strategy: Plugins vs. 
Core Scikit-Learn Features' - startOffset: 841 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=841 - endOffset: 1003 -- name: Scikit Lego in Corporate Training and Contributor Growth - startOffset: 1003 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1003 - endOffset: 1091 -- name: 'Maintainer Transition: Finding Sustainable Project Stewards' - startOffset: 1091 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1091 - endOffset: 1311 -- name: Motivating Volunteer Maintainers and Keeping Projects Fun - startOffset: 1311 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1311 - endOffset: 1409 -- name: 'Demonstrating Quality: Open Source Work as a Hiring Signal' - startOffset: 1409 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1409 - endOffset: 1546 -- name: 'Calm Code Philosophy: Practical, Low-Pressure Learning' - startOffset: 1546 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1546 - endOffset: 1644 -- name: 'Content Production: Videos, Scale, and Communication Practice' - startOffset: 1644 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1644 - endOffset: 1770 -- name: 'Calm Code Platform: Django, Monetization, and Hiring Contributors' - startOffset: 1770 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1770 - endOffset: 1902 -- name: 'CI and Cost Optimization: Custom Runners and GitHub Actions' - startOffset: 1902 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1902 - endOffset: 1946 -- name: 'Sustainable Compute Examples: Leaf.cloud and Environmental Impact' - startOffset: 1946 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=1946 - endOffset: 2069 -- name: 'Teaching Fundamentals: Docker, pip, and Git Challenges for Beginners' - startOffset: 2069 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2069 - endOffset: 2136 -- name: 'Conceptual Learning: Mindset Over Commands for Tooling' - startOffset: 2136 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2136 - endOffset: 2302 -- name: Combining DevRel and Core 
Development Responsibilities - startOffset: 2302 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2302 - endOffset: 2481 -- name: 'Role Definition: Developer Relations Engineer at :probabl.' - startOffset: 2481 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2481 - endOffset: 2540 -- name: Enhancing Scikit-Learn with Interactive Content and Videos - startOffset: 2540 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2540 - endOffset: 2670 -- name: 'Deep Dive Example: Why the Standard Scaler Is Complex' - startOffset: 2670 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2670 - endOffset: 2911 -- name: 'Skrub Overview: Table Vectorizer and Pragmatic Tabular Defaults' - startOffset: 2911 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=2911 - endOffset: 3027 -- name: 'Skrub GAP Encoder: Clustering Dirty Categories to Avoid One-Hot Explosion' - startOffset: 3027 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3027 - endOffset: 3227 -- name: 'Why Form a Company for Scikit-Learn: Funding and European Tech Goals' - startOffset: 3227 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3227 - endOffset: 3379 -- name: 'Potential Business Models: Training, Consulting, and Partnerships' - startOffset: 3379 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3379 - endOffset: 3454 -- name: 'Upcoming Work: Calm Code Book on Expectations vs. 
Reality in Data' - startOffset: 3454 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3454 - endOffset: 3497 -- name: 'Live Experiments: Converting Tree Models to SQL and Streaming Work' - startOffset: 3497 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3497 - endOffset: 3627 -- name: 'Live Stream Format: Preparation, Live Coding, and Demos' - startOffset: 3627 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3627 - endOffset: 3675 -- name: Episode Closing and Final Remarks - startOffset: 3675 - url: https://www.youtube.com/watch?v=UPlIETGwTg8&t=3675 - endOffset: 3615 ---- +context: 'Context: This episode surveys the Scikit-Learn ecosystem, related projects + (Scikit Lego, Skrub), and initiatives like Calm Code and :probabl., weaving together + career stories, governance, tooling choices, content production, maintainer handoffs, + CI/cost concerns, and early business models. + Core unifying idea: Long-term health and impact of open-source machine-learning + projects depends not just on great code but on a deliberate integration of engineering + excellence, community stewardship, accessible education, and sustainable operational/business + practices — i.e., building pragmatic tools and clear learning paths while creating + incentives (training, consulting, platform models, cost-efficient infrastructure, + and low-pressure contributor experiences) that enable maintainers and contributors + to keep projects useful, adoptable, and durable.' +--- Links: * [probabl. 
YouTube channel](https://www.youtube.com/@UCIat2Cdg661wF5DQDWTQAmg){:target="_blank"} diff --git a/_podcast/s09e08-from-open-source-maintainer-to-founder.md b/_podcast/open-source-turned-into-career-and-startup-creation.md similarity index 97% rename from _podcast/s09e08-from-open-source-maintainer-to-founder.md rename to _podcast/open-source-turned-into-career-and-startup-creation.md index 444c9d07..72bbbf37 100644 --- a/_podcast/s09e08-from-open-source-maintainer-to-founder.md +++ b/_podcast/open-source-turned-into-career-and-startup-creation.md @@ -1,22 +1,126 @@ --- +title: "From Developer to Startup Founder: Building a Career Through Open Source" +short: "From Open-Source Maintainer to Founder" +season: 9 episode: 8 guests: - willmcgugan +image: images/podcast/open-source-turned-into-career-and-startup-creation.jpg ids: anchor: From-Open-Source-Maintainer-to-Founder---Will-McGugan-e1kqtu5 youtube: bwfR9dyxf1M -image: images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/From-Open-Source-Maintainer-to-Founder---Will-McGugan-e1kqtu5 apple: https://podcasts.apple.com/us/podcast/designing-a-data-science-organization-lisa-cohen/id1541710331?i=1000569172916 spotify: https://open.spotify.com/episode/4JAwU2jQuXu4MoMucsE899?si=6ed45b98dd4a415a youtube: https://www.youtube.com/watch?v=bwfR9dyxf1M -season: 9 -short: From Open-Source Maintainer to Founder -title: Build Rich Terminal UIs with Textual in Python and Turn Open Source into a - Startup + +description: "Discover how to turn open source work into a sustainable career and even a startup. Learn about terminal apps, fundraising, community growth & hiring signals." +intro: "How do you turn open source work into a sustainable career and even a startup? In this episode Will McGugan — a Python open source maintainer and creator of PyFilesystem, Rich, and Textual — walks through his path from video game developer to founder of Textualize. 
We trace his early projects (BBCode parser, chess libraries), the design of PyFilesystem and S3 integrations, and how solving personal needs led to learning by building.

Will breaks down the technical and product journey: Rich’s terminal styling, tables, progress bars and observability features; the Textual framework for terminal GUIs; and the moment of founding Textualize after a viral tweet. He explains building in public, community growth via demos and social media, hiring through open source signals, and practical dev workflows with GitHub, PR reviews, and releases. He also outlines Textualize’s positioning and web hosting business model for terminal apps, plus contribution channels like Discourse and Discord.

Listen to learn concrete, repeatable steps for leveraging Python open source to find freelance freedom, attract users, raise pre-seed interest, and transition from developer to startup founder." +topics: +- open-source +- startups +- career growth +- entrepreneurship +dateadded: 2022-07-15 + +duration: PT00H58M41S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=0 + endOffset: 99 +- name: 'Guest Introduction: Will McGugan, Python Open Source Maintainer' + startOffset: 99 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=99 + endOffset: 127 +- name: 'Career Path: From Video Games to Textualize Founder' + startOffset: 127 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=127 + endOffset: 258 +- name: 'Early Open Source Tools: BBCode Parser & Chess Libraries' + startOffset: 258 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=258 + endOffset: 412 +- name: 'PyFilesystem: Virtual File System Abstraction (fs)' + startOffset: 412 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=412 + endOffset: 575 +- name: 'S3 Integration: S3Fs and Cloud File Interfaces (Pandas integration)' + startOffset: 575 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=575 + endOffset: 689 +- name: 'Project Origins: Solving Personal Needs & Learning by Building' + startOffset: 689 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=689 + endOffset: 907 +- name: 'Freelance Career: Contracting, Long-Term Contracts, and Independence' + startOffset: 907 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=907 + endOffset: 1068 +- name: 'Open Source as Creative Outlet: Freedom Beyond Client Work' + startOffset: 1068 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1068 + endOffset: 1166 +- name: 'Rich Library: Terminal Styling, Tables, Progress, and CLI UX' + startOffset: 1166 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1166 + endOffset: 1476 +- name: 'Rich for Observability: Log Formatting and 
Readability' + startOffset: 1476 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1476 + endOffset: 1599 +- name: 'Textual Framework: Building Terminal GUIs on Rich' + startOffset: 1599 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1599 + endOffset: 1688 +- name: 'Founding Textualize: Pre-Seed Fundraising After Tweeting' + startOffset: 1688 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1688 + endOffset: 1900 +- name: 'Building in Public: Social Media, Demos, and Community Growth' + startOffset: 1900 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1900 + endOffset: 2238 +- name: 'Team & Roadmap: Early Hires, Roles, and Product Vision' + startOffset: 2238 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2238 + endOffset: 2312 +- name: 'Business Model: Web Hosting for Terminal Apps & Generous Free Tier' + startOffset: 2312 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2312 + endOffset: 2493 +- name: 'Market Comparison: Streamlit Analogy and Positioning' + startOffset: 2493 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2493 + endOffset: 2678 +- name: 'Hiring Signals: Open Source Contributions as a Recruiter’s Lens' + startOffset: 2678 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2678 + endOffset: 2760 +- name: 'Development Workflow: GitHub Projects, PR Reviews, and Releases' + startOffset: 2760 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2760 + endOffset: 2977 +- name: 'Community Channels: Discourse, Discord, and How to Contribute' + startOffset: 2977 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2977 + endOffset: 3005 +- name: 'Project Promotion: Getting GitHub Stars and Viral Reach' + startOffset: 3005 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3005 + endOffset: 3440 +- name: 'Advice for New Open Source Authors: Solve Your Own Problem' + startOffset: 3440 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3440 + endOffset: 3571 +- name: 'Closing & Contact: Find Will on Twitter and Textualize 
links' + startOffset: 3571 + url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3571 + endOffset: 3521 + transcript: -- header: Podcast Introduction - header: 'Guest Introduction: Will McGugan, Python Open Source Maintainer' - line: This week, we'll talk about working on open source. We have a special guest today, Will. Will is a software engineer and author. He's quite an enthusiastic @@ -1274,120 +1378,6 @@ transcript: sec: 3620 time: '1:00:20' who: Alexey -description: Learn Textual, Rich, and open source strategies to build terminal UIs - in Python and turn projects into a startup - fundraising, community growth, hosting - tips. -intro: How do you build expressive terminal UIs in Python and turn open source work - into a sustainable company? In this episode, Will McGugan — a software engineer from - Edinburgh and creator of PyFilesystem, Rich, and Textual — walks through that exact - journey. We cover his career path from game development to founding Textualize, - the origins of projects born from solving personal problems, and early libraries - like BBCode parsers and chess tools.

Listen for deep technical discussion - about PyFilesystem and S3 integration (S3Fs and Pandas workflows), Rich’s capabilities - for terminal styling, tables, progress bars and observability-focused log formatting, - and how Textual layers on Rich to enable full terminal GUIs. Will also explains - the transition from open source maintainer to founder — pre-seed fundraising after - tweeting, building in public, community growth, hiring early team members, product - roadmap, and a hosting-based business model with a generous free tier (Streamlit-style - positioning). Practical takeaways include development workflow, recruiting via open - source contributions, community channels, and advice for new OSS authors. If you - build Python CLI tools or want to commercialize open source, this episode offers - concrete technical and business insights. -dateadded: '2022-07-15' -duration: PT00H58M41S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=0 - endOffset: 99 -- name: 'Guest Introduction: Will McGugan, Python Open Source Maintainer' - startOffset: 99 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=99 - endOffset: 127 -- name: 'Career Path: From Video Games to Textualize Founder' - startOffset: 127 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=127 - endOffset: 258 -- name: 'Early Open Source Tools: BBCode Parser & Chess Libraries' - startOffset: 258 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=258 - endOffset: 412 -- name: 'PyFilesystem: Virtual File System Abstraction (fs)' - startOffset: 412 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=412 - endOffset: 575 -- name: 'S3 Integration: S3Fs and Cloud File Interfaces (Pandas integration)' - startOffset: 575 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=575 - endOffset: 689 -- name: 'Project Origins: Solving Personal Needs & Learning by Building' - startOffset: 689 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=689 - 
endOffset: 907 -- name: 'Freelance Career: Contracting, Long-Term Contracts, and Independence' - startOffset: 907 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=907 - endOffset: 1068 -- name: 'Open Source as Creative Outlet: Freedom Beyond Client Work' - startOffset: 1068 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1068 - endOffset: 1166 -- name: 'Rich Library: Terminal Styling, Tables, Progress, and CLI UX' - startOffset: 1166 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1166 - endOffset: 1476 -- name: 'Rich for Observability: Log Formatting and Readability' - startOffset: 1476 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1476 - endOffset: 1599 -- name: 'Textual Framework: Building Terminal GUIs on Rich' - startOffset: 1599 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1599 - endOffset: 1688 -- name: 'Founding Textualize: Pre-Seed Fundraising After Tweeting' - startOffset: 1688 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1688 - endOffset: 1900 -- name: 'Building in Public: Social Media, Demos, and Community Growth' - startOffset: 1900 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=1900 - endOffset: 2238 -- name: 'Team & Roadmap: Early Hires, Roles, and Product Vision' - startOffset: 2238 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2238 - endOffset: 2312 -- name: 'Business Model: Web Hosting for Terminal Apps & Generous Free Tier' - startOffset: 2312 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2312 - endOffset: 2493 -- name: 'Market Comparison: Streamlit Analogy and Positioning' - startOffset: 2493 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2493 - endOffset: 2678 -- name: 'Hiring Signals: Open Source Contributions as a Recruiter’s Lens' - startOffset: 2678 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2678 - endOffset: 2760 -- name: 'Development Workflow: GitHub Projects, PR Reviews, and Releases' - startOffset: 2760 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2760 - 
endOffset: 2977 -- name: 'Community Channels: Discourse, Discord, and How to Contribute' - startOffset: 2977 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=2977 - endOffset: 3005 -- name: 'Project Promotion: Getting GitHub Stars and Viral Reach' - startOffset: 3005 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3005 - endOffset: 3440 -- name: 'Advice for New Open Source Authors: Solve Your Own Problem' - startOffset: 3440 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3440 - endOffset: 3571 -- name: 'Closing & Contact: Find Will on Twitter and Textualize links' - startOffset: 3571 - url: https://www.youtube.com/watch?v=bwfR9dyxf1M&t=3571 - endOffset: 3521 --- Links: diff --git a/_podcast/s02e08-personal-branding.md b/_podcast/personal-brand-for-data-professionals.md similarity index 71% rename from _podcast/s02e08-personal-branding.md rename to _podcast/personal-brand-for-data-professionals.md index c3592b4f..7ea73b85 100644 --- a/_podcast/s02e08-personal-branding.md +++ b/_podcast/personal-brand-for-data-professionals.md @@ -1,12 +1,11 @@ --- -title: 'Build a Personal Brand: Publish on LinkedIn/Medium, Grow Audience, Monetize - with Online Courses' -short: Personal Branding -guests: -- admondleekinlim -image: images/podcast/s02e08-personal-branding.jpg +title: "Build a Personal Brand: Publish on LinkedIn/Medium, Grow Audience, Monetize with Online Courses" +short: "Personal Branding" season: 2 episode: 8 +guests: +- admondleekinlim +image: images/podcast/personal-brand-for-data-professionals.jpg ids: youtube: tQRQnz_aHYQ anchor: Personal-Branding---Admond-Lee-Kin-Lim-ern77e @@ -15,25 +14,16 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Personal-Branding---Admond-Lee-Kin-Lim-ern77e spotify: https://open.spotify.com/episode/61Yv13MISTeP5nOVAZpY88 apple: https://podcasts.apple.com/us/podcast/personal-branding-admond-lee-kin-lim/id1541710331?i=1000511761026 -description: 'Build a personal brand: publish on LinkedIn & Medium, grow your 
audience, - and monetize with online courses—publishing best practices, course design, growth - tips.' -intro: How do you build a personal brand that actually attracts an audience and turns - into revenue? In this episode, Admond Lee Kin Lim — data scientist, writer, speaker, - and Data Science Instructor at Hackwagon Academy — breaks down a practical path - from first posts to monetizing with online courses. Drawing on his experience at - Micron and as an independent consultant and communicator featured in KDnuggets and - Medium, Admond defines personal brand purpose and positioning, then walks through - the first steps and mindset to start publishing on LinkedIn and Medium.

- You’ll get concrete guidance on formats and best practices for LinkedIn and Medium, - idea generation and content frequency, and the tools he uses (BuzzSumo, Feedly and - alternatives). We also cover when to add podcasting or audio, offline networking - (Lunchclub, 1x1s), conference speaking, and overcoming imposter syndrome. Finally, - Admond explains monetization strategies for selling online courses, course design - focused on student outcomes, aligning content with your values, balancing frequency - vs. quality, and metrics for iteration. Listen for actionable tips, tools, and resource - recommendations to grow your audience and monetize your expertise. -dateadded: '2021-03-05' + +description: "Build a personal brand: publish on LinkedIn & Medium, grow your audience, and monetize with online courses—publishing best practices, course design, growth tips." +intro: "How do you build a personal brand that actually attracts an audience and turns into revenue? In this episode, Admond Lee Kin Lim — data scientist, writer, speaker, and Data Science Instructor at Hackwagon Academy — breaks down a practical path from first posts to monetizing with online courses. Drawing on his experience at Micron and as an independent consultant and communicator featured in KDnuggets and Medium, Admond defines personal brand purpose and positioning, then walks through the first steps and mindset to start publishing on LinkedIn and Medium.

You’ll get concrete guidance on formats and best practices for LinkedIn and Medium, idea generation and content frequency, and the tools he uses (BuzzSumo, Feedly and alternatives). We also cover when to add podcasting or audio, offline networking (Lunchclub, 1x1s), conference speaking, and overcoming imposter syndrome. Finally, Admond explains monetization strategies for selling online courses, course design focused on student outcomes, aligning content with your values, balancing frequency vs. quality, and metrics for iteration. Listen for actionable tips, tools, and resource recommendations to grow your audience and monetize your expertise." +topics: +- personal brand +- career growth +- monetization +dateadded: 2021-03-05 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -111,6 +101,7 @@ quotableClips: startOffset: 3030 url: https://www.youtube.com/watch?v=tQRQnz_aHYQ&t=3030 endOffset: 3030 + --- We talked about: diff --git a/_podcast/s06e06-from-academia-to-industry.md b/_podcast/postdoc-to-data-science-lead-career-transition.md similarity index 97% rename from _podcast/s06e06-from-academia-to-industry.md rename to _podcast/postdoc-to-data-science-lead-career-transition.md index 66e2fdc4..6816cf70 100644 --- a/_podcast/s06e06-from-academia-to-industry.md +++ b/_podcast/postdoc-to-data-science-lead-career-transition.md @@ -1,12 +1,11 @@ --- -title: 'From Postdoc to Data Science Lead: ML Foundations, Docker Deployment & Hiring - Tips' -short: Moving from Academia to Industry -guests: -- cjjenkins -image: images/podcast/s06e06-from-academia-to-industry.jpg +title: "From Postdoc to Data Science Lead: ML Foundations, Docker Deployment & Hiring Tips" +short: "Moving from Academia to Industry" season: 6 episode: 6 +guests: +- cjjenkins +image: images/podcast/postdoc-to-data-science-lead-career-transition.jpg ids: youtube: m4F651BpUFk anchor: Moving-from-Academia-to-Industry---CJ-Jenkins-e1bh84o @@ -15,6 +14,136 @@ links: anchor: 
https://anchor.fm/datatalksclub/episodes/Moving-from-Academia-to-Industry---CJ-Jenkins-e1bh84o spotify: https://open.spotify.com/episode/5Jvo53ibSoX6rfkfdGq5pJ apple: https://podcasts.apple.com/us/podcast/moving-from-academia-to-industry-cj-jenkins/id1541710331?i=1000544589971 + +description: "Learn a one-year roadmap from postdoc to data science lead: machine learning foundations, Docker deployment, resume and hiring tips to land jobs." +intro: "How do you go from a postdoc to a data science lead while mastering machine learning foundations and deployment? In this episode, CJ Jenkins — a PhD-turned-data science lead working on credit risk modeling, with published research and a textbook used in academia — walks through that transition. We trace CJ’s roots in evolutionary biology and genomics, the statistical ML foundations (GLMs, population dynamics), and practical tools like Bash, R, Python, and SQL. Key topics include Docker deployment and bridging the gap between research and production, hiring signals and interview assessment techniques that prioritize learning agility and humility, and concrete career tactics: a one-year Coursera sprint (Johns Hopkins, Andrew Ng), resume rewrites (14 CV iterations), LinkedIn keyword strategy, and selective application versus volume. CJ also discusses location and networking strategies (Berlin, Stockholm, Klarna onboarding), technical expectations for juniors, code quality, and building psychological safety on teams. Listen to learn actionable steps for skills-first resumes, interview preparation, deployment basics, and how to translate academic output into industry impact. Find CJ on LinkedIn for follow-up questions." 
+topics: +- career transition +- machine learning +- academia +- career growth +dateadded: 2021-12-11 + +duration: PT00H58M44S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=0 + endOffset: 58 +- name: 'Career Journey: Postdoc to Data Science Lead' + startOffset: 58 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=58 + endOffset: 88 +- name: 'Evolutionary Biology: Statistics & Population Dynamics' + startOffset: 88 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=88 + endOffset: 196 +- name: 'Academic Research as Data Science Practice: Genomics & Bash' + startOffset: 196 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=196 + endOffset: 285 +- name: 'Statistical Machine Learning: GLMs and Foundations' + startOffset: 285 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=285 + endOffset: 370 +- name: 'Bridging Gaps: Deployment, Docker, and Python Learning' + startOffset: 370 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=370 + endOffset: 521 +- name: 'Hiring Signals: Smartness, Ambition, and Receptiveness to Feedback' + startOffset: 521 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=521 + endOffset: 642 +- name: 'Interview Assessment: Testing Learning Agility and Humility' + startOffset: 642 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=642 + endOffset: 719 +- name: 'First Tech Interview: Referral, Case Study in R, and Honesty' + startOffset: 719 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=719 + endOffset: 936 +- name: 'Transition Timeline: One-Year Plan and Coursera Sprint' + startOffset: 936 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=936 + endOffset: 1034 +- name: 'Resume Strategy: Skills-First Rewriting and LinkedIn Keywords' + startOffset: 1034 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1034 + endOffset: 1240 +- name: 'Refining Applications: 14 CV Iterations, Recruiter Tips, and ATS' + startOffset: 1240 + url: 
https://www.youtube.com/watch?v=m4F651BpUFk&t=1240 + endOffset: 1366 +- name: 'Learning Resources: Johns Hopkins Specialization and Andrew Ng' + startOffset: 1366 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1366 + endOffset: 1537 +- name: 'Location Strategy: Choosing Berlin and Targeting Companies' + startOffset: 1537 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1537 + endOffset: 1716 +- name: 'Application Strategy: Selective Research vs. Volume Applications' + startOffset: 1716 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1716 + endOffset: 1860 +- name: 'Job Move: Klarna Experience and Onboarding Challenges' + startOffset: 1860 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1860 + endOffset: 1968 +- name: 'Internal Mobility: Relocating to Stockholm Within the Company' + startOffset: 1968 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1968 + endOffset: 2028 +- name: 'Market Entry: Networking, Meetups, and Community Engagement' + startOffset: 2028 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2028 + endOffset: 2203 +- name: 'Technical Expectations: Clean Code and Coding Proficiency for Juniors' + startOffset: 2203 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2203 + endOffset: 2259 +- name: 'Skill Building: Pair Programming, LeetCode, and Code Reviews' + startOffset: 2259 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2259 + endOffset: 2402 +- name: 'Research vs Industry: Publications, Portfolios, and Relevance' + startOffset: 2402 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2402 + endOffset: 2472 +- name: 'Real-World Data Work: Cleaning, Bash, and Domain Translation' + startOffset: 2472 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2472 + endOffset: 2624 +- name: 'Communication Shift: Simplifying Explanations and Office Culture' + startOffset: 2624 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2624 + endOffset: 2838 +- name: 'Team Dynamics: Open Offices, Proximity, and Social Bonding' + 
startOffset: 2838 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2838 + endOffset: 2930 +- name: 'Counterproductive Habits: Competitiveness vs. Collaboration' + startOffset: 2930 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2930 + endOffset: 3065 +- name: 'Psychological Safety: Team Rituals, Sharing Failures, and Trust' + startOffset: 3065 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3065 + endOffset: 3165 +- name: 'Long-Term Learning: NLP, Kaggle as a Learning Resource' + startOffset: 3165 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3165 + endOffset: 3328 +- name: 'Academic Output: Writing a Textbook on Parasitology' + startOffset: 3328 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3328 + endOffset: 3525 +- name: Closing Remarks and Contact Info (Find CJ on LinkedIn) + startOffset: 3525 + url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3525 + endOffset: 3524 + transcript: - header: Podcast Introduction - header: 'Career Journey: Postdoc to Data Science Lead' @@ -1223,142 +1352,6 @@ transcript: sec: 3582 time: '59:42' who: Alexey -description: 'Learn a one-year roadmap from postdoc to data science lead: machine - learning foundations, Docker deployment, resume and hiring tips to land jobs.' -intro: 'How do you go from a postdoc to a data science lead while mastering machine - learning foundations and deployment? In this episode, CJ Jenkins — a PhD-turned-data - science lead working on credit risk modeling, with published research and a textbook - used in academia — walks through that transition. We trace CJ’s roots in evolutionary - biology and genomics, the statistical ML foundations (GLMs, population dynamics), - and practical tools like Bash, R, Python, and SQL. 
Key topics include Docker deployment - and bridging the gap between research and production, hiring signals and interview - assessment techniques that prioritize learning agility and humility, and concrete - career tactics: a one-year Coursera sprint (Johns Hopkins, Andrew Ng), resume rewrites - (14 CV iterations), LinkedIn keyword strategy, and selective application versus - volume. CJ also discusses location and networking strategies (Berlin, Stockholm, - Klarna onboarding), technical expectations for juniors, code quality, and building - psychological safety on teams. Listen to learn actionable steps for skills-first - resumes, interview preparation, deployment basics, and how to translate academic - output into industry impact. Find CJ on LinkedIn for follow-up questions.' -dateadded: '2021-12-11' -duration: PT00H58M44S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=0 - endOffset: 58 -- name: 'Career Journey: Postdoc to Data Science Lead' - startOffset: 58 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=58 - endOffset: 88 -- name: 'Evolutionary Biology: Statistics & Population Dynamics' - startOffset: 88 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=88 - endOffset: 196 -- name: 'Academic Research as Data Science Practice: Genomics & Bash' - startOffset: 196 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=196 - endOffset: 285 -- name: 'Statistical Machine Learning: GLMs and Foundations' - startOffset: 285 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=285 - endOffset: 370 -- name: 'Bridging Gaps: Deployment, Docker, and Python Learning' - startOffset: 370 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=370 - endOffset: 521 -- name: 'Hiring Signals: Smartness, Ambition, and Receptiveness to Feedback' - startOffset: 521 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=521 - endOffset: 642 -- name: 'Interview Assessment: Testing Learning Agility and Humility' - 
startOffset: 642 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=642 - endOffset: 719 -- name: 'First Tech Interview: Referral, Case Study in R, and Honesty' - startOffset: 719 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=719 - endOffset: 936 -- name: 'Transition Timeline: One-Year Plan and Coursera Sprint' - startOffset: 936 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=936 - endOffset: 1034 -- name: 'Resume Strategy: Skills-First Rewriting and LinkedIn Keywords' - startOffset: 1034 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1034 - endOffset: 1240 -- name: 'Refining Applications: 14 CV Iterations, Recruiter Tips, and ATS' - startOffset: 1240 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1240 - endOffset: 1366 -- name: 'Learning Resources: John Hopkins Specialization and Andrew Ng' - startOffset: 1366 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1366 - endOffset: 1537 -- name: 'Location Strategy: Choosing Berlin and Targeting Companies' - startOffset: 1537 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1537 - endOffset: 1716 -- name: 'Application Strategy: Selective Research vs. 
Volume Applications' - startOffset: 1716 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1716 - endOffset: 1860 -- name: 'Job Move: Klarna Experience and Onboarding Challenges' - startOffset: 1860 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1860 - endOffset: 1968 -- name: 'Internal Mobility: Relocating to Stockholm Within the Company' - startOffset: 1968 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=1968 - endOffset: 2028 -- name: 'Market Entry: Networking, Meetups, and Community Engagement' - startOffset: 2028 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2028 - endOffset: 2203 -- name: 'Technical Expectations: Clean Code and Coding Proficiency for Juniors' - startOffset: 2203 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2203 - endOffset: 2259 -- name: 'Skill Building: Pair Programming, LeetCode, and Code Reviews' - startOffset: 2259 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2259 - endOffset: 2402 -- name: 'Research vs Industry: Publications, Portfolios, and Relevance' - startOffset: 2402 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2402 - endOffset: 2472 -- name: 'Real-World Data Work: Cleaning, Bash, and Domain Translation' - startOffset: 2472 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2472 - endOffset: 2624 -- name: 'Communication Shift: Simplifying Explanations and Office Culture' - startOffset: 2624 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2624 - endOffset: 2838 -- name: 'Team Dynamics: Open Offices, Proximity, and Social Bonding' - startOffset: 2838 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2838 - endOffset: 2930 -- name: 'Counterproductive Habits: Competitiveness vs. 
Collaboration' - startOffset: 2930 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=2930 - endOffset: 3065 -- name: 'Psychological Safety: Team Rituals, Sharing Failures, and Trust' - startOffset: 3065 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3065 - endOffset: 3165 -- name: 'Long-Term Learning: NLP, Kaggle as a Learning Resource' - startOffset: 3165 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3165 - endOffset: 3328 -- name: 'Academic Output: Writing a Textbook on Parasitology' - startOffset: 3328 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3328 - endOffset: 3525 -- name: Closing Remarks and Contact Info (Find CJ on LinkedIn) - startOffset: 3525 - url: https://www.youtube.com/watch?v=m4F651BpUFk&t=3525 - endOffset: 3524 --- Links: diff --git a/_podcast/s20e08-from-hackathons-to-developer-advocacy.md b/_podcast/practical-devrel-demofirst-education-and-open-source.md similarity index 95% rename from _podcast/s20e08-from-hackathons-to-developer-advocacy.md rename to _podcast/practical-devrel-demofirst-education-and-open-source.md index 89077127..a2bb8727 100644 --- a/_podcast/s20e08-from-hackathons-to-developer-advocacy.md +++ b/_podcast/practical-devrel-demofirst-education-and-open-source.md @@ -1,20 +1,142 @@ --- +title: "Developer Advocacy Through Community Impact: Technical Leadership, Open Source Mentorship & Demo-Driven Communication" +short: "Developer Advocacy Through Community Impact" +season: 20 episode: 8 guests: - willrussell +image: images/podcast/practical-devrel-demofirst-education-and-open-source.jpg ids: anchor: datatalksclub/episodes/From-Hackathons-to-Developer-Advocacy---Will-Russel-e339a5f youtube: vXbMUfHE1OE -image: images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Hackathons-to-Developer-Advocacy---Will-Russel-e339a5f apple: 
https://podcasts.apple.com/us/podcast/from-hackathons-to-developer-advocacy-will-russel/id1541710331?i=1000709634418 spotify: https://open.spotify.com/episode/4Lt785S38GuK0W2m7naRKt youtube: https://www.youtube.com/watch?v=vXbMUfHE1OE -season: 20 -short: From Hackathons To Developer Advocacy -title: 'Run Hackathons & Scale MLH-Style Open Source Fellowships: Organize, Onboard, - Judge' +description: "Master developer advocacy, open source mentorship & demo-driven communication to elevate technical leadership, amplify community impact & accelerate adoption." +topics: +- open-source +- computer vision +- tools +- MLOps +- software engineering +intro: "How do developer advocates create measurable community impact while balancing technical leadership, mentorship, and clear communication? In this episode Will Russell, Developer Advocate at Kestra, explores that question through the lens of workflow orchestration and developer education. Will is known for his technical video content on workflow orchestration and for building open source education programs that help new contributors make their first pull requests.

We cover core topics including technical leadership in community settings, practical approaches to open source mentorship, and the power of demo-driven communication and documentation to make complex tools approachable. Will discusses how creating targeted videos and clear docs lowers barriers for developers and nurtures sustainable contributor pipelines.

Listeners will come away with concrete ideas for designing open source education programs, using demos to explain concepts, and applying developer advocacy techniques to grow healthier communities. This episode is useful for developer advocates, engineering managers, open source maintainers, and anyone interested in workflow orchestration, developer education, and community-driven technical leadership." +dateadded: 2025-05-26 +duration: PT01H01M29S +quotableClips: +- name: Episode Opening & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=0 + endOffset: 201 +- name: 'Video Production Setup: Camera, Lens & Webcam Workflow' + startOffset: 201 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=201 + endOffset: 343 +- name: 'Audio Setup: Microphone, Preamp & Pop Filtering' + startOffset: 343 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=343 + endOffset: 415 +- name: 'Lighting Strategy: 45° Key Light & Background Separation' + startOffset: 415 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=415 + endOffset: 641 +- name: Early Career & Hackathon Discovery + startOffset: 641 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=641 + endOffset: 706 +- name: 'Hackathons as Learning: Git, Teamwork & Building Projects' + startOffset: 706 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=706 + endOffset: 736 +- name: 'Open Source Education Programs: Path from Contract to Full-Time' + startOffset: 736 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=736 + endOffset: 964 +- name: 'Organizing Hackathons: Leadership, Coordination & Soft Skills' + startOffset: 964 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=964 + endOffset: 1207 +- name: 'Career Trade-offs: Maintaining Technical Depth vs. Community Work' + startOffset: 1207 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1207 + endOffset: 1322 +- name: Role Variety at Small Companies vs. 
Specialized Teams + startOffset: 1322 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1322 + endOffset: 1398 +- name: 'Running Hackathons: Format, Online Tools & Office Hours' + startOffset: 1398 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1398 + endOffset: 1526 +- name: 'Judging Strategies: Scoring Matrices, Categories & Tie-Breakers' + startOffset: 1526 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1526 + endOffset: 1574 +- name: Sponsor-Driven Challenges & Themed Categories + startOffset: 1574 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1574 + endOffset: 1784 +- name: 'Case Study — Willmojis: Image Recognition, Font Generation & Demo' + startOffset: 1784 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1784 + endOffset: 2019 +- name: 'Major League Hacking (MLH): Community Support & Rep Experience' + startOffset: 2019 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2019 + endOffset: 2143 +- name: 'MLH Fellowship: Mentorship Model & Contributing to Large Repos' + startOffset: 2143 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2143 + endOffset: 2342 +- name: 'Contribution Best Practices: PR Quality, Git Skills & Onboarding' + startOffset: 2342 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2342 + endOffset: 2476 +- name: 'Onboarding Complex Projects: Environment Setup & Maintainer Collaboration' + startOffset: 2476 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2476 + endOffset: 2620 +- name: Hardware Constraints & Cloud Workarounds (Colab, VMs) + startOffset: 2620 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2620 + endOffset: 2762 +- name: 'Program Scalability: Repeatability, Budgets & AI Opportunities' + startOffset: 2762 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2762 + endOffset: 2890 +- name: 'Fellowship Accessibility: Students, Career Changers & Motivation' + startOffset: 2890 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2890 + endOffset: 2954 +- name: 'Developer Advocacy at 
Kestra: Documentation, Demos & Outreach' + startOffset: 2954 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2954 + endOffset: 3109 +- name: 'Content Workflow: Bullet Points, Demos & Collaboration with Writers' + startOffset: 3109 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3109 + endOffset: 3220 +- name: 'Video Strategy: Define Goal, Maintain Pace & Full Walkthroughs' + startOffset: 3220 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3220 + endOffset: 3270 +- name: 'Feature Demo Example: "After Execution" Notifications in Workflows' + startOffset: 3270 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3270 + endOffset: 3442 +- name: 'Learn with Kestra Series: Tool Tutorials (Docker, Postgres, Git)' + startOffset: 3442 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3442 + endOffset: 3599 +- name: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' + startOffset: 3599 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3599 + endOffset: 3689 +- name: Episode Closing & Call to Explore Kestra Content + startOffset: 3689 + url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3689 + endOffset: 3689 transcript: - header: Episode Opening & Guest Overview - line: This week we’ll discuss many topics—developer advocacy, organizing hackathons, @@ -1416,7 +1538,7 @@ transcript: sec: 3578 time: '59:38' who: Alexey -- header: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' +- header: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' - line: When I was at the fellowship, managing a group of student leaders, I felt overwhelmed and unsure if I was giving enough support, especially early in my career. @@ -1451,142 +1573,17 @@ transcript: sec: 3689 time: '1:01:29' who: Alexey -description: 'Master hackathons, MLH Fellowship & onboarding: organize events, judge - with scoring matrices, scale open-source mentorships to onboard hireable contributors.' 
-intro: How do you run hackathons and scale MLH‑style open source fellowships while - actually getting contributors onboarded, mentored, and judged fairly? In this episode - Will Russell — Developer Advocate at Kestra who previously built open source education - programs — walks through practical approaches to organizing hackathons and fellowship - programs that move people from first contribution to sustained involvement.

- We cover formats and online tools for running events, leadership and soft skills - for coordination, judging strategies (scoring matrices, categories, tie‑breakers), - and sponsor‑driven challenges. Will shares the MLH Fellowship mentorship model, - contribution best practices (PR quality, Git skills), and concrete onboarding tactics - for complex repos — including environment setup, maintainer collaboration, and cloud - workarounds like Colab and VMs. A Willmojis case study highlights image recognition - and demo workflow ideas. The conversation also addresses program scalability, budgets, - accessibility for students and career changers, and how developer advocacy, documentation, - and video demos support adoption.

Listen to learn actionable frameworks - for organizing hackathons, onboarding contributors, and scaling MLH‑style open source - fellowships so your program produces real contributions and sustainable community - growth. -dateadded: '2025-05-26' -duration: PT01H01M29S -quotableClips: -- name: Episode Opening & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=0 - endOffset: 201 -- name: 'Video Production Setup: Camera, Lens & Webcam Workflow' - startOffset: 201 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=201 - endOffset: 343 -- name: 'Audio Setup: Microphone, Preamp & Pop Filtering' - startOffset: 343 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=343 - endOffset: 415 -- name: 'Lighting Strategy: 45° Key Light & Background Separation' - startOffset: 415 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=415 - endOffset: 641 -- name: Early Career & Hackathon Discovery - startOffset: 641 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=641 - endOffset: 706 -- name: 'Hackathons as Learning: Git, Teamwork & Building Projects' - startOffset: 706 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=706 - endOffset: 736 -- name: 'Open Source Education Programs: Path from Contract to Full-Time' - startOffset: 736 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=736 - endOffset: 964 -- name: 'Organizing Hackathons: Leadership, Coordination & Soft Skills' - startOffset: 964 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=964 - endOffset: 1207 -- name: 'Career Trade-offs: Maintaining Technical Depth vs. Community Work' - startOffset: 1207 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1207 - endOffset: 1322 -- name: Role Variety at Small Companies vs. 
Specialized Teams - startOffset: 1322 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1322 - endOffset: 1398 -- name: 'Running Hackathons: Format, Online Tools & Office Hours' - startOffset: 1398 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1398 - endOffset: 1526 -- name: 'Judging Strategies: Scoring Matrices, Categories & Tie-Breakers' - startOffset: 1526 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1526 - endOffset: 1574 -- name: Sponsor-Driven Challenges & Themed Categories - startOffset: 1574 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1574 - endOffset: 1784 -- name: 'Case Study — Willmojis: Image Recognition, Font Generation & Demo' - startOffset: 1784 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=1784 - endOffset: 2019 -- name: 'Major League Hacking (MLH): Community Support & Rep Experience' - startOffset: 2019 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2019 - endOffset: 2143 -- name: 'MLH Fellowship: Mentorship Model & Contributing to Large Repos' - startOffset: 2143 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2143 - endOffset: 2342 -- name: 'Contribution Best Practices: PR Quality, Git Skills & Onboarding' - startOffset: 2342 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2342 - endOffset: 2476 -- name: 'Onboarding Complex Projects: Environment Setup & Maintainer Collaboration' - startOffset: 2476 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2476 - endOffset: 2620 -- name: Hardware Constraints & Cloud Workarounds (Colab, VMs) - startOffset: 2620 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2620 - endOffset: 2762 -- name: 'Program Scalability: Repeatability, Budgets & AI Opportunities' - startOffset: 2762 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2762 - endOffset: 2890 -- name: 'Fellowship Accessibility: Students, Career Changers & Motivation' - startOffset: 2890 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2890 - endOffset: 2954 -- name: 'Developer Advocacy at 
Kestra: Documentation, Demos & Outreach' - startOffset: 2954 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=2954 - endOffset: 3109 -- name: 'Content Workflow: Bullet Points, Demos & Collaboration with Writers' - startOffset: 3109 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3109 - endOffset: 3220 -- name: 'Video Strategy: Define Goal, Maintain Pace & Full Walkthroughs' - startOffset: 3220 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3220 - endOffset: 3270 -- name: 'Feature Demo Example: "After Execution" Notifications in Workflows' - startOffset: 3270 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3270 - endOffset: 3442 -- name: 'Learn with Kestra Series: Tool Tutorials (Docker, Postgres, Git)' - startOffset: 3442 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3442 - endOffset: 3599 -- name: 'Leadership & Team Empowerment Recommendation: "Turn the Ship Around"' - startOffset: 3599 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3599 - endOffset: 3689 -- name: Episode Closing & Call to Explore Kestra Content - startOffset: 3689 - url: https://www.youtube.com/watch?v=vXbMUfHE1OE&t=3689 - endOffset: 3689 ---- +context: 'Context: This episode weaves together practical production tips (video, + audio, lighting), hands-on developer programs (hackathons, MLH fellowship), contribution + and onboarding best practices, content/demo strategies, and leadership for scaling + community initiatives. + Core narrative: Empowering developer growth by building repeatable, hands-on learning + and contribution pathways—well-run hackathons, mentorship-driven fellowships, clear + onboarding and demo workflows, and scalable program design—so more people can learn + by doing, successfully contribute to real projects, and transition into lasting + technical roles.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/wrussell1999/){:target="_blank"} diff --git a/_podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md similarity index 94% rename from _podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md rename to _podcast/practical-generative-ai-consulting-from-expertise-to-impact.md index b9930a32..fe7717b5 100644 --- a/_podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.md +++ b/_podcast/practical-generative-ai-consulting-from-expertise-to-impact.md @@ -1,21 +1,144 @@ --- +title: "Launching a Freelance Generative AI Business: NLP Services and Client Acquisition" +short: "From a Research Scientist at Amazon to a Machine Learning/AI Consultant" +season: 16 episode: 5 guests: - verenaweber -date: 2025-11-07 +image: images/podcast/practical-generative-ai-consulting-from-expertise-to-impact.jpg ids: - anchor: atatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr + anchor: datatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr youtube: 4RargY8iOaE -image: images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-a-Research-Scientist-at-Amazon-to-a-Machine-learningAI-Consultant---Verena-Webber-e2bbmgr apple: https://podcasts.apple.com/us/podcast/from-a-research-scientist-at-amazon-to-a/id1541710331?i=1000634411188 spotify: https://open.spotify.com/episode/7gJI3ds3k1vXd3m3W9iRj9?si=oG6A7BuTSjaEoH6FhvEVug youtube: https://www.youtube.com/watch?v=4RargY8iOaE -season: 16 -short: From a Research Scientist at Amazon to a Machine learning/AI Consultant -title: 'Launch a Generative AI Freelance Business: NLP, Model-in-the-Loop 
Annotation - & Client Pitch' +description: "Learn to launch a freelance generative AI business: package NLP services, master client acquisition and pricing to win projects and scale revenue." +topics: +- AI +- LLMs +- NLP +- freelance +- production +- career transition +intro: "How do you move from research scientist to running a freelance generative AI business focused on NLP—and actually win clients? In this episode Verena Weber, a former Research Scientist at Alexa AI with 7+ years in machine learning and a background in statistics, walks through that transition and what it takes to offer NLP services as a freelancer. Verena's mission is to help companies prepare for the GenAI shift, and she draws on deep NLP expertise to explain which service offerings make sense, how to position technical skills for business clients, and practical approaches to client acquisition in the generative AI space. Listeners will get a clear view of launching a freelance generative AI business, including how to translate research experience into marketable NLP services, approaches to finding and engaging clients, and what to expect when stepping out on your own. If you're a machine learning professional or aspiring NLP freelancer trying to build a sustainable freelance practice in generative AI, this episode provides grounded, experience-based guidance to help you get started." 
+dateadded: 2023-11-12 +date: 2025-11-07 +duration: PT00H59M53S +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=0 + endOffset: 106 +- name: 'Early Education: From Economics & Chinese to Statistics' + startOffset: 106 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=106 + endOffset: 189 +- name: Discovering Data Science During Master’s Studies + startOffset: 189 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=189 + endOffset: 219 +- name: 'Career Progression: Consulting, In-house Roles, and Platform Data' + startOffset: 219 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=219 + endOffset: 397 +- name: 'Freelance Transition: Becoming a Generative AI Consultant' + startOffset: 397 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=397 + endOffset: 416 +- name: 'Landing a Research Role Without a PhD: Hiring Dynamics' + startOffset: 416 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=416 + endOffset: 641 +- name: 'Amazon Research: Customer-Focused, Production-Oriented Work' + startOffset: 641 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=641 + endOffset: 702 +- name: Using State-of-the-Art Models and Publishing in Industry Tracks + startOffset: 702 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=702 + endOffset: 986 +- name: Prioritizing Business Impact Over Publication Counts + startOffset: 986 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=986 + endOffset: 1069 +- name: 'Research Output: Project Cadence and Paper Frequency' + startOffset: 1069 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1069 + endOffset: 1147 +- name: 'Project Leadership: Ownership, Stakeholders, and Delivery' + startOffset: 1147 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1147 + endOffset: 1391 +- name: 'Model-in-the-Loop Annotation Study: Design and Rationale' + startOffset: 1391 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1391 + endOffset: 1520 +- name: 'Annotation 
Outcomes: Time Savings and Improved Consistency' + startOffset: 1520 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1520 + endOffset: 1667 +- name: Model Evaluation Strategy and Stabilizing High-Traffic Utterances + startOffset: 1667 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1667 + endOffset: 1903 +- name: 'Why Freelance: Impact on SMEs, Flexibility, and Entrepreneurship' + startOffset: 1903 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1903 + endOffset: 1927 +- name: 'Service Offerings: Generative AI Workshops and Use-Case Discovery' + startOffset: 1927 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1927 + endOffset: 2135 +- name: 'Supporting Women in AI: Goals and Planned Initiatives' + startOffset: 2135 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2135 + endOffset: 2275 +- name: 'Self-Employment Realities: Taxes, Health Insurance, and Admin' + startOffset: 2275 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2275 + endOffset: 2343 +- name: 'Crafting a Pitch Deck: Positioning, Evidence, and Rates' + startOffset: 2343 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2343 + endOffset: 2519 +- name: 'Finding Clients: Network Conversations, Mentorship, and Events' + startOffset: 2519 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2519 + endOffset: 2871 +- name: 'Choosing Generative AI: NLP Passion and Market Opportunity' + startOffset: 2871 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2871 + endOffset: 2948 +- name: 'Tailoring the Deck: Long Format, Short Versions, and Website' + startOffset: 2948 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2948 + endOffset: 3102 +- name: 'Early Leads: LinkedIn Visibility and Network Referrals' + startOffset: 3102 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3102 + endOffset: 3154 +- name: 'Content Strategy: Technical Posts, Papers, and Personal Growth' + startOffset: 3154 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3154 + endOffset: 3213 +- name: 
'Side Projects & Wellbeing: Sound Baths and Creative Outlets' + startOffset: 3213 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3213 + endOffset: 3306 +- name: 'Educational Foundation: Statistics, Probability, and Reading Papers' + startOffset: 3306 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3306 + endOffset: 3473 +- name: 'Recommended Resources: Books and Podcasts' + startOffset: 3473 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3473 + endOffset: 3639 +- name: Closing Remarks and Contact Information + startOffset: 3639 + url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3639 + endOffset: 3593 transcript: - header: Episode Introduction - line: This week, we'll talk about being a research scientist at Amazon, and transitioning @@ -79,7 +202,7 @@ transcript: sec: 217 time: '3:37' who: Alexey -- header: 'Career Progression: Consulting, In‑house Roles, and Platform Data' +- header: 'Career Progression: Consulting, In-house Roles, and Platform Data' - line: I did the Master's in Berlin, yeah. This was a joint statistics program from the Free University, Humboldt University, and the Technical University. They have this Joint Master Program. Basically, I discovered data science and machine learning @@ -191,7 +314,7 @@ transcript: sec: 605 time: '10:05' who: Verena -- header: 'Amazon Research: Customer‑Focused, Production‑Oriented Work' +- header: 'Amazon Research: Customer-Focused, Production-Oriented Work' - line: You said that research at Amazon is not the same as research in academia. So what does research at Amazon look like? What do you actually do there? sec: 641 @@ -213,7 +336,7 @@ transcript: sec: 693 time: '11:33' who: Alexey -- header: Using State‑of‑the‑Art Models and Publishing in Industry Tracks +- header: Using State-of-the-Art Models and Publishing in Industry Tracks - line: Yeah. Basically, before you start, there's three things you need to look into. First, “What is the customer problem? 
How do we define the problem clearly?” Then, second, “What are the possible solutions?” and, “How are they going to solve the @@ -437,7 +560,7 @@ transcript: sec: 1323 time: '22:03' who: Verena -- header: 'Model‑in‑the‑Loop Annotation Study: Design and Rationale' +- header: 'Model-in-the-Loop Annotation Study: Design and Rationale' - line: I'm looking at your Google Scholar, and there is one paper with six citations that you published in 2021, which is, “Is it better to verify semi-supervised learning with a human in the loop or large scale NLU models?” It’s a long name. @@ -526,7 +649,7 @@ transcript: sec: 1662 time: '27:42' who: Verena -- header: Model Evaluation Strategy and Stabilizing High‑Traffic Utterances +- header: Model Evaluation Strategy and Stabilizing High-Traffic Utterances - line: How do you evaluate the performance of these models? I guess, you send it to the annotators – they say “Yes, no.” Right? sec: 1667 @@ -639,7 +762,7 @@ transcript: sec: 1903 time: '31:43' who: Alexey -- header: 'Service Offerings: Generative AI Workshops and Use‑Case Discovery' +- header: 'Service Offerings: Generative AI Workshops and Use-Case Discovery' - line: Yeah. I don't know if it's… I mean, it is different in the sense that I'm not working on one model anymore. But, of course, there is still going to be quite some overlap in terms of topic. As I said before, my goal is to support companies @@ -755,7 +878,7 @@ transcript: sec: 2274 time: '37:54' who: Verena -- header: 'Self‑Employment Realities: Taxes, Health Insurance, and Admin' +- header: 'Self-Employment Realities: Taxes, Health Insurance, and Admin' - line: When I became self-employed this year, I was very surprised by how expensive health insurance is in Germany. 
Maybe it's less expensive than in the States, but still, when all these costs (all these taxes) are hidden and being a full-time @@ -1232,138 +1355,18 @@ transcript: sec: 3656 time: '1:00:56' who: Alexey -intro: 'How do you turn NLP research experience into a viable generative AI freelance - business — and how do you actually win clients? In this episode, Verena Weber, a - former Research Scientist at Alexa AI with 7+ years in machine learning and a background - in statistics, walks through that transition and the practical work that sells. - We cover launching a freelance generative AI business, designing and running model-in-the-loop - annotation studies (why they save time and improve consistency), model evaluation - strategies for stabilizing high‑traffic utterances, and how to package offerings - like generative AI workshops and use‑case discovery.

Verena also breaks - down the nuts-and-bolts of client acquisition: crafting pitch decks (long and short - formats), positioning, evidence and rates, LinkedIn visibility, network referrals, - events and mentorship. She doesn’t skip the realities of self-employment — taxes, - health insurance and admin — or content strategies to showcase expertise (technical - posts, papers, side projects). Listen to learn concrete steps for becoming an NLP - consultant, running annotation experiments that scale, and pitching value to SMEs - and product teams.' -description: 'Discover how to launch a generative AI freelance business: NLP services, - model-in-the-loop annotation, pitch-deck strategies, client leads & scalable workshops.' -dateadded: '2023-11-12' -duration: PT00H59M53S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=0 - endOffset: 106 -- name: 'Early Education: From Economics & Chinese to Statistics' - startOffset: 106 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=106 - endOffset: 189 -- name: Discovering Data Science During Master’s Studies - startOffset: 189 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=189 - endOffset: 219 -- name: 'Career Progression: Consulting, In‑house Roles, and Platform Data' - startOffset: 219 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=219 - endOffset: 397 -- name: 'Freelance Transition: Becoming a Generative AI Consultant' - startOffset: 397 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=397 - endOffset: 416 -- name: 'Landing a Research Role Without a PhD: Hiring Dynamics' - startOffset: 416 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=416 - endOffset: 641 -- name: 'Amazon Research: Customer‑Focused, Production‑Oriented Work' - startOffset: 641 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=641 - endOffset: 702 -- name: Using State‑of‑the‑Art Models and Publishing in Industry Tracks - startOffset: 702 - url: 
https://www.youtube.com/watch?v=4RargY8iOaE&t=702 - endOffset: 986 -- name: Prioritizing Business Impact Over Publication Counts - startOffset: 986 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=986 - endOffset: 1069 -- name: 'Research Output: Project Cadence and Paper Frequency' - startOffset: 1069 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1069 - endOffset: 1147 -- name: 'Project Leadership: Ownership, Stakeholders, and Delivery' - startOffset: 1147 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1147 - endOffset: 1391 -- name: 'Model‑in‑the‑Loop Annotation Study: Design and Rationale' - startOffset: 1391 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1391 - endOffset: 1520 -- name: 'Annotation Outcomes: Time Savings and Improved Consistency' - startOffset: 1520 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1520 - endOffset: 1667 -- name: Model Evaluation Strategy and Stabilizing High‑Traffic Utterances - startOffset: 1667 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1667 - endOffset: 1903 -- name: 'Why Freelance: Impact on SMEs, Flexibility, and Entrepreneurship' - startOffset: 1903 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1903 - endOffset: 1927 -- name: 'Service Offerings: Generative AI Workshops and Use‑Case Discovery' - startOffset: 1927 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=1927 - endOffset: 2135 -- name: 'Supporting Women in AI: Goals and Planned Initiatives' - startOffset: 2135 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2135 - endOffset: 2275 -- name: 'Self‑Employment Realities: Taxes, Health Insurance, and Admin' - startOffset: 2275 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2275 - endOffset: 2343 -- name: 'Crafting a Pitch Deck: Positioning, Evidence, and Rates' - startOffset: 2343 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2343 - endOffset: 2519 -- name: 'Finding Clients: Network Conversations, Mentorship, and Events' - startOffset: 2519 - url: 
https://www.youtube.com/watch?v=4RargY8iOaE&t=2519 - endOffset: 2871 -- name: 'Choosing Generative AI: NLP Passion and Market Opportunity' - startOffset: 2871 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2871 - endOffset: 2948 -- name: 'Tailoring the Deck: Long Format, Short Versions, and Website' - startOffset: 2948 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=2948 - endOffset: 3102 -- name: 'Early Leads: LinkedIn Visibility and Network Referrals' - startOffset: 3102 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3102 - endOffset: 3154 -- name: 'Content Strategy: Technical Posts, Papers, and Personal Growth' - startOffset: 3154 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3154 - endOffset: 3213 -- name: 'Side Projects & Wellbeing: Sound Baths and Creative Outlets' - startOffset: 3213 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3213 - endOffset: 3306 -- name: 'Educational Foundation: Statistics, Probability, and Reading Papers' - startOffset: 3306 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3306 - endOffset: 3473 -- name: 'Recommended Resources: Books and Podcasts' - startOffset: 3473 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3473 - endOffset: 3639 -- name: Closing Remarks and Contact Information - startOffset: 3639 - url: https://www.youtube.com/watch?v=4RargY8iOaE&t=3639 - endOffset: 3593 +context: 'Context: The episode follows a journey from academic foundations in economics, + Chinese, and statistics through industry research and platform data roles to independent + generative-AI consultancy, touching on technical approaches (SOTA models, model-in-the-loop + annotation, evaluation), product and business priorities, client acquisition and + pitching, entrepreneurship realities, and community support initiatives. 
+ + Core: The unifying idea is translating deep technical expertise into pragmatic, + production-oriented generative-AI solutions that deliver measurable business impact—anchored + in rigorous evaluation, stakeholder ownership, evidence-based communication (workshops, + decks, case studies), continual learning, and a commitment to accessibility and + mentorship while managing the practicalities of running a sustainable freelance + practice.' --- Links: diff --git a/_podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md b/_podcast/practical-llm-engineering-and-rag.md similarity index 94% rename from _podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md rename to _podcast/practical-llm-engineering-and-rag.md index e8b46675..e8c1c95b 100644 --- a/_podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.md +++ b/_podcast/practical-llm-engineering-and-rag.md @@ -1,37 +1,131 @@ --- +title: "Practical LLM Engineering and RAG: Prompting, Evaluation and Real-World Workflows" +short: "How to Build and Evaluate AI systems in the Age of LLMs" +season: 22 episode: 4 guests: - hugobowneanderson -date: 2025-11-07 -intro: How do you move from prototypes to reliable, scalable LLM systems that actually - deliver business value?

In this episode, Hugo Bowne‑Anderson—tracing a path - from biology research into Python, PyData, DataCamp curriculum and product work, - then into consulting, teaching, and developer relations—walks through practical - engineering and evaluation patterns for building LLM-driven workflows.

- We cover prompt engineering (role prompts, structured output, timestamps), everyday - LLM use cases (summaries, translation, CSV workflows), transcript pipelines (Gemini, - Descript, Loom) and automation with GitHub Actions. Hugo explains the generator–evaluator - pattern for automated quality control, how to design evaluation sets and failure - analysis, and techniques for logging, traces, and debuggable MVPs.

You’ll - hear when to prioritize RAG (retrieval-augmented generation) and chunking strategies, - when to add tool calls or agents, plus a concrete email assistant build using Gmail - API + RAG. The episode closes with a four‑step framework for agents and guidance - on retrieval‑based vs multi‑turn memory.

If you’re building LLM systems, - this conversation gives actionable tactics for prompt engineering, evaluation, scaling - transcript pipelines, and deciding when to adopt agents, embeddings, and automation. +image: images/podcast/practical-llm-engineering-and-rag.jpg ids: anchor: datatalksclub/episodes/How-to-Build-and-Evaluate-AI-systems-in-the-Age-of-LLMs---Hugo-Bowne-Anderson-e39vt24 youtube: eC3RNuI6ow0 -image: images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/How-to-Build-and-Evaluate-AI-systems-in-the-Age-of-LLMs---Hugo-Bowne-Anderson-e39vt24 apple: https://podcasts.apple.com/us/podcast/how-to-build-and-evaluate-ai-systems-in-the-age-of/id1541710331?i=1000733350691 spotify: https://open.spotify.com/episode/2RD2qXaYa2ZjKjuIE7Aj6O youtube: https://www.youtube.com/watch?v=eC3RNuI6ow0 -season: 22 -short: How to Build and Evaluate AI systems in the Age of LLMs -title: 'Build & Scale LLM Agents and RAG Pipelines: Prompting, Transcript Automation, - Evaluation' +description: "Discover LLM engineering and RAG best practices: practical prompting, evaluation methods and deployment workflows to boost accuracy and retrieval." +topics: +- LLMs +- NLP +- MLOps +- tools + +intro: "How do you move from experimentation to reliable, production-ready LLM engineering and retrieval-augmented generation (RAG)? In this episode Hugo Bowne-Anderson — Head of Developer Relations at Outerbounds, longtime data scientist, educator, and host of Vanishing Gradients — walks through practical patterns for building, evaluating, and scaling real-world LLM workflows.

 We cover everyday LLM use cases (summaries, translation, CSV work), prompting best practices (role prompts, structured output, timestamps), and transcript pipelines using Gemini, Descript, Loom and automation with GitHub Actions. Hugo explains the generator–evaluator pattern for automated quality control, how to design evaluation sets and failure analysis, and concrete chunking strategies (fixed length, sliding windows, context rot) that unlock RAG performance. He also discusses when to add tooling or agentic capabilities, a four-step framework for agents, memory design tradeoffs, and a practical email assistant example using the Gmail API plus RAG.

Listen to learn actionable guidance on prioritizing RAG for quick business wins, building debuggable MVPs with logging and traces, and setting up evaluation and monitoring so your LLMs deliver dependable results in production." +dateadded: 2025-10-27 +date: 2025-11-07 +duration: PT01H01M30S +quotableClips: +- name: Podcast Kickoff & Hugo Bowne-Anderson Background + startOffset: 0 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=0 + endOffset: 72 +- name: 'Vanishing Gradients vs High Signal: Podcast Formats & Audiences' + startOffset: 72 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=72 + endOffset: 124 +- name: 'From Academia to Industry: Biology Research, Python, and PyData' + startOffset: 124 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=124 + endOffset: 207 +- name: 'Early Industry Work: DataCamp Curriculum and Product Roles' + startOffset: 207 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=207 + endOffset: 237 +- name: 'Transition to Freelance: Consulting, Teaching, and DevRel' + startOffset: 237 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=237 + endOffset: 431 +- name: 'Consulting vs Advisory: Hands-On Coding and Organizational Advice' + startOffset: 431 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=431 + endOffset: 504 +- name: 'Driving AI Adoption: Loss Aversion and Dedicated Experimentation Time' + startOffset: 504 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=504 + endOffset: 568 +- name: 'Everyday LLM Use Cases: Summaries, Translation, and CSV Workflows' + startOffset: 568 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=568 + endOffset: 671 +- name: 'Prompting Best Practices: Role Prompts, Structured Output, and Timestamps' + startOffset: 671 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=671 + endOffset: 742 +- name: 'Transcript Workflows: Gemini, Descript, Loom and Automation Tools' + startOffset: 742 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=742 + endOffset: 836 +- name: 
'Generator–Evaluator Pattern: Automated Quality Control for Outputs' + startOffset: 836 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=836 + endOffset: 1058 +- name: 'Scaling Transcript Pipelines: Automation with GitHub Actions' + startOffset: 1058 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1058 + endOffset: 1380 +- name: 'Evaluation Sets for LLMs: Gold Tests, Size, Cost, and Representativeness' + startOffset: 1380 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1380 + endOffset: 1603 +- name: 'Failure Analysis: Categorizing Errors and Prioritizing Retrieval Fixes' + startOffset: 1603 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1603 + endOffset: 1658 +- name: 'Vibe Coding & Monitoring: Logging, Traces, and Debuggable MVPs' + startOffset: 1658 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1658 + endOffset: 1916 +- name: 'Developer Tools & Assistants: GitHub Copilot, Cursor, and IDE Agents' + startOffset: 1916 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1916 + endOffset: 1994 +- name: 'Embedded Agents in Workflows: Slack Integration and Proactive Assistants' + startOffset: 1994 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1994 + endOffset: 2412 +- name: 'Agentic Value Beyond Chat: Actions, Documents, and Automation' + startOffset: 2412 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2412 + endOffset: 2666 +- name: 'Prioritizing RAG: Quick Business Wins with Chunking and Embeddings' + startOffset: 2666 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2666 + endOffset: 2900 +- name: 'Chunking Strategies: Fixed Length, Sliding Windows, and Context Rot' + startOffset: 2900 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2900 + endOffset: 3019 +- name: 'When to Add Tooling: Moving from RAG to Agents and Tool Calls' + startOffset: 3019 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3019 + endOffset: 3214 +- name: 'Practical Build: Email Assistant Example using Gmail API + RAG' + startOffset: 3214 + 
url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3214 + endOffset: 3381 +- name: 'Four-Step Framework for Agents: Problem, Start Small, Data, Evaluation' + startOffset: 3381 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3381 + endOffset: 3461 +- name: 'Memory Design: Retrieval-Based Memory vs Multi-Turn Conversation Memory' + startOffset: 3461 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3461 + endOffset: 3655 +- name: 'Episode Wrap-Up: Key Takeaways, Courses, and Next Steps' + startOffset: 3655 + url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3655 + endOffset: 3690 transcript: - header: Episode Introduction & Guest Bio - line: This week we will talk about LLMs and AI like everyone else, I guess. @@ -1131,112 +1225,18 @@ transcript: sec: 3690 time: '1:01:30' who: Alexey -description: Build LLM agents and RAG pipelines using prompting, transcript automation, - and evaluation to scale systems - learn chunking, monitoring, and practical build - steps. -dateadded: '2025-10-27' -duration: PT01H01M30S -quotableClips: -- name: Podcast Kickoff & Hugo Bowne‑Anderson Background - startOffset: 0 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=0 - endOffset: 72 -- name: 'Vanishing Gradients vs High Signal: Podcast Formats & Audiences' - startOffset: 72 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=72 - endOffset: 124 -- name: 'From Academia to Industry: Biology Research, Python, and PyData' - startOffset: 124 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=124 - endOffset: 207 -- name: 'Early Industry Work: DataCamp Curriculum and Product Roles' - startOffset: 207 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=207 - endOffset: 237 -- name: 'Transition to Freelance: Consulting, Teaching, and DevRel' - startOffset: 237 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=237 - endOffset: 431 -- name: 'Consulting vs Advisory: Hands‑On Coding and Organizational Advice' - startOffset: 431 - url: 
https://www.youtube.com/watch?v=eC3RNuI6ow0&t=431 - endOffset: 504 -- name: 'Driving AI Adoption: Loss Aversion and Dedicated Experimentation Time' - startOffset: 504 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=504 - endOffset: 568 -- name: 'Everyday LLM Use Cases: Summaries, Translation, and CSV Workflows' - startOffset: 568 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=568 - endOffset: 671 -- name: 'Prompting Best Practices: Role Prompts, Structured Output, and Timestamps' - startOffset: 671 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=671 - endOffset: 742 -- name: 'Transcript Workflows: Gemini, Descript, Loom and Automation Tools' - startOffset: 742 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=742 - endOffset: 836 -- name: 'Generator–Evaluator Pattern: Automated Quality Control for Outputs' - startOffset: 836 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=836 - endOffset: 1058 -- name: 'Scaling Transcript Pipelines: Automation with GitHub Actions' - startOffset: 1058 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1058 - endOffset: 1380 -- name: 'Evaluation Sets for LLMs: Gold Tests, Size, Cost, and Representativeness' - startOffset: 1380 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1380 - endOffset: 1603 -- name: 'Failure Analysis: Categorizing Errors and Prioritizing Retrieval Fixes' - startOffset: 1603 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1603 - endOffset: 1658 -- name: 'Vibe Coding & Monitoring: Logging, Traces, and Debuggable MVPs' - startOffset: 1658 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1658 - endOffset: 1916 -- name: 'Developer Tools & Assistants: GitHub Copilot, Cursor, and IDE Agents' - startOffset: 1916 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1916 - endOffset: 1994 -- name: 'Embedded Agents in Workflows: Slack Integration and Proactive Assistants' - startOffset: 1994 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=1994 - endOffset: 2412 -- name: 
'Agentic Value Beyond Chat: Actions, Documents, and Automation' - startOffset: 2412 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2412 - endOffset: 2666 -- name: 'Prioritizing RAG: Quick Business Wins with Chunking and Embeddings' - startOffset: 2666 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2666 - endOffset: 2900 -- name: 'Chunking Strategies: Fixed Length, Sliding Windows, and Context Rot' - startOffset: 2900 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=2900 - endOffset: 3019 -- name: 'When to Add Tooling: Moving from RAG to Agents and Tool Calls' - startOffset: 3019 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3019 - endOffset: 3214 -- name: 'Practical Build: Email Assistant Example using Gmail API + RAG' - startOffset: 3214 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3214 - endOffset: 3381 -- name: 'Four‑Step Framework for Agents: Problem, Start Small, Data, Evaluation' - startOffset: 3381 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3381 - endOffset: 3461 -- name: 'Memory Design: Retrieval‑Based Memory vs Multi‑Turn Conversation Memory' - startOffset: 3461 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3461 - endOffset: 3655 -- name: 'Episode Wrap‑Up: Key Takeaways, Courses, and Next Steps' - startOffset: 3655 - url: https://www.youtube.com/watch?v=eC3RNuI6ow0&t=3655 - endOffset: 3690 +context: 'Context: This episode surveys practical, hands-on patterns—RAG, chunking, + prompting, generator–evaluator workflows, transcript pipelines, evaluation sets, + monitoring, agents, memory design, and developer tooling—drawn from moving models + from prototypes into real products across consulting, DevRel, and engineering roles. 
+ + Core: The unifying idea is pragmatic, iterative engineering of LLM-powered systems: + prioritize retrieval-first solutions that deliver immediate business value, instrument + rigorous evaluation and monitoring (gold tests, failure analysis, generator–evaluator), + automate pipelines and reproducible workflows, and only escalate to agentic tooling + or persistent memory once data, metrics, and clear ROI justify the added complexity—treating + AI as an integrated augmentation that must be built, tested, and scaled with standard + software engineering practices.' --- Links: diff --git a/_podcast/s15e04-good-bad-and-ugly-of-gpt.md b/_podcast/practical-llm-use-cases-and-product-patterns.md similarity index 96% rename from _podcast/s15e04-good-bad-and-ugly-of-gpt.md rename to _podcast/practical-llm-use-cases-and-product-patterns.md index ce8a0e76..0462eeaa 100644 --- a/_podcast/s15e04-good-bad-and-ugly-of-gpt.md +++ b/_podcast/practical-llm-use-cases-and-product-patterns.md @@ -1,19 +1,117 @@ --- +title: "LLM Value Creation: GPT Communities, Business Use Cases & Human-in-the-Loop AI Applications" +short: "The Good, the Bad and the Ugly of GPT" +season: 15 episode: 4 guests: - sandrakublik +image: images/podcast/practical-llm-use-cases-and-product-patterns.jpg ids: - anchor: atatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 + anchor: datatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 youtube: bM6AR4A-f98 -image: images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/The-Good--the-Bad-and-the-Ugly-of-GPT---Sandra-Kublik-e27o8r4 apple: https://podcasts.apple.com/us/podcast/the-good-the-bad-and-the-ugly-of-gpt-sandra-kublik/id1541710331?i=1000623464507 spotify: https://open.spotify.com/episode/5fZ89re1YLiVZ7QNxdoKVH?si=pD96Dv_tRvaHci5N8PZv9g youtube: https://www.youtube.com/watch?v=bM6AR4A-f98 -season: 15 -short: The Good, the Bad 
and the Ugly of GPT -title: 'Build Secure LLM Apps: GPT, Prompt Engineering, Embeddings & Semantic Search' + +description: "Create real business value with LLMs: from early GPT communities to production applications that actually work." +intro: "How do you create real business value with LLMs — from early GPT communities to production applications that actually work? In this episode, Sandra Kublik — AI entrepreneur, community builder, and author on GPT — shares a practical, entrepreneurial perspective on building LLM-powered products that deliver results.

Sandra traces her journey through the early GPT community (Nextgrid, Lablab.AI, YouTube) and breaks down proven business use cases: text generation workflows, semantic search with embeddings, and domain-specific AI assistants. You'll get actionable guidance on product patterns, prompt engineering techniques, and the critical human-in-the-loop requirements for reliable AI applications. The conversation covers real-world trade-offs between proprietary and open-source models, security considerations for enterprise deployment, and practical strategies to mitigate hallucinations while maintaining brand safety.

Listen for concrete frameworks to evaluate LLM integration opportunities, a 7-day experiment to test LLMs in your workflow, and proven patterns for scaling AI applications from prototype to production. Sandra shares resources on YouTube, X, and LinkedIn for continued learning and implementation examples." +topics: +- LLMs +dateadded: 2023-08-06 +duration: PT01H09S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=0 + endOffset: 75 +- name: 'Guest Introduction: Sandra Kublik, AI entrepreneur and GPT-3 author' + startOffset: 75 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=75 + endOffset: 146 +- name: 'LLM Landscape: Why GPT and large language models are everywhere' + startOffset: 146 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=146 + endOffset: 185 +- name: 'Career Journey: Nextgrid, Lablab.AI and YouTube entry into AI' + startOffset: 185 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=185 + endOffset: 488 +- name: 'Early GPT Community: Gaining access and demo-driven growth' + startOffset: 488 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=488 + endOffset: 600 +- name: 'GPT & LLM Business Use Cases: Text generation, embeddings, and semantic search' + startOffset: 600 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=600 + endOffset: 953 +- name: 'Cohere Focus: Community building and LLM education' + startOffset: 953 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=953 + endOffset: 1002 +- name: 'Market Adoption: Startups, VC interest, and generative AI trends' + startOffset: 1002 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1002 + endOffset: 1162 +- name: 'LLMs as Amplifiers: Impact on authenticity and content scaling' + startOffset: 1162 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1162 + endOffset: 1409 +- name: 'Human-in-the-Loop: Hallucinations, brand safety, and editorial curation' + startOffset: 1409 + url: 
https://www.youtube.com/watch?v=bM6AR4A-f98&t=1409 + endOffset: 1676 +- name: 'Specialist Assistants: Secure, domain-specific chatbots for professionals' + startOffset: 1676 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1676 + endOffset: 1948 +- name: 'Building LLM Apps: Model choice, architecture, and integration trade-offs' + startOffset: 1948 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1948 + endOffset: 2128 +- name: 'Proprietary vs Open Source: Cost, latency, IP and data risk considerations' + startOffset: 2128 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2128 + endOffset: 2241 +- name: 'Prompt Engineering: Iteration, examples, and prompt whisperer techniques' + startOffset: 2241 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2241 + endOffset: 2421 +- name: 'Fine-Tuning & Embeddings: Domain adaptation and semantic retrieval' + startOffset: 2421 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2421 + endOffset: 2672 +- name: 'Prompt Tips: Providing examples, context, and SEO-focused instructions' + startOffset: 2672 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2672 + endOffset: 3061 +- name: '7-Day LLM Experiment: Integrating language models into daily workflow' + startOffset: 3061 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3061 + endOffset: 3363 +- name: 'Productivity Tools: Email assistants and content automation extensions' + startOffset: 3363 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3363 + endOffset: 3484 +- name: 'Learning Resources: LLM University, Cohere blog, and recommended readings' + startOffset: 3484 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3484 + endOffset: 3630 +- name: 'Contact & Social: Where to find Sandra online (YouTube, X, LinkedIn)' + startOffset: 3630 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3630 + endOffset: 3664 +- name: Episode Wrap-Up and Next Steps + startOffset: 3664 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3664 + endOffset: 3684 +- 
name: Closing Remarks + startOffset: 3684 + url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3684 + endOffset: 3609 + transcript: - header: Podcast Introduction - header: 'Guest Introduction: Sandra Kublik, AI entrepreneur and GPT-3 author' @@ -900,114 +998,6 @@ transcript: sec: 3684 time: '1:01:24' who: Sandra -description: 'Build secure LLM apps with GPT: master prompt engineering and embeddings - to cut hallucinations, protect data, scale workflows, and boost content ROI.' -intro: 'How do you build secure LLM apps that use GPT, embeddings and semantic search - while avoiding hallucinations and data risk? In this episode, Sandra Kublik — AI - entrepreneur, community builder, and author on GPT — walks through practical trade-offs - for building production LLM systems.

Sandra traces the LLM landscape and - her career (Nextgrid, Lablab.AI, YouTube), then digs into real-world use cases like - text generation, semantic retrieval with embeddings, and domain-specific chatbots. - You’ll hear guidance on model choice, architecture, proprietary vs open source trade-offs - (cost, latency, IP and data risk), and concrete prompt engineering techniques including - examples, iteration strategies, and “prompt whisperer” tips. The conversation covers - security and quality: human-in-the-loop workflows to mitigate hallucinations, brand - safety, and editorial curation, plus fine-tuning and semantic search strategies - for domain adaptation.

Listeners get a practical value proposition: frameworks - to evaluate LLM security and integration trade-offs, a 7-day experiment to embed - LLMs into your workflow, and pointers to productivity tools and learning resources. - Find Sandra on YouTube, X, and LinkedIn for follow-up resources and examples.' -dateadded: '2023-08-06' -duration: PT01H09S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=0 - endOffset: 75 -- name: 'Guest Introduction: Sandra Kublik, AI entrepreneur and GPT-3 author' - startOffset: 75 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=75 - endOffset: 146 -- name: 'LLM Landscape: Why GPT and large language models are everywhere' - startOffset: 146 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=146 - endOffset: 185 -- name: 'Career Journey: Nextgrid, Lablab.AI and YouTube entry into AI' - startOffset: 185 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=185 - endOffset: 488 -- name: 'Early GPT Community: Gaining access and demo-driven growth' - startOffset: 488 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=488 - endOffset: 600 -- name: 'GPT & LLM Business Use Cases: Text generation, embeddings, and semantic search' - startOffset: 600 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=600 - endOffset: 953 -- name: 'Cohere Focus: Community building and LLM education' - startOffset: 953 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=953 - endOffset: 1002 -- name: 'Market Adoption: Startups, VC interest, and generative AI trends' - startOffset: 1002 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1002 - endOffset: 1162 -- name: 'LLMs as Amplifiers: Impact on authenticity and content scaling' - startOffset: 1162 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1162 - endOffset: 1409 -- name: 'Human-in-the-Loop: Hallucinations, brand safety, and editorial curation' - startOffset: 1409 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1409 - 
endOffset: 1676 -- name: 'Specialist Assistants: Secure, domain-specific chatbots for professionals' - startOffset: 1676 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1676 - endOffset: 1948 -- name: 'Building LLM Apps: Model choice, architecture, and integration trade-offs' - startOffset: 1948 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=1948 - endOffset: 2128 -- name: 'Proprietary vs Open Source: Cost, latency, IP and data risk considerations' - startOffset: 2128 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2128 - endOffset: 2241 -- name: 'Prompt Engineering: Iteration, examples, and prompt whisperer techniques' - startOffset: 2241 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2241 - endOffset: 2421 -- name: 'Fine-Tuning & Embeddings: Domain adaptation and semantic retrieval' - startOffset: 2421 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2421 - endOffset: 2672 -- name: 'Prompt Tips: Providing examples, context, and SEO-focused instructions' - startOffset: 2672 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=2672 - endOffset: 3061 -- name: '7-Day LLM Experiment: Integrating language models into daily workflow' - startOffset: 3061 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3061 - endOffset: 3363 -- name: 'Productivity Tools: Email assistants and content automation extensions' - startOffset: 3363 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3363 - endOffset: 3484 -- name: 'Learning Resources: LLM University, Cohere blog, and recommended readings' - startOffset: 3484 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3484 - endOffset: 3630 -- name: 'Contact & Social: Where to find Sandra online (YouTube, X, LinkedIn)' - startOffset: 3630 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3630 - endOffset: 3664 -- name: Episode Wrap-Up and Next Steps - startOffset: 3664 - url: https://www.youtube.com/watch?v=bM6AR4A-f98&t=3664 - endOffset: 3684 -- name: Closing Remarks - startOffset: 3684 - url: 
https://www.youtube.com/watch?v=bM6AR4A-f98&t=3684 - endOffset: 3609 --- Links: diff --git a/_podcast/s15e07-pragmatic-and-standardized-mlops.md b/_podcast/pragmatic-and-standardized-mlops.md similarity index 95% rename from _podcast/s15e07-pragmatic-and-standardized-mlops.md rename to _podcast/pragmatic-and-standardized-mlops.md index b0a00eea..6cfa9458 100644 --- a/_podcast/s15e07-pragmatic-and-standardized-mlops.md +++ b/_podcast/pragmatic-and-standardized-mlops.md @@ -1,20 +1,157 @@ --- +title: "Pragmatic MLOps: Build Standardized CI/CD, Model Registries, Monitoring & Org Best Practices" +short: "Pragmatic and Standardized MLOps" +season: 15 episode: 7 guests: - mariavechtomova +image: images/podcast/pragmatic-and-standardized-mlops.jpg ids: - anchor: lub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv + anchor: datatalksclub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv youtube: q3DTR3Od1MA -image: images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg links: anchor: https://podcasters.spotify.com/datatalksclub/episodes/Pragmatic-and-Standardized-MLOps---Maria-Vechtomova-e292ksv apple: https://podcasts.apple.com/us/podcast/pragmatic-and-standardized-mlops-maria-vechtomova/id1541710331?i=1000627227242 spotify: https://open.spotify.com/episode/5UZPZTDllam3RrbI9sOyqS?si=Ghm1oD8bSFS6l0ULDlatpQ youtube: https://www.youtube.com/watch?v=q3DTR3Od1MA -season: 15 -short: Pragmatic and Standardized MLOps -title: 'Pragmatic MLOps: Build Standardized CI/CD, Model Registries, Monitoring & - Org Best Practices' + +description: "Learn pragmatic MLOps: standardize CI/CD, model registry and monitoring to boost reproducibility, deployment reliability, and team productivity." +intro: "How do you build pragmatic, standardized MLOps across teams without chasing every new tool? 
In this episode, Maria Vechtomova — an MLOps tech lead and manager with roots in econometrics and early work moving from R to Python — tackles MLOps as an organizational challenge, not just a technology problem.

Maria walks through core, actionable topics: building reusable CI/CD and standardized repos, choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines central MLOps responsibilities — infrastructure, registries, deployment patterns, and monitoring — and contrasts centralized platform teams with embedded feature teams and guardrails. You’ll hear practical advice on moving logic out of notebooks into packages and pipelines, conducting maturity assessments (reproducibility, testing, documentation), and securing DevOps buy-in. The conversation also covers monitoring standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus retail use cases like demand forecasting and personalization.

Listen to learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring — and how to prioritize organizational change to make MLOps work in production." +topics: +- MLOps +dateadded: 2023-09-25 + +duration: PT00H57M05S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=0 + endOffset: 101 +- name: 'Episode Overview: Pragmatic and Standardized MLOps with Maria Vechtomova' + startOffset: 101 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=101 + endOffset: 187 +- name: 'Background: Early career in data, econometrics, R to Python, and early MLOps + work' + startOffset: 187 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=187 + endOffset: 345 +- name: 'Early MLOps stacks: Teradata Aster, custom metadata, and orchestration' + startOffset: 345 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=345 + endOffset: 483 +- name: 'Role Overview: MLOps Tech Lead / Manager of Machine Learning Engineering' + startOffset: 483 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=483 + endOffset: 585 +- name: 'Marvelous MLOps: blog, LinkedIn presence, and content cadence' + startOffset: 585 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=585 + endOffset: 670 +- name: 'Defining MLOps: enablement, reproducibility, and teaching data scientists' + startOffset: 670 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=670 + endOffset: 762 +- name: 'Central MLOps team responsibilities: infrastructure, reusable CI/CD, and + monitoring' + startOffset: 762 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=762 + endOffset: 885 +- name: 'Toollandscape overload: MAD landscape, FOMO, and organizational challenges' + startOffset: 885 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=885 + endOffset: 987 +- name: 'Pragmatic MLOps: leverage existing infra (Kubernetes, Git, CI/CD) not new + tools' + startOffset: 987 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=987 + endOffset: 
1121 +- name: 'Essential MLOps stack: version control, CI/CD, registries, model registry, + deployment, monitoring' + startOffset: 1121 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1121 + endOffset: 1249 +- name: 'Model artifacts & registry options: Artifactory, S3, and MLflow alternatives' + startOffset: 1249 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1249 + endOffset: 1343 +- name: 'MLOps maturity assessment: documentation, reproducibility, code quality, + and testing' + startOffset: 1343 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1343 + endOffset: 1441 +- name: 'Startup priorities: reproducibility, versioning, traceability as first steps' + startOffset: 1441 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1441 + endOffset: 1626 +- name: 'Team organization: centralized MLOps vs. embedded feature teams and guardrails' + startOffset: 1626 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1626 + endOffset: 1795 +- name: 'Standardization: cookie-cutter repos, service principals, and Databricks + integration' + startOffset: 1795 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1795 + endOffset: 2004 +- name: 'Production best practices: move logic from notebooks to packages and CI/CD + pipelines' + startOffset: 2004 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2004 + endOffset: 2069 +- name: 'Implementation timeline: technical build vs. 
organizational buy-in and permissions' + startOffset: 2069 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2069 + endOffset: 2121 +- name: 'Securing DevOps buy-in: expose pain, deliver standards, and enable internal + audit' + startOffset: 2121 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2121 + endOffset: 2281 +- name: 'Team composition: small senior ML engineering team building MLOps platform' + startOffset: 2281 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2281 + endOffset: 2369 +- name: 'Tool-agnostic skills: learn fundamentals and stitch tools together end-to-end' + startOffset: 2369 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2369 + endOffset: 2573 +- name: 'Roadmap priorities: monitoring standardization, A/B testing, and LLM pilots' + startOffset: 2573 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2573 + endOffset: 2744 +- name: 'LLM Ops perspective: hype, cost, GPU constraints, and multilingual limits' + startOffset: 2744 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2744 + endOffset: 2982 +- name: 'Retail use cases: demand forecasting, personalization, and loyalty programs' + startOffset: 2982 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2982 + endOffset: 3084 +- name: 'Cross-brand model: centralized MLOps support for smaller brands and cooperation + with large brands' + startOffset: 3084 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3084 + endOffset: 3245 +- name: 'Learning recommendations: hands-on projects, MLOps Zoomcamp, and pairing + with engineers' + startOffset: 3245 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3245 + endOffset: 3368 +- name: 'Skill balance: ML fundamentals plus software engineering and system design' + startOffset: 3368 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3368 + endOffset: 3434 +- name: 'Data engineering importance: pipeline design, optimization, and data quality + for MLOps' + startOffset: 3434 + url: 
https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3434 + endOffset: 3496 +- name: 'Closing Remarks: upcoming course, LLM updates, and follow Marvelous MLOps' + startOffset: 3496 + url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3496 + endOffset: 3425 + transcript: - header: Podcast Introduction - header: 'Episode Overview: Pragmatic and Standardized MLOps with Maria Vechtomova' @@ -550,7 +687,7 @@ transcript: sec: 1722 time: '28:42' who: Alexey -- header: 'Standardization: cookie‑cutter repos, service principals, and Databricks +- header: 'Standardization: cookie-cutter repos, service principals, and Databricks integration' - line: 'Yeah, I guess so. I think the choice of the tooling is related to standardization. We work in a large corporate organization with 19 brands all over the world – @@ -616,7 +753,7 @@ transcript: sec: 2004 time: '33:24' who: Maria -- header: 'Implementation timeline: technical build vs. organizational buy‑in and +- header: 'Implementation timeline: technical build vs. organizational buy-in and permissions' - line: Amazing. How long did it take to implement this? sec: 2069 @@ -647,7 +784,7 @@ transcript: sec: 2119 time: '35:19' who: Maria -- header: 'Securing DevOps buy‑in: expose pain, deliver standards, and enable internal +- header: 'Securing DevOps buy-in: expose pain, deliver standards, and enable internal audit' - line: Did you do this yourself as a tech lead? [Maria agrees] Do you have any tips on how to address that if somebody is also facing some hesitation from the DevOps @@ -744,7 +881,7 @@ transcript: sec: 2350 time: '39:10' who: Maria -- header: 'Tool‑agnostic skills: learn fundamentals and stitch tools together end-to-end' +- header: 'Tool-agnostic skills: learn fundamentals and stitch tools together end-to-end' - line: What is also interesting, and the reason I'm asking that, is because in our MLOps course, we try to cover the fundamentals. 
We break down what we think MLOps is into multiple areas, which is something like experiment tracking, machine learning @@ -971,7 +1108,7 @@ transcript: sec: 3067 time: '51:07' who: Maria -- header: 'Cross‑brand model: centralized MLOps support for smaller brands and cooperation +- header: 'Cross-brand model: centralized MLOps support for smaller brands and cooperation with large brands' - line: Does each of these brands have a separate team – and separate a bunch of teams – for data science and they do data science separately from the rest of the organization? @@ -1020,7 +1157,7 @@ transcript: sec: 3204 time: '53:24' who: Alexey -- header: 'Learning recommendations: hands‑on projects, MLOps Zoomcamp, and pairing +- header: 'Learning recommendations: hands-on projects, MLOps Zoomcamp, and pairing with engineers' - line: There is one question, “What is the course that you take to become an MLOps engineer?” @@ -1143,154 +1280,6 @@ transcript: sec: 3526 time: '58:46' who: Alexey -description: 'Learn pragmatic MLOps: standardize CI/CD, model registry and monitoring - to boost reproducibility, deployment reliability, and team productivity.' -intro: 'How do you build pragmatic, standardized MLOps across teams without chasing - every new tool? In this episode, Maria Vechtomova — an MLOps tech lead and manager - with roots in econometrics and early work moving from R to Python — tackles MLOps - as an organizational challenge, not just a technology problem.

Maria walks - through core, actionable topics: building reusable CI/CD and standardized repos, - choosing model artifact and registry strategies (Artifactory, S3, MLflow alternatives), - and leveraging existing infra like Kubernetes, Git, and CI systems. She outlines - central MLOps responsibilities — infrastructure, registries, deployment patterns, - and monitoring — and contrasts centralized platform teams with embedded feature - teams and guardrails. You’ll hear practical advice on moving logic out of notebooks - into packages and pipelines, conducting maturity assessments (reproducibility, testing, - documentation), and securing DevOps buy‑in. The conversation also covers monitoring - standardization, A/B testing, early LLM pilots and their cost/GPU constraints, plus - retail use cases like demand forecasting and personalization.

Listen to - learn concrete steps for implementing CI/CD, model versioning, registries, and monitoring - — and how to prioritize organizational change to make MLOps work in production.' -dateadded: '2023-09-25' -duration: PT00H57M05S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=0 - endOffset: 101 -- name: 'Episode Overview: Pragmatic and Standardized MLOps with Maria Vechtomova' - startOffset: 101 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=101 - endOffset: 187 -- name: 'Background: Early career in data, econometrics, R to Python, and early MLOps - work' - startOffset: 187 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=187 - endOffset: 345 -- name: 'Early MLOps stacks: Teradata Aster, custom metadata, and orchestration' - startOffset: 345 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=345 - endOffset: 483 -- name: 'Role Overview: MLOps Tech Lead / Manager of Machine Learning Engineering' - startOffset: 483 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=483 - endOffset: 585 -- name: 'Marvelous MLOps: blog, LinkedIn presence, and content cadence' - startOffset: 585 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=585 - endOffset: 670 -- name: 'Defining MLOps: enablement, reproducibility, and teaching data scientists' - startOffset: 670 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=670 - endOffset: 762 -- name: 'Central MLOps team responsibilities: infrastructure, reusable CI/CD, and - monitoring' - startOffset: 762 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=762 - endOffset: 885 -- name: 'Toollandscape overload: MAD landscape, FOMO, and organizational challenges' - startOffset: 885 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=885 - endOffset: 987 -- name: 'Pragmatic MLOps: leverage existing infra (Kubernetes, Git, CI/CD) not new - tools' - startOffset: 987 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=987 - endOffset: 1121 -- name: 
'Essential MLOps stack: version control, CI/CD, registries, model registry, - deployment, monitoring' - startOffset: 1121 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1121 - endOffset: 1249 -- name: 'Model artifacts & registry options: Artifactory, S3, and MLflow alternatives' - startOffset: 1249 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1249 - endOffset: 1343 -- name: 'MLOps maturity assessment: documentation, reproducibility, code quality, - and testing' - startOffset: 1343 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1343 - endOffset: 1441 -- name: 'Startup priorities: reproducibility, versioning, traceability as first steps' - startOffset: 1441 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1441 - endOffset: 1626 -- name: 'Team organization: centralized MLOps vs. embedded feature teams and guardrails' - startOffset: 1626 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1626 - endOffset: 1795 -- name: 'Standardization: cookie‑cutter repos, service principals, and Databricks - integration' - startOffset: 1795 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=1795 - endOffset: 2004 -- name: 'Production best practices: move logic from notebooks to packages and CI/CD - pipelines' - startOffset: 2004 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2004 - endOffset: 2069 -- name: 'Implementation timeline: technical build vs. 
organizational buy‑in and permissions' - startOffset: 2069 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2069 - endOffset: 2121 -- name: 'Securing DevOps buy‑in: expose pain, deliver standards, and enable internal - audit' - startOffset: 2121 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2121 - endOffset: 2281 -- name: 'Team composition: small senior ML engineering team building MLOps platform' - startOffset: 2281 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2281 - endOffset: 2369 -- name: 'Tool‑agnostic skills: learn fundamentals and stitch tools together end-to-end' - startOffset: 2369 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2369 - endOffset: 2573 -- name: 'Roadmap priorities: monitoring standardization, A/B testing, and LLM pilots' - startOffset: 2573 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2573 - endOffset: 2744 -- name: 'LLM Ops perspective: hype, cost, GPU constraints, and multilingual limits' - startOffset: 2744 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2744 - endOffset: 2982 -- name: 'Retail use cases: demand forecasting, personalization, and loyalty programs' - startOffset: 2982 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=2982 - endOffset: 3084 -- name: 'Cross‑brand model: centralized MLOps support for smaller brands and cooperation - with large brands' - startOffset: 3084 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3084 - endOffset: 3245 -- name: 'Learning recommendations: hands‑on projects, MLOps Zoomcamp, and pairing - with engineers' - startOffset: 3245 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3245 - endOffset: 3368 -- name: 'Skill balance: ML fundamentals plus software engineering and system design' - startOffset: 3368 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3368 - endOffset: 3434 -- name: 'Data engineering importance: pipeline design, optimization, and data quality - for MLOps' - startOffset: 3434 - url: 
https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3434 - endOffset: 3496 -- name: 'Closing Remarks: upcoming course, LLM updates, and follow Marvelous MLOps' - startOffset: 3496 - url: https://www.youtube.com/watch?v=q3DTR3Od1MA&t=3496 - endOffset: 3425 --- Links: diff --git a/_podcast/s06e04-becoming-a-data-product-manager.md b/_podcast/product-designer-to-data-product-manager.md similarity index 97% rename from _podcast/s06e04-becoming-a-data-product-manager.md rename to _podcast/product-designer-to-data-product-manager.md index 601487c0..14851fc3 100644 --- a/_podcast/s06e04-becoming-a-data-product-manager.md +++ b/_podcast/product-designer-to-data-product-manager.md @@ -1,12 +1,11 @@ --- -title: 'How to Transition from Design to Data Product Manager: SQL, Customer Discovery - & Data Quality' -short: Becoming a Data Product Manager -guests: -- saramenefee -image: images/podcast/s06e04-becoming-a-data-product-manager.jpg +title: "How to Transition from Design to Data Product Manager: SQL, Customer Discovery & Data Quality" +short: "Becoming a Data Product Manager" season: 6 episode: 4 +guests: +- saramenefee +image: images/podcast/product-designer-to-data-product-manager.jpg ids: youtube: nt__pVuuC-k anchor: Becoming-a-Data-Product-Manager---Sara-Menefee-e1arc4a @@ -15,6 +14,111 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Data-Product-Manager---Sara-Menefee-e1arc4a spotify: https://open.spotify.com/episode/3NZhd5kgQFpGckyxTQH9bF apple: https://podcasts.apple.com/us/podcast/becoming-a-data-product-manager-sara-menefee/id1541710331?i=1000543165093 + +description: "Learn to transition from product design to Data Product Manager: master SQL, customer discovery, build a portfolio and lead analytics products." +intro: "How do you move from product design into a data product manager role — and which technical and discovery skills will make that transition practical and persuasive? 
Sara Menefee, a product manager at Meroxa and former product designer at Sora, Checkr, Change.org, and Zendesk, walks through her path and the concrete steps designers can take to become data-focused PMs.

This episode covers customer discovery and hypothesis formation, SQL and data engineering fundamentals, and the operational realities of data product management: data quality, PII/compliance, and the data lifecycle from sources to warehouses and apps. Sara explains how design thinking and PM–designer collaboration inform discovery and prioritization, and lays out a transition strategy that emphasizes networking, on-the-job learning, mentorship, and a portfolio built around case-study structure (problem, research, solution, outcome). You’ll also hear practical workflows — standups, analytics, customer development interviews — plus documentation-first practices (PRDs, knowledge bases), resource recommendations (including Reforge), and where ML and data science fit into the PM role.

Listen for actionable steps, portfolio guidance, and the technical literacy (SQL, documentation, data curiosity) you'll need to move from design to data product manager." +topics: +- career transition +- product design +- product management +dateadded: 2021-11-26 + +duration: PT01H01M07S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=0 + endOffset: 87 +- name: 'Career Path: From Technical Support to Product Design' + startOffset: 87 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=87 + endOffset: 298 +- name: 'Product Design: User Research, Prototyping & UX' + startOffset: 298 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=298 + endOffset: 424 +- name: 'Data Product Management: Customer Discovery & Hypothesis Formation' + startOffset: 424 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=424 + endOffset: 698 +- name: 'Product Lifecycle: Discovery, Planning, Engineering & Launch' + startOffset: 698 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=698 + endOffset: 910 +- name: 'Design Thinking: PM–Designer Collaboration in Ideation' + startOffset: 910 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=910 + endOffset: 986 +- name: 'Transition Motivation: Moving from Design to Product Management' + startOffset: 986 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=986 + endOffset: 1178 +- name: 'Data-focused PM: Data Quality, PII & Compliance Considerations' + startOffset: 1178 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1178 + endOffset: 1380 +- name: 'Core Technical Skills: SQL & Data Engineering Fundamentals' + startOffset: 1380 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1380 + endOffset: 1470 +- name: 'Essential Traits: Data Curiosity, Documentation Literacy & Empathy' + startOffset: 1470 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1470 + endOffset: 1593 +- name: 'Data Lifecycle: Sources, Transformation, Warehouses & Apps' + startOffset: 1593 + 
url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1593 + endOffset: 1710 +- name: 'Transition Strategy: Networking, On-the-Job Learning & Mentorship' + startOffset: 1710 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1710 + endOffset: 1980 +- name: 'Practical Steps: Building a Portfolio & Learning After the Switch' + startOffset: 1980 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1980 + endOffset: 2151 +- name: 'Case Study Structure: Problem, Research, Solution & Outcome' + startOffset: 2151 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2151 + endOffset: 2344 +- name: 'Learning Resources: Courses, Reforge & Recommended Reading' + startOffset: 2344 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2344 + endOffset: 2761 +- name: 'Daily Workflow: Standups, Analytics, CusDev & Context Switching' + startOffset: 2761 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2761 + endOffset: 2977 +- name: 'Customer Development: Interview Focus & Tactical Questions' + startOffset: 2977 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2977 + endOffset: 3115 +- name: 'Key Insight: Data Teams Spend Time Educating the Organization' + startOffset: 3115 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3115 + endOffset: 3249 +- name: 'Adopting New Tools: Documentation First, Pairing & Slack Help' + startOffset: 3249 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3249 + endOffset: 3368 +- name: 'Product Documentation: PRDs, Customer Notes & Knowledge Base' + startOffset: 3368 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3368 + endOffset: 3504 +- name: 'Idea Flow: Sources, Validation & Backlog Prioritization' + startOffset: 3504 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3504 + endOffset: 3640 +- name: 'Analytics vs Data Science: Where ML Fits in the PM Role' + startOffset: 3640 + url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3640 + endOffset: 3697 +- name: Closing Remarks & How to Reach Out + startOffset: 3697 + url: 
https://www.youtube.com/watch?v=nt__pVuuC-k&t=3697 + endOffset: 3667 + transcript: - header: Podcast Introduction - line: This week, we will talk about a data product manager. We have a special guest @@ -1057,120 +1161,6 @@ transcript: sec: 3722 time: '1:02:02' who: Alexey -description: 'Learn to transition from product design to Data Product Manager: master - SQL, customer discovery, build a portfolio and lead analytics products.' -intro: 'How do you move from product design into a data product manager role — and - which technical and discovery skills will make that transition practical and persuasive? - Sara Menefee, a product manager at Meroxa and former product designer at Sora, Checkr, - Change.org, and Zendesk, walks through her path and the concrete steps designers - can take to become data-focused PMs.

This episode covers customer discovery - and hypothesis formation, SQL and data engineering fundamentals, and the operational - realities of data product management: data quality, PII/compliance, and the data - lifecycle from sources to warehouses and apps. Sara explains how design thinking - and PM–designer collaboration inform discovery and prioritization, and lays out - a transition strategy that emphasizes networking, on-the-job learning, mentorship, - and a portfolio built around case-study structure (problem, research, solution, - outcome). You’ll also hear practical workflows — standups, analytics, customer development - interviews — plus documentation-first practices (PRDs, knowledge bases), resource - recommendations (including Reforge), and where ML and data science fit into the - PM role.

Listen for actionable steps, portfolio guidance, and the technical - literacy (SQL, documentation, data curiosity) you''ll need to move from design to - data product manager.' -dateadded: '2021-11-26' -duration: PT01H01M07S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=0 - endOffset: 87 -- name: 'Career Path: From Technical Support to Product Design' - startOffset: 87 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=87 - endOffset: 298 -- name: 'Product Design: User Research, Prototyping & UX' - startOffset: 298 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=298 - endOffset: 424 -- name: 'Data Product Management: Customer Discovery & Hypothesis Formation' - startOffset: 424 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=424 - endOffset: 698 -- name: 'Product Lifecycle: Discovery, Planning, Engineering & Launch' - startOffset: 698 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=698 - endOffset: 910 -- name: 'Design Thinking: PM–Designer Collaboration in Ideation' - startOffset: 910 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=910 - endOffset: 986 -- name: 'Transition Motivation: Moving from Design to Product Management' - startOffset: 986 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=986 - endOffset: 1178 -- name: 'Data-focused PM: Data Quality, PII & Compliance Considerations' - startOffset: 1178 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1178 - endOffset: 1380 -- name: 'Core Technical Skills: SQL & Data Engineering Fundamentals' - startOffset: 1380 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1380 - endOffset: 1470 -- name: 'Essential Traits: Data Curiosity, Documentation Literacy & Empathy' - startOffset: 1470 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1470 - endOffset: 1593 -- name: 'Data Lifecycle: Sources, Transformation, Warehouses & Apps' - startOffset: 1593 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1593 - 
endOffset: 1710 -- name: 'Transition Strategy: Networking, On-the-Job Learning & Mentorship' - startOffset: 1710 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1710 - endOffset: 1980 -- name: 'Practical Steps: Building a Portfolio & Learning After the Switch' - startOffset: 1980 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=1980 - endOffset: 2151 -- name: 'Case Study Structure: Problem, Research, Solution & Outcome' - startOffset: 2151 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2151 - endOffset: 2344 -- name: 'Learning Resources: Courses, Reforge & Recommended Reading' - startOffset: 2344 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2344 - endOffset: 2761 -- name: 'Daily Workflow: Standups, Analytics, CusDev & Context Switching' - startOffset: 2761 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2761 - endOffset: 2977 -- name: 'Customer Development: Interview Focus & Tactical Questions' - startOffset: 2977 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=2977 - endOffset: 3115 -- name: 'Key Insight: Data Teams Spend Time Educating the Organization' - startOffset: 3115 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3115 - endOffset: 3249 -- name: 'Adopting New Tools: Documentation First, Pairing & Slack Help' - startOffset: 3249 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3249 - endOffset: 3368 -- name: 'Product Documentation: PRDs, Customer Notes & Knowledge Base' - startOffset: 3368 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3368 - endOffset: 3504 -- name: 'Idea Flow: Sources, Validation & Backlog Prioritization' - startOffset: 3504 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3504 - endOffset: 3640 -- name: 'Analytics vs Data Science: Where ML Fits in the PM Role' - startOffset: 3640 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3640 - endOffset: 3697 -- name: Closing Remarks & How to Reach Out - startOffset: 3697 - url: https://www.youtube.com/watch?v=nt__pVuuC-k&t=3697 - endOffset: 
3667 --- Links: diff --git a/_podcast/s05e07-ml-vs-analytics.md b/_podcast/production-ml-mlops-and-data-team-building.md similarity index 97% rename from _podcast/s05e07-ml-vs-analytics.md rename to _podcast/production-ml-mlops-and-data-team-building.md index f4f885b0..5d7a2c09 100644 --- a/_podcast/s05e07-ml-vs-analytics.md +++ b/_podcast/production-ml-mlops-and-data-team-building.md @@ -1,12 +1,11 @@ --- -title: 'From Analytics to Production ML: Team Building, Experiments, MLOps & Fraud - Detection' -short: Similarities and Differences between ML and Analytics -guests: -- rishabhbhargava -image: images/podcast/s05e07-ml-vs-analytics.jpg +title: "From Analytics to Production ML: Team Building, Experiments, MLOps & Fraud Detection" +short: "Similarities and Differences between ML and Analytics" season: 5 episode: 7 +guests: +- rishabhbhargava +image: images/podcast/production-ml-mlops-and-data-team-building.jpg ids: youtube: rMRUa8WxDz4 anchor: Similarities-and-Differences-between-ML-and-Analytics---Rishabh-Bhargava-e18rcam @@ -15,6 +14,112 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Similarities-and-Differences-between-ML-and-Analytics---Rishabh-Bhargava-e18rcam spotify: https://open.spotify.com/episode/19fWdSuxTLwIdzVT45qF9x apple: https://podcasts.apple.com/us/podcast/similarities-and-differences-between-ml-and/id1541710331?i=1000538713607 + +description: "Master building data teams, deploying production machine learning and MLOps, running A/B experiments and fraud detection to boost model reliability and ROI" +intro: "How do teams move beyond dashboards to reliable production ML—while organizing people, running experiments, and tackling use cases like fraud detection? In this episode Rishabh Bhargava (7+ years in analytics and ML, former Sales Engineering lead at Datacoral—acquired by Cloudera—and early Primer.ai engineer; MS CS Stanford) walks through the practical bridge from analytics to ML in production.

We cover data infrastructure and sales-engineering lessons (demos, POCs, integration), early NLP work (summarization, entity extraction), and the differences between prescriptive and predictive analytics. Rishabh explains day-to-day ML operations—models, APIs, SLAs—and the evolution of fraud detection from rule-based systems to machine learning. He digs into experimental workflows (A/B testing, shadow mode), experiment analysis (segmentation, uplift, root cause), and why documentation and analysts’ tribal knowledge matter. We also discuss hiring and team structure—hire data engineers, then analysts, then data scientists—and trade-offs between embedded versus centralized data roles.

If you’re responsible for data strategy, MLOps, or deploying fraud detection models, this episode provides actionable perspectives on experiments, team building, and moving ML into production." +topics: +- machine learning +- production +- data analytics +- MLOps +- team building +- data teams +- leadership +- career growth +dateadded: 2021-10-16 + +duration: PT00H59M15S + +quotableClips: +- name: Episode Introduction & Guest Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=0 + endOffset: 128 +- name: 'Career Path: Data Infrastructure and Stanford ML Background' + startOffset: 128 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=128 + endOffset: 235 +- name: 'Sales Engineering: Demos, POCs and Data Integration' + startOffset: 235 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=235 + endOffset: 335 +- name: 'Early Machine Learning Work: NLP, Summarization and Entity Extraction' + startOffset: 335 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=335 + endOffset: 406 +- name: 'Prescriptive vs Predictive Analytics: Definitions and Business Use Cases' + startOffset: 406 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=406 + endOffset: 572 +- name: 'Terminology Problems: The Ambiguity of "Data Science"' + startOffset: 572 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=572 + endOffset: 648 +- name: 'ML vs Analytics: Different Goals, Shared Data Infrastructure' + startOffset: 648 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=648 + endOffset: 828 +- name: 'Machine Learning Day-to-Day: Models, APIs, Predictions and SLAs' + startOffset: 828 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=828 + endOffset: 1058 +- name: 'Fraud Detection: From Rule-Based Systems to Machine Learning' + startOffset: 1058 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1058 + endOffset: 1119 +- name: 'Analyst Responsibilities: Dashboards, Reports and Ad-hoc Queries' + startOffset: 1119 + url: 
https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1119 + endOffset: 1463 +- name: 'Domain Expertise: Analysts'' Tribal Knowledge and SQL Proficiency' + startOffset: 1463 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1463 + endOffset: 1593 +- name: Documentation Limitations and Attempts to Improve Knowledge Sharing + startOffset: 1593 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1593 + endOffset: 1722 +- name: 'Experimental Workflows: Model Experiments, A/B Testing and Shadow Mode' + startOffset: 1722 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1722 + endOffset: 1879 +- name: 'Experiment Analysis: Segmentation, Uplift and Root Cause Investigation' + startOffset: 1879 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1879 + endOffset: 2010 +- name: 'Overlaps and Differences: Data Quality, Timescales and Outputs' + startOffset: 2010 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2010 + endOffset: 2344 +- name: 'Bridging Roles: Notebooks, SQL+Python Workflows and Analytics Engineering' + startOffset: 2344 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2344 + endOffset: 2473 +- name: 'Investment Trends: ML Hype, Analytics Underspend and Data Infrastructure' + startOffset: 2473 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2473 + endOffset: 2582 +- name: 'Hiring Imbalance: Prioritizing Data Scientists vs Data Analysts' + startOffset: 2582 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2582 + endOffset: 2941 +- name: 'Team Organization: Embedded Data Roles Versus Centralized Structures' + startOffset: 2941 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2941 + endOffset: 3341 +- name: 'Building a Data Team: Hire Data Engineers, Then Analysts, Then DS' + startOffset: 3341 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3341 + endOffset: 3499 +- name: 'MLOpsRoundup Newsletter: ML Production, MLOps Insights and Resources' + startOffset: 3499 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3499 + endOffset: 3599 
+- name: 'Contact and Community: Twitter, Slack and Episode Close' + startOffset: 3599 + url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3599 + endOffset: 3555 + transcript: - header: Episode Introduction & Guest Overview - line: This week, we'll talk about the similarities and differences between machine @@ -175,7 +280,7 @@ transcript: sec: 555 time: '9:15' who: Rishabh -- header: 'Terminology Problems: The Ambiguity of "Data Science"' +- header: 'Terminology Problems: The Ambiguity of "Data Science"' - line: 'Actually, in the question that I initially put, I wrote ‘data science’. Then you left a comment saying: “Hey, let''s not use ‘science’ here because it''s too ambiguous. It can mean too many things.”' @@ -1020,116 +1125,6 @@ transcript: sec: 3620 time: '1:00:20' who: Alexey -description: Master building data teams, deploying production machine learning and - MLOps, running A/B experiments and fraud detection to boost model reliability and - ROI. -intro: How do teams move beyond dashboards to reliable production ML—while organizing - people, running experiments, and tackling use cases like fraud detection? In this - episode Rishabh Bhargava (7+ years in analytics and ML, former Sales Engineering - lead at Datacoral—acquired by Cloudera—and early Primer.ai engineer; MS CS Stanford) - walks through the practical bridge from analytics to ML in production.

- We cover data infrastructure and sales-engineering lessons (demos, POCs, integration), - early NLP work (summarization, entity extraction), and the differences between prescriptive - and predictive analytics. Rishabh explains day-to-day ML operations—models, APIs, - SLAs—and the evolution of fraud detection from rule-based systems to machine learning. - He digs into experimental workflows (A/B testing, shadow mode), experiment analysis - (segmentation, uplift, root cause), and why documentation and analysts’ tribal knowledge - matter. We also discuss hiring and team structure—hire data engineers, then analysts, - then data scientists—and trade-offs between embedded versus centralized data roles. -

If you’re responsible for data strategy, MLOps, or deploying fraud detection - models, this episode provides actionable perspectives on experiments, team building, - and moving ML into production. -dateadded: '2021-10-16' -duration: PT00H59M15S -quotableClips: -- name: Episode Introduction & Guest Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=0 - endOffset: 128 -- name: 'Career Path: Data Infrastructure and Stanford ML Background' - startOffset: 128 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=128 - endOffset: 235 -- name: 'Sales Engineering: Demos, POCs and Data Integration' - startOffset: 235 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=235 - endOffset: 335 -- name: 'Early Machine Learning Work: NLP, Summarization and Entity Extraction' - startOffset: 335 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=335 - endOffset: 406 -- name: 'Prescriptive vs Predictive Analytics: Definitions and Business Use Cases' - startOffset: 406 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=406 - endOffset: 572 -- name: 'Terminology Problems: The Ambiguity of "Data Science"' - startOffset: 572 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=572 - endOffset: 648 -- name: 'ML vs Analytics: Different Goals, Shared Data Infrastructure' - startOffset: 648 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=648 - endOffset: 828 -- name: 'Machine Learning Day-to-Day: Models, APIs, Predictions and SLAs' - startOffset: 828 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=828 - endOffset: 1058 -- name: 'Fraud Detection: From Rule-Based Systems to Machine Learning' - startOffset: 1058 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1058 - endOffset: 1119 -- name: 'Analyst Responsibilities: Dashboards, Reports and Ad-hoc Queries' - startOffset: 1119 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1119 - endOffset: 1463 -- name: 'Domain Expertise: Analysts'' Tribal Knowledge and SQL Proficiency' - startOffset: 1463 - 
url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1463 - endOffset: 1593 -- name: Documentation Limitations and Attempts to Improve Knowledge Sharing - startOffset: 1593 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1593 - endOffset: 1722 -- name: 'Experimental Workflows: Model Experiments, A/B Testing and Shadow Mode' - startOffset: 1722 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1722 - endOffset: 1879 -- name: 'Experiment Analysis: Segmentation, Uplift and Root Cause Investigation' - startOffset: 1879 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=1879 - endOffset: 2010 -- name: 'Overlaps and Differences: Data Quality, Timescales and Outputs' - startOffset: 2010 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2010 - endOffset: 2344 -- name: 'Bridging Roles: Notebooks, SQL+Python Workflows and Analytics Engineering' - startOffset: 2344 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2344 - endOffset: 2473 -- name: 'Investment Trends: ML Hype, Analytics Underspend and Data Infrastructure' - startOffset: 2473 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2473 - endOffset: 2582 -- name: 'Hiring Imbalance: Prioritizing Data Scientists vs Data Analysts' - startOffset: 2582 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2582 - endOffset: 2941 -- name: 'Team Organization: Embedded Data Roles Versus Centralized Structures' - startOffset: 2941 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=2941 - endOffset: 3341 -- name: 'Building a Data Team: Hire Data Engineers, Then Analysts, Then DS' - startOffset: 3341 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3341 - endOffset: 3499 -- name: 'MLOpsRoundup Newsletter: ML Production, MLOps Insights and Resources' - startOffset: 3499 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3499 - endOffset: 3599 -- name: 'Contact and Community: Twitter, Slack and Episode Close' - startOffset: 3599 - url: https://www.youtube.com/watch?v=rMRUa8WxDz4&t=3599 - endOffset: 3555 --- 
Links: diff --git a/_podcast/s04e02-build-your-own-data-pipeline.md b/_podcast/production-ml-pipelines-with-aws-and-kafka.md similarity index 96% rename from _podcast/s04e02-build-your-own-data-pipeline.md rename to _podcast/production-ml-pipelines-with-aws-and-kafka.md index dd99c3d0..b697b074 100644 --- a/_podcast/s04e02-build-your-own-data-pipeline.md +++ b/_podcast/production-ml-pipelines-with-aws-and-kafka.md @@ -1,12 +1,11 @@ --- -title: 'From Notebooks to Production: Build Data Pipelines & Deploy ML (AWS, Kafka, - Streaming)' -short: Build Your Own Data Pipeline -guests: -- andreaskretz -image: images/podcast/s04e02-build-your-own-data-pipeline.jpg +title: "From Notebooks to Production: Build Data Pipelines & Deploy ML (AWS, Kafka, Streaming)" +short: "Build Your Own Data Pipeline" season: 4 episode: 2 +guests: +- andreaskretz +image: images/podcast/production-ml-pipelines-with-aws-and-kafka.jpg ids: youtube: IrZPAG6OBqo anchor: Build-Your-Own-Data-Pipeline---Andreas-Kretz-e139se1 @@ -15,6 +14,140 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Build-Your-Own-Data-Pipeline---Andreas-Kretz-e139se1 spotify: https://open.spotify.com/episode/0fFRCAYFCReMxEiq2RDVak apple: https://podcasts.apple.com/us/podcast/build-your-own-data-pipeline-andreas-kretz/id1541710331?i=1000527643914 + +description: "Learn to build data pipelines and deploy ML on AWS: productionize notebooks, cut ops risk, choose cost-effective serving and orchestration." +intro: "How do you move models out of notebooks and into reliable production data pipelines using AWS, Kafka, and streaming architectures? In this episode, Andreas Kretz — the “Plumber of Data Science” — walks through the practical steps engineers and data scientists need to productionize notebooks and deploy ML systems.

Andreas, a data engineer focused on platform architecture, explains why data engineering demand is rising and why teams should hire both a data scientist and engineer early. We cover the anatomy of data pipelines — ingestion (events, Kafka/Kinesis), buffering, processing (streaming vs. batch), storage (Parquet on S3) and visualization — plus processing frameworks like Spark, Flink, Glue, and Docker jobs. Andreas outlines a pragmatic stack for scientists: Python, Docker, Flask/FastAPI for prototypes, and how to choose orchestration and scheduling (Lambda/CloudWatch, Airflow, Kubernetes, message queues). You’ll also hear about inference strategies, SageMaker endpoints vs precomputed predictions, model storage, and operational trade-offs.

Listen to gain actionable guidance on building data pipelines, deploying ML on AWS, selecting tools, and getting from prototype to production with minimal operational risk. Find practical learning paths and project ideas to accelerate your data engineering skills." +topics: +- data engineering +- machine learning +- production +- tools +dateadded: 2021-07-02 + +duration: PT01H01M15S + +quotableClips: +- name: Episode Introduction & Andreas Kretz — "Plumber of Data Science" + startOffset: 116 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=116 + endOffset: 199 +- name: 'Guest Bio: Andreas’s path from software to big data and data engineering' + startOffset: 199 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=199 + endOffset: 343 +- name: 'Market Trend: Why data engineering demand is rising' + startOffset: 343 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=343 + endOffset: 526 +- name: 'Hiring Strategy: Hire a data scientist and engineer early' + startOffset: 526 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=526 + endOffset: 587 +- name: 'Data Scientist Growth: From notebooks to production pipelines' + startOffset: 587 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=587 + endOffset: 723 +- name: 'Operational Risk: Why using many tools breaks operations' + startOffset: 723 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=723 + endOffset: 805 +- name: 'Data Pipeline Anatomy: Ingestion, buffer, processing, storage, visualization' + startOffset: 805 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=805 + endOffset: 911 +- name: 'Ingestion Explained: Events, message queues (Kafka, Kinesis)' + startOffset: 911 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=911 + endOffset: 1011 +- name: 'Processing Modes: Streaming vs. 
batch processing' + startOffset: 1011 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1011 + endOffset: 1094 +- name: 'One-Person Feasibility: Tooling, cloud vs on-prem, and schema design' + startOffset: 1094 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1094 + endOffset: 1265 +- name: 'Practical Stack for Scientists: Python, Docker, Flask/FastAPI for prototypes' + startOffset: 1265 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1265 + endOffset: 1356 +- name: 'Processing Frameworks Overview: Spark, Flink, Lambda, Glue, Docker jobs' + startOffset: 1356 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1356 + endOffset: 1444 +- name: 'Data Transformation: Role of SQL and dataframe processing' + startOffset: 1444 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1444 + endOffset: 1536 +- name: 'AWS Example: Parquet on S3 and processing options' + startOffset: 1536 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1536 + endOffset: 1642 +- name: 'Case Study: Car price prediction — data sources and architecture' + startOffset: 1642 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1642 + endOffset: 1893 +- name: 'Inference Strategy: Live API calls versus precomputed predictions' + startOffset: 1893 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1893 + endOffset: 2056 +- name: 'Productionizing Notebooks: Dockerized training and model storage on S3' + startOffset: 2056 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2056 + endOffset: 2146 +- name: 'Scheduling Options: Airflow vs CloudWatch/Lambda vs simple schedulers' + startOffset: 2146 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2146 + endOffset: 2273 +- name: 'Model Serving: SageMaker endpoints and cost trade-offs' + startOffset: 2273 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2273 + endOffset: 2401 +- name: 'Orchestration Patterns: Message queues for job sequencing' + startOffset: 2401 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2401 + endOffset: 
2466 +- name: 'Start Simple: Iterate from Lambda/queues to Airflow/Kubernetes' + startOffset: 2466 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2466 + endOffset: 2585 +- name: 'Learning DevOps: Pick tools, read docs, and practice by doing' + startOffset: 2585 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2585 + endOffset: 2731 +- name: 'Tool Selection: Use docs and tutorials to validate choices' + startOffset: 2731 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2731 + endOffset: 2916 +- name: 'Early-Career Skills: Python, SQL, basic networking; AWS and OSS basics' + startOffset: 2916 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2916 + endOffset: 3074 +- name: 'Hadoop Today: Cloud replaces Hadoop for many, but Hadoop persists in legacy' + startOffset: 3074 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3074 + endOffset: 3141 +- name: 'LearnDataEngineering Academy: Curriculum, capstones, and resources' + startOffset: 3141 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3141 + endOffset: 3292 +- name: 'Hands-on Projects: Build an e-commerce pipeline; use Kaggle datasets' + startOffset: 3292 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3292 + endOffset: 3453 +- name: 'Learning Advice: Avoid huge datasets; start small and iterate' + startOffset: 3453 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3453 + endOffset: 3536 +- name: 'Convincing Stakeholders: Build a $0 proof-of-concept and quantify ROI' + startOffset: 3536 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3536 + endOffset: 3725 +- name: 'Find Andreas & Resources: LearnDataEngineering, YouTube, Telegram' + startOffset: 3725 + url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3725 + endOffset: 3675 + transcript: - header: Episode Introduction & Andreas Kretz — "Plumber of Data Science" - line: Today we'll talk about learning how to build data pipelines for data scientists. 
@@ -273,7 +406,7 @@ transcript: sec: 1053 time: '17:33' who: Andreas -- header: 'One-Person Feasibility: Tooling, cloud vs on‑prem, and schema design' +- header: 'One-Person Feasibility: Tooling, cloud vs on-prem, and schema design' - line: That seems like a lot of work. Something that a data scientist – just one person – probably cannot really implement on his or her own, so it needs multiple people and a data engineer, at the very least. @@ -856,7 +989,7 @@ transcript: sec: 3286 time: '54:46' who: Andreas -- header: 'Hands-on Projects: Build an e‑commerce pipeline; use Kaggle datasets' +- header: 'Hands-on Projects: Build an e-commerce pipeline; use Kaggle datasets' - line: What I understood by talking to you now is that one of the most important skills data scientists need to have in order to pick up data engineering is cloud skills. But ‘cloud skills’ is such a broad term, right? But there are a couple @@ -916,7 +1049,7 @@ transcript: sec: 3535 time: '58:55' who: Andreas -- header: 'Convincing Stakeholders: Build a $0 proof‑of‑concept and quantify ROI' +- header: 'Convincing Stakeholders: Build a $0 proof-of-concept and quantify ROI' - line: Good advice. So it's almost time for us to finish. But there is one interesting question. Maybe we can take a couple of minutes to answer it. Maybe it's a tough one. Let's try. “I'm trying to convince my company to start a data science department. @@ -1004,148 +1137,6 @@ transcript: sec: 3791 time: '1:03:11' who: Alexey -description: 'Learn to build data pipelines and deploy ML on AWS: productionize notebooks, - cut ops risk, choose cost‑effective serving and orchestration.' -intro: 'How do you move models out of notebooks and into reliable production data - pipelines using AWS, Kafka, and streaming architectures? In this episode, Andreas - Kretz — the “Plumber of Data Science” — walks through the practical steps engineers - and data scientists need to productionize notebooks and deploy ML systems.

- Andreas, a data engineer focused on platform architecture, explains why data engineering - demand is rising and why teams should hire both a data scientist and engineer early. - We cover the anatomy of data pipelines — ingestion (events, Kafka/Kinesis), buffering, - processing (streaming vs. batch), storage (Parquet on S3) and visualization — plus - processing frameworks like Spark, Flink, Glue, and Docker jobs. Andreas outlines - a pragmatic stack for scientists: Python, Docker, Flask/FastAPI for prototypes, - and how to choose orchestration and scheduling (Lambda/CloudWatch, Airflow, Kubernetes, - message queues). You’ll also hear about inference strategies, SageMaker endpoints - vs precomputed predictions, model storage, and operational trade-offs.

- Listen to gain actionable guidance on building data pipelines, deploying ML on AWS, - selecting tools, and getting from prototype to production with minimal operational - risk. Find practical learning paths and project ideas to accelerate your data engineering - skills.' -dateadded: '2021-07-02' -duration: PT01H01M15S -quotableClips: -- name: Episode Introduction & Andreas Kretz — "Plumber of Data Science" - startOffset: 116 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=116 - endOffset: 199 -- name: 'Guest Bio: Andreas’s path from software to big data and data engineering' - startOffset: 199 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=199 - endOffset: 343 -- name: 'Market Trend: Why data engineering demand is rising' - startOffset: 343 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=343 - endOffset: 526 -- name: 'Hiring Strategy: Hire a data scientist and engineer early' - startOffset: 526 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=526 - endOffset: 587 -- name: 'Data Scientist Growth: From notebooks to production pipelines' - startOffset: 587 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=587 - endOffset: 723 -- name: 'Operational Risk: Why using many tools breaks operations' - startOffset: 723 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=723 - endOffset: 805 -- name: 'Data Pipeline Anatomy: Ingestion, buffer, processing, storage, visualization' - startOffset: 805 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=805 - endOffset: 911 -- name: 'Ingestion Explained: Events, message queues (Kafka, Kinesis)' - startOffset: 911 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=911 - endOffset: 1011 -- name: 'Processing Modes: Streaming vs. 
batch processing' - startOffset: 1011 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1011 - endOffset: 1094 -- name: 'One-Person Feasibility: Tooling, cloud vs on‑prem, and schema design' - startOffset: 1094 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1094 - endOffset: 1265 -- name: 'Practical Stack for Scientists: Python, Docker, Flask/FastAPI for prototypes' - startOffset: 1265 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1265 - endOffset: 1356 -- name: 'Processing Frameworks Overview: Spark, Flink, Lambda, Glue, Docker jobs' - startOffset: 1356 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1356 - endOffset: 1444 -- name: 'Data Transformation: Role of SQL and dataframe processing' - startOffset: 1444 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1444 - endOffset: 1536 -- name: 'AWS Example: Parquet on S3 and processing options' - startOffset: 1536 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1536 - endOffset: 1642 -- name: 'Case Study: Car price prediction — data sources and architecture' - startOffset: 1642 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1642 - endOffset: 1893 -- name: 'Inference Strategy: Live API calls versus precomputed predictions' - startOffset: 1893 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=1893 - endOffset: 2056 -- name: 'Productionizing Notebooks: Dockerized training and model storage on S3' - startOffset: 2056 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2056 - endOffset: 2146 -- name: 'Scheduling Options: Airflow vs CloudWatch/Lambda vs simple schedulers' - startOffset: 2146 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2146 - endOffset: 2273 -- name: 'Model Serving: SageMaker endpoints and cost trade-offs' - startOffset: 2273 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2273 - endOffset: 2401 -- name: 'Orchestration Patterns: Message queues for job sequencing' - startOffset: 2401 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2401 - endOffset: 
2466 -- name: 'Start Simple: Iterate from Lambda/queues to Airflow/Kubernetes' - startOffset: 2466 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2466 - endOffset: 2585 -- name: 'Learning DevOps: Pick tools, read docs, and practice by doing' - startOffset: 2585 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2585 - endOffset: 2731 -- name: 'Tool Selection: Use docs and tutorials to validate choices' - startOffset: 2731 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2731 - endOffset: 2916 -- name: 'Early-Career Skills: Python, SQL, basic networking; AWS and OSS basics' - startOffset: 2916 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=2916 - endOffset: 3074 -- name: 'Hadoop Today: Cloud replaces Hadoop for many, but Hadoop persists in legacy' - startOffset: 3074 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3074 - endOffset: 3141 -- name: 'LearnDataEngineering Academy: Curriculum, capstones, and resources' - startOffset: 3141 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3141 - endOffset: 3292 -- name: 'Hands-on Projects: Build an e‑commerce pipeline; use Kaggle datasets' - startOffset: 3292 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3292 - endOffset: 3453 -- name: 'Learning Advice: Avoid huge datasets; start small and iterate' - startOffset: 3453 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3453 - endOffset: 3536 -- name: 'Convincing Stakeholders: Build a $0 proof‑of‑concept and quantify ROI' - startOffset: 3536 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3536 - endOffset: 3725 -- name: 'Find Andreas & Resources: LearnDataEngineering, YouTube, Telegram' - startOffset: 3725 - url: https://www.youtube.com/watch?v=IrZPAG6OBqo&t=3725 - endOffset: 3675 --- diff --git a/_podcast/s17e08-building-machine-learning-products.md b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md similarity index 96% rename from _podcast/s17e08-building-machine-learning-products.md rename to 
_podcast/production-ml-search-vector-search-embeddings-hybrid search.md index d2749024..3a32c013 100644 --- a/_podcast/s17e08-building-machine-learning-products.md +++ b/_podcast/production-ml-search-vector-search-embeddings-hybrid search.md @@ -1,20 +1,136 @@ --- +title: "Production ML Search: Embeddings, Hybrid Architectures and Scalable Indexing" +short: "Building Machine Learning Products" +season: 17 episode: 8 guests: - reemmahmoud +image: images/podcast/production-ml-search-vector-search-embeddings-hybrid search.jpg ids: - anchor: atatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd + anchor: datatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd youtube: m45tNY-8gY8 -image: images/podcast/s17e08-building-machine-learning-products.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Building-Machine-Learning-Products---Reem-Mahmoud-e2gttcd apple: https://podcasts.apple.com/us/podcast/building-machine-learning-products-reem-mahmoud/id1541710331?i=1000649393833 spotify: https://open.spotify.com/episode/4jNredXndQ2b2evgfSmD2G?si=gU2kT-zXSX27hDPgLtwMgQ youtube: https://www.youtube.com/watch?v=m45tNY-8gY8 -season: 17 -short: Building Machine Learning Products -title: 'Vector Search & Hybrid Retrieval: Practical Guide to Embeddings, Indexing, - Multimodal Fusion' +description: "Master vector search, embeddings and hybrid search: scalable indexing, multimodal retrieval and ranking tactics to boost relevance and reduce latency." +topics: +- LLMs +- NLP +- machine learning +- MLOps +- data engineering + +intro: "How do you move from prototypes to production ML search that scales and stays relevant? In this episode Reem Mahmoud, Director of Data Science at intervu.ai, breaks down practical approaches to building production ML search systems—focusing on embeddings, hybrid architectures, and scalable indexing.

We cover core concepts like inverted indexes and Lucene basics, candidate generation versus ML ranking, and why you should avoid hand-rolling indexes. Dive into vector search fundamentals—embeddings as shared representations, embedding pipelines, and the trade-offs between vector compute and storage. Learn how multimodal embeddings (text, images, CLIP) and feature fusion enable richer relevance, and how hybrid search combines vector similarity with filters, recency, and business constraints. The episode also explores time encoding in embeddings, query-time weighting, LLMs versus specialized encoders, vector DB selection, and operationalization—offline tests, A/B metrics, and enabling engineers for fast iteration.

Listen for actionable guidance on scalable indexing strategies, choosing a vector DB, and measuring search impact so you can design reliable production search that balances latency, relevance, and business KPIs." +dateadded: 2024-03-17 +duration: PT01H05M23S +quotableClips: +- name: 'Guest Introduction: Daniel, Superlinked, and VectorHub' + startOffset: 107 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=107 + endOffset: 149 +- name: 'Career Journey: Competitive programming, startups, and YouTube Ads' + startOffset: 149 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=149 + endOffset: 380 +- name: 'Competitive Programming to Infrastructure: relevance of algorithms' + startOffset: 380 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=380 + endOffset: 480 +- name: 'Defining Search: Information retrieval as a decision problem' + startOffset: 480 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=480 + endOffset: 550 +- name: 'Search vs Recommenders: Representation learning overview' + startOffset: 550 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=550 + endOffset: 645 +- name: 'Search Constraints: Latency and user experience impact' + startOffset: 645 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=645 + endOffset: 689 +- name: 'Text Search Fundamentals: Inverted index and Lucene basics' + startOffset: 689 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=689 + endOffset: 765 +- name: 'Search Architecture: Candidate generation (retrieval) and ML ranking' + startOffset: 765 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=765 + endOffset: 1060 +- name: 'Indexing Documents: Practical tools and why not to hand-roll indexes' + startOffset: 1060 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1060 + endOffset: 1202 +- name: 'Keyword Search Challenges: Brittleness, synonyms, and rule complexity' + startOffset: 1202 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1202 + endOffset: 1315 +- name: 'Vector Search Fundamentals: Embeddings 
as shared representations' + startOffset: 1315 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1315 + endOffset: 1740 +- name: 'Vector Compute vs Storage: Embedding generation and ingestion pipelines' + startOffset: 1740 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1740 + endOffset: 1993 +- name: 'Multimodal Embeddings: Images, text, CLIP, and modality fusion' + startOffset: 1993 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1993 + endOffset: 2040 +- name: 'Hybrid Search: Combining vector similarity with filters and recency' + startOffset: 2040 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2040 + endOffset: 2330 +- name: 'Feature Fusion: Encoding metadata, behavior, and popularity into vectors' + startOffset: 2330 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2330 + endOffset: 2393 +- name: 'Expressing Constraints: Translating filters and business rules to vectors' + startOffset: 2393 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2393 + endOffset: 2516 +- name: 'Time Encoding in Embeddings: Timestamps, positional encodings, and decay' + startOffset: 2516 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2516 + endOffset: 2711 +- name: 'Query-Time Weighting: Normalization, weights, and context-specific tuning' + startOffset: 2711 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2711 + endOffset: 2857 +- name: 'LLMs vs Specialized Encoders: Prompting trade-offs and efficiency limits' + startOffset: 2857 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2857 + endOffset: 2976 +- name: 'Learning Resources: VectorHub tutorials, graph and multimodal examples' + startOffset: 2976 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2976 + endOffset: 3155 +- name: 'Vector DB Selection: Vendor comparison and trade-offs' + startOffset: 3155 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3155 + endOffset: 3353 +- name: 'Monolithic vs Specialized Systems: Lucene/elasticsearch versus dedicated + VDBs' + startOffset: 3353 
+ url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3353 + endOffset: 3497 +- name: 'E-commerce Personalization: Prototyping with embeddings and CLIP' + startOffset: 3497 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3497 + endOffset: 3685 +- name: 'Search Metrics: Business KPIs, A/B tests, and revenue attribution' + startOffset: 3685 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3685 + endOffset: 3830 +- name: 'Operationalization: Enabling engineers, offline tests, and fast iteration' + startOffset: 3830 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3830 + endOffset: 4008 +- name: Episode Recap and Closing + startOffset: 4008 + url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=4008 + endOffset: 3923 transcript: - header: 'Guest Introduction: Daniel, Superlinked, and VectorHub' - line: This week, we'll talk about building production search systems. We have a @@ -912,7 +1028,7 @@ transcript: sec: 3494 time: '58:14' who: Alexey -- header: 'E‑commerce Personalization: Prototyping with embeddings and CLIP' +- header: 'E-commerce Personalization: Prototyping with embeddings and CLIP' - line: Also, for any questions that remain unanswered, I think there'll be a link to my LinkedIn – people should connect to me and shoot those questions over. For e-commerce, I think there is a huge opportunity to do real-time personalization @@ -1038,134 +1154,15 @@ transcript: sec: 4030 time: '1:07:10' who: Daniel -description: Master vector search, embeddings & hybrid search—learn indexing, multimodal - fusion, vector DB trade-offs & ops to boost relevance, latency & personalization -intro: 'How do you build vector search and hybrid retrieval that actually works in - production—balancing embeddings, indexing, multimodal fusion, latency, and business - constraints? In this episode, Reem Mahmoud, Director of Data Science at intervu.ai, - breaks down practical approaches to vector search, hybrid retrieval, and embedding - pipelines for real-world systems.

Reem guides listeners through fundamentals—text - search and inverted indexes (Lucene), candidate generation and ML ranking—then dives - into vector search: embedding generation, compute vs. storage trade-offs, and when - to use LLMs versus specialized encoders. You’ll hear concrete advice on multimodal - embeddings (text, images, CLIP), hybrid search that combines vector similarity with - filters and recency, and techniques for feature fusion, time encoding, and query-time - weighting. The conversation also covers vector DB selection, operationalization - best practices, search metrics and A/B testing, and prototyping e-commerce personalization - with embeddings.

If you’re building or evaluating search/retrieval systems, - this episode offers actionable guidance on embeddings, indexing strategies, multimodal - fusion, and how to translate business rules into performant hybrid retrieval—so - you can iterate faster and measure impact.' -dateadded: '2024-03-17' -duration: PT01H05M23S -quotableClips: -- name: 'Guest Introduction: Daniel, Superlinked, and VectorHub' - startOffset: 107 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=107 - endOffset: 149 -- name: 'Career Journey: Competitive programming, startups, and YouTube Ads' - startOffset: 149 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=149 - endOffset: 380 -- name: 'Competitive Programming to Infrastructure: relevance of algorithms' - startOffset: 380 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=380 - endOffset: 480 -- name: 'Defining Search: Information retrieval as a decision problem' - startOffset: 480 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=480 - endOffset: 550 -- name: 'Search vs Recommenders: Representation learning overview' - startOffset: 550 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=550 - endOffset: 645 -- name: 'Search Constraints: Latency and user experience impact' - startOffset: 645 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=645 - endOffset: 689 -- name: 'Text Search Fundamentals: Inverted index and Lucene basics' - startOffset: 689 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=689 - endOffset: 765 -- name: 'Search Architecture: Candidate generation (retrieval) and ML ranking' - startOffset: 765 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=765 - endOffset: 1060 -- name: 'Indexing Documents: Practical tools and why not to hand-roll indexes' - startOffset: 1060 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1060 - endOffset: 1202 -- name: 'Keyword Search Challenges: Brittleness, synonyms, and rule complexity' - startOffset: 1202 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1202 - 
endOffset: 1315 -- name: 'Vector Search Fundamentals: Embeddings as shared representations' - startOffset: 1315 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1315 - endOffset: 1740 -- name: 'Vector Compute vs Storage: Embedding generation and ingestion pipelines' - startOffset: 1740 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1740 - endOffset: 1993 -- name: 'Multimodal Embeddings: Images, text, CLIP, and modality fusion' - startOffset: 1993 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=1993 - endOffset: 2040 -- name: 'Hybrid Search: Combining vector similarity with filters and recency' - startOffset: 2040 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2040 - endOffset: 2330 -- name: 'Feature Fusion: Encoding metadata, behavior, and popularity into vectors' - startOffset: 2330 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2330 - endOffset: 2393 -- name: 'Expressing Constraints: Translating filters and business rules to vectors' - startOffset: 2393 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2393 - endOffset: 2516 -- name: 'Time Encoding in Embeddings: Timestamps, positional encodings, and decay' - startOffset: 2516 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2516 - endOffset: 2711 -- name: 'Query-Time Weighting: Normalization, weights, and context-specific tuning' - startOffset: 2711 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2711 - endOffset: 2857 -- name: 'LLMs vs Specialized Encoders: Prompting trade-offs and efficiency limits' - startOffset: 2857 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2857 - endOffset: 2976 -- name: 'Learning Resources: VectorHub tutorials, graph and multimodal examples' - startOffset: 2976 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=2976 - endOffset: 3155 -- name: 'Vector DB Selection: Vendor comparison and trade-offs' - startOffset: 3155 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3155 - endOffset: 3353 -- name: 'Monolithic vs Specialized Systems: 
Lucene/elasticsearch versus dedicated - VDBs' - startOffset: 3353 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3353 - endOffset: 3497 -- name: 'E‑commerce Personalization: Prototyping with embeddings and CLIP' - startOffset: 3497 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3497 - endOffset: 3685 -- name: 'Search Metrics: Business KPIs, A/B tests, and revenue attribution' - startOffset: 3685 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3685 - endOffset: 3830 -- name: 'Operationalization: Enabling engineers, offline tests, and fast iteration' - startOffset: 3830 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=3830 - endOffset: 4008 -- name: Episode Recap and Closing - startOffset: 4008 - url: https://www.youtube.com/watch?v=m45tNY-8gY8&t=4008 - endOffset: 3923 +context: 'Modern search is best understood as a decision-making system: moving beyond + brittle keyword matching to learned, shared representations (embeddings) that, when + combined with traditional IR constraints (filters, recency, business rules), multimodal + signals, and time-aware encodings, enable scalable, reliable retrieval and ranking. + The real unifying challenge is not just model choice (LLMs vs specialized encoders) + but engineering—indexing, compute/storage trade-offs, hybrid architectures, query-time + weighting, operational tooling, vendor selection, and metrics-driven iteration—so + that representation learning translates into measurable product and business outcomes.' 
--- - Links: * [LinkedIn](https://www.linkedin.com/in/reemmahmoud/recent-activity/all/){:target="_blank"} diff --git a/_podcast/s20e05-data-intensive-ai.md b/_podcast/production-ready-ai-engineering.md similarity index 94% rename from _podcast/s20e05-data-intensive-ai.md rename to _podcast/production-ready-ai-engineering.md index d5b494fa..65ce833d 100644 --- a/_podcast/s20e05-data-intensive-ai.md +++ b/_podcast/production-ready-ai-engineering.md @@ -1,20 +1,126 @@ --- +title: "Production AI Engineering: Data Pipelines, Prompt Optimization and Caching" +short: "Data Intensive AI" +season: 20 episode: 5 guests: - bartoszmikulski +image: images/podcast/production-ready-ai-engineering.jpg ids: - anchor: atalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi + anchor: datatalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi youtube: BP6w_vKySN0 -image: images/podcast/s20e05-data-intensive-ai.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Data-Intensive-AI---Bartosz-Mikulski-e30fhoi apple: https://podcasts.apple.com/us/podcast/data-intensive-ai-bartosz-mikulski/id1541710331?i=1000700288876 spotify: https://open.spotify.com/episode/0nFSU92IQDbM4C9FLvdn4z youtube: https://www.youtube.com/watch?v=BP6w_vKySN0 -season: 20 -short: Data Intensive AI -title: Build Trustworthy AI with Data Pipeline Testing & Prompt Engineering (Caching, - Compression & Tools) +description: "Master production AI engineering: build scalable data pipelines, optimize prompts, and implement caching to cut latency and costs for production-ready models" +topics: +- data engineering +- AI +- LLMs +- MLOps +- tools +intro: "How do you move AI projects from proof-of-concept to reliable production systems while keeping prompts, pipelines, and response times under control? In this episode Bartosz Mikulski, an AI and data engineer who specializes in productionizing AI, breaks down the engineering work required to make models dependable beyond demos. 
Bartosz explains how to design robust data pipelines, apply prompt optimization practices, and introduce caching strategies that reduce load and improve responsiveness. He also covers building testing infrastructure and using tests to surface issues that block production readiness—then how to fix those issues. Listeners will get concrete, engineering-focused insights into production AI, including practical approaches to pipeline orchestration, prompt tuning for stability, and where caching fits in an operational stack. Whether you're responsible for deploying models, improving inference reliability, or creating reproducible pipelines, this conversation offers actionable techniques and perspectives for turning experiments into maintainable production systems." +dateadded: 2025-03-26 +duration: PT01H01M37S +quotableClips: +- name: Episode Opening & Guest Overview (Data Intensive AI) + startOffset: 0 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=0 + endOffset: 122 +- name: Book Contribution Clarified & Testing Focus + startOffset: 122 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=122 + endOffset: 240 +- name: 'Career Path: Java → Data Engineering → AI Engineering' + startOffset: 240 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=240 + endOffset: 364 +- name: 'Publishing Routine: Blogging Frequency & Content Practice' + startOffset: 364 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=364 + endOffset: 545 +- name: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' + startOffset: 545 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=545 + endOffset: 707 +- name: 'Test Strategy for Data Pipelines: Snapshot & Integration Testing' + startOffset: 707 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=707 + endOffset: 794 +- name: 'Testing Tools: Great Expectations, Soda, SQL Tests vs Spark Tests' + startOffset: 794 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=794 + endOffset: 1030 +- name: 'Technology Choice: When to
Use Apache Spark' + startOffset: 1030 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1030 + endOffset: 1118 +- name: 'Data Engineering’s Role in AI: Preprocessing & Fine-Tuning Data' + startOffset: 1118 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1118 + endOffset: 1306 +- name: 'Invisible AI Use Cases: Augmented Generation & Review Analysis' + startOffset: 1306 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1306 + endOffset: 1513 +- name: 'Prompt Engineering Basics: In-Context Learning & Examples' + startOffset: 1513 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1513 + endOffset: 1696 +- name: 'Prompt Evaluation: Formatting, Examples, and Cost Tradeoffs' + startOffset: 1696 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1696 + endOffset: 1800 +- name: 'Prompt Compression: Token Optimization Techniques' + startOffset: 1800 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1800 + endOffset: 1905 +- name: Prompt Caching & Model Efficiency (attention caching, Claude) + startOffset: 1905 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1905 + endOffset: 2022 +- name: Open-Source Models & Tools Experience (DeepSeek, Perplexity) + startOffset: 2022 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2022 + endOffset: 2154 +- name: 'AI for Lead Scoring: LinkedIn Automation & Qualification' + startOffset: 2154 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2154 + endOffset: 2464 +- name: 'Chrome Extension Architecture: Backend AI Integration Pattern' + startOffset: 2464 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2464 + endOffset: 2525 +- name: 'Coding Assistants: Cursor Workflow & Productivity Boosts' + startOffset: 2525 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2525 + endOffset: 2678 +- name: 'Code AI Comparison: Cursor vs GitHub Copilot & Alternatives' + startOffset: 2678 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2678 + endOffset: 2839 +- name: 'Search-Focused Assistants: Using Perplexity & 
Tool Selection' + startOffset: 2839 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2839 + endOffset: 3129 +- name: 'Website Hosting: Static Site Generators & GitHub Pages' + startOffset: 3129 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3129 + endOffset: 3190 +- name: 'Blogging as Business: Attracting Clients & Teaching Workshops' + startOffset: 3190 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3190 + endOffset: 3377 +- name: 'AI-Assisted Writing: Drafting, Rewriting, and Maintaining Voice' + startOffset: 3377 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3377 + endOffset: 3621 +- name: Episode Wrap-Up & Guest Resources (blog link invitation) + startOffset: 3621 + url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3621 + endOffset: 3697 transcript: - header: Episode Opening & Guest Overview (Data Intensive AI) - line: This week, we’ll talk about Data Intensive AI. Our special guest today is @@ -165,7 +271,7 @@ transcript: sec: 528 time: '8:48' who: Bartosz -- header: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' +- header: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' - line: How did you end up writing a chapter for 97 Things Every Data Engineer Should Know? sec: 545 @@ -1028,125 +1134,17 @@ transcript: sec: 3697 time: '1:01:37' who: Alexey -description: Master data pipeline testing and prompt engineering—learn snapshot tests, - prompt compression & caching to ensure data trust and cut model costs. -intro: How do you turn prototype AI into reliable production systems that stakeholders - can trust? In this episode, Bartosz Mikulski — an AI and data engineer who helps - move projects from demo to production, builds testing infrastructure, and teaches - practitioners — walks through practical approaches to building trustworthy AI through - data pipeline testing and prompt engineering.

We dig into testing strategies - for data pipelines (snapshot and integration testing), tools like Great Expectations, - Soda, SQL vs Spark tests, and guidance on when to use Apache Spark. Bartosz explains - the data engineering role in preprocessing and fine-tuning, plus “invisible” AI - use cases like augmented generation and review analysis. On the prompt side, he - covers in-context learning, prompt evaluation and formatting tradeoffs, token optimization - with prompt compression, and prompt caching and model efficiency (attention caching, - Claude). He also discusses open-source tools (DeepSeek, Perplexity), AI-driven product - patterns (lead scoring, Chrome extension architectures), and coding assistants like - Cursor versus GitHub Copilot.

Listen for concrete testing practices, prompt - optimization techniques (caching and compression), and tool recommendations you - can apply to increase model reliability and reduce production risk. -dateadded: '2025-03-26' -duration: PT01H01M37S -quotableClips: -- name: Episode Opening & Guest Overview (Data Intensive AI) - startOffset: 0 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=0 - endOffset: 122 -- name: Book Contribution Clarified & Testing Focus - startOffset: 122 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=122 - endOffset: 240 -- name: 'Career Path: Java → Data Engineering → AI Engineering' - startOffset: 240 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=240 - endOffset: 364 -- name: 'Publishing Routine: Blogging Frequency & Content Practice' - startOffset: 364 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=364 - endOffset: 545 -- name: 'Data Trust: Why Testing Prevents "This Number Doesn’t Look Correct"' - startOffset: 545 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=545 - endOffset: 707 -- name: 'Test Strategy for Data Pipelines: Snapshot & Integration Testing' - startOffset: 707 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=707 - endOffset: 794 -- name: 'Testing Tools: Great Expectations, Soda, SQL Tests vs Spark Tests' - startOffset: 794 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=794 - endOffset: 1030 -- name: 'Technology Choice: When to Use Apache Spark' - startOffset: 1030 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1030 - endOffset: 1118 -- name: 'Data Engineering’s Role in AI: Preprocessing & Fine-Tuning Data' - startOffset: 1118 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1118 - endOffset: 1306 -- name: 'Invisible AI Use Cases: Augmented Generation & Review Analysis' - startOffset: 1306 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1306 - endOffset: 1513 -- name: 'Prompt Engineering Basics: In-Context Learning & Examples' - startOffset: 1513 - url: 
https://www.youtube.com/watch?v=BP6w_vKySN0&t=1513 - endOffset: 1696 -- name: 'Prompt Evaluation: Formatting, Examples, and Cost Tradeoffs' - startOffset: 1696 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1696 - endOffset: 1800 -- name: 'Prompt Compression: Token Optimization Techniques' - startOffset: 1800 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1800 - endOffset: 1905 -- name: Prompt Caching & Model Efficiency (attention caching, Claude) - startOffset: 1905 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=1905 - endOffset: 2022 -- name: Open-Source Models & Tools Experience (DeepSeek, Perplexity) - startOffset: 2022 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2022 - endOffset: 2154 -- name: 'AI for Lead Scoring: LinkedIn Automation & Qualification' - startOffset: 2154 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2154 - endOffset: 2464 -- name: 'Chrome Extension Architecture: Backend AI Integration Pattern' - startOffset: 2464 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2464 - endOffset: 2525 -- name: 'Coding Assistants: Cursor Workflow & Productivity Boosts' - startOffset: 2525 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2525 - endOffset: 2678 -- name: 'Code AI Comparison: Cursor vs GitHub Copilot & Alternatives' - startOffset: 2678 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2678 - endOffset: 2839 -- name: 'Search-Focused Assistants: Using Perplexity & Tool Selection' - startOffset: 2839 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=2839 - endOffset: 3129 -- name: 'Website Hosting: Static Site Generators & GitHub Pages' - startOffset: 3129 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3129 - endOffset: 3190 -- name: 'Blogging as Business: Attracting Clients & Teaching Workshops' - startOffset: 3190 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3190 - endOffset: 3377 -- name: 'AI-Assisted Writing: Drafting, Rewriting, and Maintaining Voice' - startOffset: 3377 - url: 
https://www.youtube.com/watch?v=BP6w_vKySN0&t=3377 - endOffset: 3621 -- name: Episode Wrap-Up & Guest Resources (blog link invitation) - startOffset: 3621 - url: https://www.youtube.com/watch?v=BP6w_vKySN0&t=3621 - endOffset: 3697 ---- +context: 'Context: a practitioner’s tour through the end-to-end work of turning data + and models into reliable, efficient products—from Java and data engineering foundations + to AI fine-tuning, prompt craft, tooling choices, and developer workflows. + Core: the episode’s through-line is a data-centric engineering mindset for trustworthy, + production-ready AI: rigorous testing and pipeline design to ensure data trust, + deliberate choices about models and tools for cost and performance, prompt and token-efficiency + techniques to make inference practical, and pragmatic engineering patterns (architecture, + caching, assistants) that let teams ship AI features and sustain them—while using + content and teaching as a way to refine thinking and capture business value.' 
+--- Links: * [LinkedIn](https://www.linkedin.com/in/mikulskibartosz/){:target="_blank"} diff --git a/_podcast/s03e01-from-pm-to-ds.md b/_podcast/project-manager-to-data-scientist.md similarity index 96% rename from _podcast/s03e01-from-pm-to-ds.md rename to _podcast/project-manager-to-data-scientist.md index 8c632511..02e6a569 100644 --- a/_podcast/s03e01-from-pm-to-ds.md +++ b/_podcast/project-manager-to-data-scientist.md @@ -1,11 +1,11 @@ --- -title: 'From Project Manager to Data Scientist: Skills, Tools, ML Courses & Job Search' -short: Transitioning from Project Management to Data Science -guests: -- ksenialegostay -image: images/podcast/s03e01-from-pm-to-ds.jpg +title: "From Project Manager to Data Scientist: Skills, Tools, ML Courses & Job Search" +short: "Transitioning from Project Management to Data Science" season: 3 episode: 1 +guests: +- ksenialegostay +image: images/podcast/project-manager-to-data-scientist.jpg ids: youtube: rBKezdb9jEc anchor: Transitioning-from-Project-Management-to-Data-Science---Ksenia-Legostay-euig2a @@ -14,6 +14,128 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Transitioning-from-Project-Management-to-Data-Science---Ksenia-Legostay-euig2a spotify: https://open.spotify.com/episode/3vF1B2mKwImsVC7h3NIDJW apple: https://podcasts.apple.com/us/podcast/transitioning-from-project-management-to-data-science/id1541710331?i=1000516467544 + +description: "Discover how project managers switch to data science: master machine learning, Python, CRISP-DM, build a portfolio, and land data roles faster." +intro: "How do you move from project management into a data science career — and what skills, tools, and courses actually matter? In this episode, Ksenia Legostay, Manager/Data Scientist at momox GmbH, walks through her transition after four years as a project manager into three years researching fraud and anomaly detection and earning a degree in data analysis. 
We cover career foundations, the difference between analytics and data science, and a concrete learning strategy: assess strengths, target gaps, and build core skills in programming, statistics, and domain expertise.

Ksenia outlines recommended coursework (machine learning, time series, graph analysis), online resources including mlcourse.ai, and a practical tools progression from spreadsheets and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP-DM to structure projects, starting as a data analyst to build a portfolio, using Kaggle and community resources (OpenDataScience, DataTalks), and preparing for production with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain specialization (fraud detection, node2vec), realistic job search expectations, part-time learning plans, and essential math topics — a clear roadmap for transitioning to data science." +topics: +- career transition +- project management +- data science +- career growth +- job search +- tools +- production +dateadded: 2021-04-10 + +duration: PT01H03M20S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=0 + endOffset: 144 +- name: 'Guest Overview: Ksenia and episode focus (project management → data science)' + startOffset: 144 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=144 + endOffset: 180 +- name: 'Career Foundations: math degree, management, and early PM roles' + startOffset: 180 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=180 + endOffset: 275 +- name: 'Motivation for Analytics: customer-centric, data-driven decision making' + startOffset: 275 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=275 + endOffset: 414 +- name: 'Transition Path: moving from data analysis into machine learning' + startOffset: 414 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=414 + endOffset: 450 +- name: 'Analytics vs. Data Science: descriptive analysis vs. 
forecasting' + startOffset: 450 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=450 + endOffset: 513 +- name: 'Learning Strategy: assess strengths and target skill gaps' + startOffset: 513 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=513 + endOffset: 670 +- name: 'Education Choices: benefits of formal degrees vs. self-study' + startOffset: 670 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=670 + endOffset: 780 +- name: 'Core Skill Set: programming, statistics, and domain expertise' + startOffset: 780 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=780 + endOffset: 1038 +- name: 'Recommended Coursework: machine learning, time series, graph analysis' + startOffset: 1038 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1038 + endOffset: 1176 +- name: Online Resources & Course Picks (including mlcourse.ai) + startOffset: 1176 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1176 + endOffset: 1352 +- name: 'Transferable PM Skills: planning, stakeholder communication, business KPIs' + startOffset: 1352 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1352 + endOffset: 1820 +- name: 'Project Frameworks: using CRISP-DM to structure data projects' + startOffset: 1820 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1820 + endOffset: 1963 +- name: 'Starting as a Data Analyst: apply analysis at work and build portfolio' + startOffset: 1963 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1963 + endOffset: 2088 +- name: 'Tools Progression: spreadsheets → BI tools (Tableau/Trifacta) → Python & + Pandas' + startOffset: 2088 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2088 + endOffset: 2207 +- name: 'Community Learning: OpenDataScience, DataTalks, and mentorship' + startOffset: 2207 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2207 + endOffset: 2334 +- name: 'Kaggle Practice: studying notebooks and collaborative competitions' + startOffset: 2334 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2334 + endOffset: 2467 
+- name: 'Production Readiness: Git, testing, Docker, deployment, and Clean Code' + startOffset: 2467 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2467 + endOffset: 2596 +- name: 'Domain Specialization: research experience in fraud detection and node2vec' + startOffset: 2596 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2596 + endOffset: 2915 +- name: 'Job Search Reality: applications, interviews, and persistence' + startOffset: 2915 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2915 + endOffset: 3075 +- name: 'Bridging Theory and Practice: applying university work in industry' + startOffset: 3075 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3075 + endOffset: 3249 +- name: 'Part-time Learning Plan: nanodegrees and structured six-month paths' + startOffset: 3249 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3249 + endOffset: 3462 +- name: 'Essential Math Topics: probability, statistics, and graph theory' + startOffset: 3462 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3462 + endOffset: 3661 +- name: 'Career Habits: critical path, study techniques, and lifelong learning' + startOffset: 3661 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3661 + endOffset: 3687 +- name: 'Final Advice: contribute to projects, narrow your scope, join communities' + startOffset: 3687 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3687 + endOffset: 3907 +- name: Episode Close and Final Wishes + startOffset: 3907 + url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3907 + endOffset: 3800 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Ksenia and episode focus (project management → data science)' @@ -377,7 +499,7 @@ transcript: sec: 1778 time: '29:38' who: Alexey -- header: 'Project Frameworks: using CRISP‑DM to structure data projects' +- header: 'Project Frameworks: using CRISP-DM to structure data projects' - line: Definitely. It's fortunate that it's already developed. 
I would recommend to use the standard of the industry in data mining — CRISP-DM framework. It's really convenient for data science projects. I also use it. It's nicely structured. @@ -703,7 +825,7 @@ transcript: sec: 3209 time: '53:29' who: Alexey -- header: 'Part‑time Learning Plan: nanodegrees and structured six‑month paths' +- header: 'Part-time Learning Plan: nanodegrees and structured six-month paths' - line: Yes, I think I can give some tips or some recommendations. First of all, start to be interested in data analysis and start to apply this at your work already. This would be the first step of getting involved in data analysis. Then, when @@ -877,132 +999,6 @@ transcript: sec: 3944 time: '1:05:44' who: Alexey -description: 'Discover how project managers switch to data science: master machine - learning, Python, CRISP‑DM, build a portfolio, and land data roles faster.' -intro: 'How do you move from project management into a data science career — and what - skills, tools, and courses actually matter? In this episode, Ksenia Legostay, Manager/Data - Scientist at momox GmbH, walks through her transition after four years as a project - manager into three years researching fraud and anomaly detection and earning a degree - in data analysis. We cover career foundations, the difference between analytics - and data science, and a concrete learning strategy: assess strengths, target gaps, - and build core skills in programming, statistics, and domain expertise.

- Ksenia outlines recommended coursework (machine learning, time series, graph analysis), - online resources including mlcourse.ai, and a practical tools progression from spreadsheets - and BI (Tableau/Trifacta) to Python and Pandas. She explains applying CRISP‑DM to - structure projects, starting as a data analyst to build a portfolio, using Kaggle - and community resources (OpenDataScience, DataTalks), and preparing for production - with Git, testing, Docker, and Clean Code. Listen for actionable advice on domain - specialization (fraud detection, node2vec), realistic job search expectations, part‑time - learning plans, and essential math topics — a clear roadmap for transitioning to - data science.' -dateadded: '2021-04-10' -duration: PT01H03M20S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=0 - endOffset: 144 -- name: 'Guest Overview: Ksenia and episode focus (project management → data science)' - startOffset: 144 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=144 - endOffset: 180 -- name: 'Career Foundations: math degree, management, and early PM roles' - startOffset: 180 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=180 - endOffset: 275 -- name: 'Motivation for Analytics: customer-centric, data-driven decision making' - startOffset: 275 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=275 - endOffset: 414 -- name: 'Transition Path: moving from data analysis into machine learning' - startOffset: 414 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=414 - endOffset: 450 -- name: 'Analytics vs. Data Science: descriptive analysis vs. forecasting' - startOffset: 450 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=450 - endOffset: 513 -- name: 'Learning Strategy: assess strengths and target skill gaps' - startOffset: 513 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=513 - endOffset: 670 -- name: 'Education Choices: benefits of formal degrees vs. 
self-study' - startOffset: 670 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=670 - endOffset: 780 -- name: 'Core Skill Set: programming, statistics, and domain expertise' - startOffset: 780 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=780 - endOffset: 1038 -- name: 'Recommended Coursework: machine learning, time series, graph analysis' - startOffset: 1038 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1038 - endOffset: 1176 -- name: Online Resources & Course Picks (including mlcourse.ai) - startOffset: 1176 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1176 - endOffset: 1352 -- name: 'Transferable PM Skills: planning, stakeholder communication, business KPIs' - startOffset: 1352 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1352 - endOffset: 1820 -- name: 'Project Frameworks: using CRISP‑DM to structure data projects' - startOffset: 1820 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1820 - endOffset: 1963 -- name: 'Starting as a Data Analyst: apply analysis at work and build portfolio' - startOffset: 1963 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=1963 - endOffset: 2088 -- name: 'Tools Progression: spreadsheets → BI tools (Tableau/Trifacta) → Python & - Pandas' - startOffset: 2088 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2088 - endOffset: 2207 -- name: 'Community Learning: OpenDataScience, DataTalks, and mentorship' - startOffset: 2207 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2207 - endOffset: 2334 -- name: 'Kaggle Practice: studying notebooks and collaborative competitions' - startOffset: 2334 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2334 - endOffset: 2467 -- name: 'Production Readiness: Git, testing, Docker, deployment, and Clean Code' - startOffset: 2467 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2467 - endOffset: 2596 -- name: 'Domain Specialization: research experience in fraud detection and node2vec' - startOffset: 2596 - url: 
https://www.youtube.com/watch?v=rBKezdb9jEc&t=2596 - endOffset: 2915 -- name: 'Job Search Reality: applications, interviews, and persistence' - startOffset: 2915 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=2915 - endOffset: 3075 -- name: 'Bridging Theory and Practice: applying university work in industry' - startOffset: 3075 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3075 - endOffset: 3249 -- name: 'Part‑time Learning Plan: nanodegrees and structured six‑month paths' - startOffset: 3249 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3249 - endOffset: 3462 -- name: 'Essential Math Topics: probability, statistics, and graph theory' - startOffset: 3462 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3462 - endOffset: 3661 -- name: 'Career Habits: critical path, study techniques, and lifelong learning' - startOffset: 3661 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3661 - endOffset: 3687 -- name: 'Final Advice: contribute to projects, narrow your scope, join communities' - startOffset: 3687 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3687 - endOffset: 3907 -- name: Episode Close and Final Wishes - startOffset: 3907 - url: https://www.youtube.com/watch?v=rBKezdb9jEc&t=3907 - endOffset: 3800 --- We talked about: diff --git a/_podcast/s02e10-public-speaking.md b/_podcast/public-speaking-for-data-scientists.md similarity index 97% rename from _podcast/s02e10-public-speaking.md rename to _podcast/public-speaking-for-data-scientists.md index 80291852..ea8b9979 100644 --- a/_podcast/s02e10-public-speaking.md +++ b/_podcast/public-speaking-for-data-scientists.md @@ -1,12 +1,11 @@ --- -title: 'Public Speaking for Data Scientists: Master AI Evangelism, Storytelling & - Keynotes' -short: The Essentials of Public Speaking for Career in Data Science -guests: -- bentaylor -image: images/podcast/s02e10-public-speaking.jpg +title: "Public Speaking for Data Scientists: Master AI Evangelism, Storytelling & Keynotes" +short: "The Essentials 
of Public Speaking for Career in Data Science" season: 2 episode: 10 +guests: +- bentaylor +image: images/podcast/public-speaking-for-data-scientists.jpg ids: youtube: wOFvlR9UBxI anchor: The-Essentials-of-Public-Speaking-for-Career-in-Data-Science---Ben-Taylor-et0m4p @@ -15,6 +14,123 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-Essentials-of-Public-Speaking-for-Career-in-Data-Science---Ben-Taylor-et0m4p spotify: https://open.spotify.com/episode/4QWfObiuYmtOCtpSL5LZf9 apple: https://podcasts.apple.com/us/podcast/essentials-public-speaking-for-career-in-data-science/id1541710331?i=1000513669829 + +description: "Master public speaking, AI evangelism & storytelling for data scientists: learn repeatable keynote structure, audience hooks, Q&A tactics, and career growth." +intro: "How do data scientists move from technical deep dives to memorable keynotes and effective AI evangelism? In this episode, Ben Taylor, Chief AI Evangelist at DataRobot, breaks down the public speaking playbook for data practitioners who want to persuade, teach, and scale their talks.

Ben draws on a career from engineering and quant roles through startups and acquisitions to explain the mindset for improvement, practical rehearsal habits, and the positioning and messaging that define AI evangelism. Key topics include crafting repeatable keynotes, avoiding early mistakes like technical overload, using story hooks and warm-ups to capture attention, and structuring talks around 1–3 clear takeaways and calls to action. He also covers introductions that work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling exercises (Pixar lessons), and executive presentations that lead with recommendations while keeping an appendix ready.

Listeners will find actionable guidance on earning speaking stages, writing conference proposals that push boundaries, Q&A strategies (including how and when to admit unknowns), starter topics for newcomers, and resources like Toastmasters and story practice to build a speaker resume and break into AI evangelism" +topics: +- developer relations +- public speaking +- career growth +dateadded: 2021-03-20 + +duration: PT01H09M46S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=0 + endOffset: 92 +- name: 'Guest Overview: Ben Taylor, AI Evangelist at DataRobot' + startOffset: 92 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=92 + endOffset: 188 +- name: 'Mindset for Improvement: Practice and Public Speaking Growth' + startOffset: 188 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=188 + endOffset: 234 +- name: 'Career Path: Engineering, Quant, HireVue, Startup, Acquisition' + startOffset: 234 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=234 + endOffset: 364 +- name: 'AI Evangelism: Role, Positioning, and Messaging Strategy' + startOffset: 364 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=364 + endOffset: 577 +- name: 'Scaling Talks: Process for Crafting Repeatable Keynotes' + startOffset: 577 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=577 + endOffset: 774 +- name: 'Early Mistakes: Technical Overload and Audience Awareness' + startOffset: 774 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=774 + endOffset: 957 +- name: 'Provocative Speaking: Risks, Reception, and Storytelling Ethics' + startOffset: 957 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=957 + endOffset: 1132 +- name: 'Speaking Privately: Corporate Talks and Networking Impact' + startOffset: 1132 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1132 + endOffset: 1194 +- name: 'Attention Techniques: Warm-up, Emotion, and Story Hooks' + startOffset: 1194 + url: 
https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1194 + endOffset: 1315 +- name: 'Clear Outcomes: 1–3 Key Takeaways and Calls to Action' + startOffset: 1315 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1315 + endOffset: 1457 +- name: 'Introductions that Work: Hero Stories vs. Resume Intros' + startOffset: 1457 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1457 + endOffset: 1857 +- name: 'Translating Data for Impact: From Metrics to Narrative' + startOffset: 1857 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1857 + endOffset: 2052 +- name: 'Storytelling Practice: Everyday Exercises and Pixar Lessons' + startOffset: 2052 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2052 + endOffset: 2191 +- name: 'Ambitious Goals: Memorable Talks and Long-term Impact' + startOffset: 2191 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2191 + endOffset: 2395 +- name: 'Executive Presentations: Recommendations First, Appendix Ready' + startOffset: 2395 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2395 + endOffset: 2858 +- name: 'Earning Stages: From Meetups to Conference Speaking Slots' + startOffset: 2858 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2858 + endOffset: 3020 +- name: 'Conference Proposals: Novelty, Creativity, and “Scare Yourself” Topics' + startOffset: 3020 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3020 + endOffset: 3133 +- name: 'Q&A Strategy: Handling Tough Questions and Admitting Unknowns' + startOffset: 3133 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3133 + endOffset: 3228 +- name: 'Path to Keynotes: Building a Speaker Resume and Personal Brand' + startOffset: 3228 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3228 + endOffset: 3397 +- name: 'Starter Topics for New Data Scientists: Business Problems Over Hype' + startOffset: 3397 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3397 + endOffset: 3572 +- name: 'Pitching Meetups: First Impressions, Endorsements, and Networking' + 
startOffset: 3572 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3572 + endOffset: 3738 +- name: 'Core Skill: Maximizing Audience Attention (Public Speaking Focus)' + startOffset: 3738 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3738 + endOffset: 3848 +- name: 'Breaking into AI Evangelism: Build Speaking Experience and Presence' + startOffset: 3848 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3848 + endOffset: 3914 +- name: 'Resources & Practice: Recommended Books, Toastmasters, Story Exercises' + startOffset: 3914 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3914 + endOffset: 4124 +- name: Closing Anecdotes and Final Advice + startOffset: 4124 + url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=4124 + endOffset: 4186 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Ben Taylor, AI Evangelist at DataRobot' @@ -319,7 +435,7 @@ transcript: sec: 1188 time: '19:48' who: Alexey -- header: 'Attention Techniques: Warm‑up, Emotion, and Story Hooks' +- header: 'Attention Techniques: Warm-up, Emotion, and Story Hooks' - line: You have this concept of attention. You walk out on the stage. You being the speaker, you're given attention for free. But you can quickly lose it. You can imagine if you or myself, were walking out on stage – COVID’s over – we're gonna @@ -579,7 +695,7 @@ transcript: sec: 2189 time: '36:29' who: Alexey -- header: 'Ambitious Goals: Memorable Talks and Long‑term Impact' +- header: 'Ambitious Goals: Memorable Talks and Long-term Impact' - line: Most of them. And that’s not an insult. It's just true. Most of them. Most of the talks are forgettable. How many of those people were smart? All of them. They're very smart, they're very accomplished. 
Unfortunately, most of the talks @@ -1121,131 +1237,6 @@ transcript: sec: 4278 time: '1:11:18' who: Alexey -description: 'Master public speaking, AI evangelism & storytelling for data scientists: - learn repeatable keynote structure, audience hooks, Q&A tactics, and career growth.' -intro: How do data scientists move from technical deep dives to memorable keynotes - and effective AI evangelism? In this episode, Ben Taylor, Chief AI Evangelist at - DataRobot, breaks down the public speaking playbook for data practitioners who want - to persuade, teach, and scale their talks.

Ben draws on a career from engineering - and quant roles through startups and acquisitions to explain the mindset for improvement, - practical rehearsal habits, and the positioning and messaging that define AI evangelism. - Key topics include crafting repeatable keynotes, avoiding early mistakes like technical - overload, using story hooks and warm‑ups to capture attention, and structuring talks - around 1–3 clear takeaways and calls to action. He also covers introductions that - work (hero stories vs. resumes), translating metrics into narrative, everyday storytelling - exercises (Pixar lessons), and executive presentations that lead with recommendations - while keeping an appendix ready.

Listeners will find actionable guidance - on earning speaking stages, writing conference proposals that push boundaries, Q&A - strategies (including how and when to admit unknowns), starter topics for newcomers, - and resources like Toastmasters and story practice to build a speaker resume and - break into AI evangelism. -dateadded: '2021-03-20' -duration: PT01H09M46S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=0 - endOffset: 92 -- name: 'Guest Overview: Ben Taylor, AI Evangelist at DataRobot' - startOffset: 92 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=92 - endOffset: 188 -- name: 'Mindset for Improvement: Practice and Public Speaking Growth' - startOffset: 188 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=188 - endOffset: 234 -- name: 'Career Path: Engineering, Quant, HireVue, Startup, Acquisition' - startOffset: 234 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=234 - endOffset: 364 -- name: 'AI Evangelism: Role, Positioning, and Messaging Strategy' - startOffset: 364 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=364 - endOffset: 577 -- name: 'Scaling Talks: Process for Crafting Repeatable Keynotes' - startOffset: 577 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=577 - endOffset: 774 -- name: 'Early Mistakes: Technical Overload and Audience Awareness' - startOffset: 774 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=774 - endOffset: 957 -- name: 'Provocative Speaking: Risks, Reception, and Storytelling Ethics' - startOffset: 957 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=957 - endOffset: 1132 -- name: 'Speaking Privately: Corporate Talks and Networking Impact' - startOffset: 1132 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1132 - endOffset: 1194 -- name: 'Attention Techniques: Warm‑up, Emotion, and Story Hooks' - startOffset: 1194 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1194 - endOffset: 1315 -- name: 'Clear Outcomes: 
1–3 Key Takeaways and Calls to Action' - startOffset: 1315 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1315 - endOffset: 1457 -- name: 'Introductions that Work: Hero Stories vs. Resume Intros' - startOffset: 1457 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1457 - endOffset: 1857 -- name: 'Translating Data for Impact: From Metrics to Narrative' - startOffset: 1857 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=1857 - endOffset: 2052 -- name: 'Storytelling Practice: Everyday Exercises and Pixar Lessons' - startOffset: 2052 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2052 - endOffset: 2191 -- name: 'Ambitious Goals: Memorable Talks and Long‑term Impact' - startOffset: 2191 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2191 - endOffset: 2395 -- name: 'Executive Presentations: Recommendations First, Appendix Ready' - startOffset: 2395 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2395 - endOffset: 2858 -- name: 'Earning Stages: From Meetups to Conference Speaking Slots' - startOffset: 2858 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=2858 - endOffset: 3020 -- name: 'Conference Proposals: Novelty, Creativity, and “Scare Yourself” Topics' - startOffset: 3020 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3020 - endOffset: 3133 -- name: 'Q&A Strategy: Handling Tough Questions and Admitting Unknowns' - startOffset: 3133 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3133 - endOffset: 3228 -- name: 'Path to Keynotes: Building a Speaker Resume and Personal Brand' - startOffset: 3228 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3228 - endOffset: 3397 -- name: 'Starter Topics for New Data Scientists: Business Problems Over Hype' - startOffset: 3397 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3397 - endOffset: 3572 -- name: 'Pitching Meetups: First Impressions, Endorsements, and Networking' - startOffset: 3572 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3572 - endOffset: 3738 -- name: 
'Core Skill: Maximizing Audience Attention (Public Speaking Focus)' - startOffset: 3738 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3738 - endOffset: 3848 -- name: 'Breaking into AI Evangelism: Build Speaking Experience and Presence' - startOffset: 3848 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3848 - endOffset: 3914 -- name: 'Resources & Practice: Recommended Books, Toastmasters, Story Exercises' - startOffset: 3914 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=3914 - endOffset: 4124 -- name: Closing Anecdotes and Final Advice - startOffset: 4124 - url: https://www.youtube.com/watch?v=wOFvlR9UBxI&t=4124 - endOffset: 4186 --- diff --git a/_podcast/s15e05-mastering-data-engineering-as-remote-worker.md b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md similarity index 95% rename from _podcast/s15e05-mastering-data-engineering-as-remote-worker.md rename to _podcast/remote-data-engineering-work-and-building-iot-platforms.md index 0d3b0a67..2eca06d0 100644 --- a/_podcast/s15e05-mastering-data-engineering-as-remote-worker.md +++ b/_podcast/remote-data-engineering-work-and-building-iot-platforms.md @@ -1,20 +1,153 @@ --- +title: "Remote Data Engineering Life: Building IoT Platforms, Career Transitions & Newsletter-Driven Personal Growth" +short: "Mastering Data Engineering as a Remote Worker" +season: 15 episode: 5 guests: - josemaria +image: images/podcast/remote-data-engineering-work-and-building-iot-platforms.jpg ids: - anchor: atatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c + anchor: datatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c youtube: UX7UShEioKc -image: images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Mastering-Data-Engineering-as-a-Remote-Worker---Jos-Mara-Snchez-Salas-e28716c apple: 
https://podcasts.apple.com/us/podcast/mastering-data-engineering-as-a-remote-worker-jos%C3%A9/id1541710331?i=1000624908396 spotify: https://open.spotify.com/episode/2RLxjkPbUO3FBfFpKPHzls?si=TVveHW7PQcW7yGbOyJsJpg youtube: https://www.youtube.com/watch?v=UX7UShEioKc -season: 15 -short: Mastering Data Engineering as a Remote Worker -title: 'Build IoT Platforms & Data Pipelines for Remote Work: Hiring, Onboarding & - Personal Branding' + +description: "Navigate remote data engineering after relocation: IoT platform architecture, sensor onboarding workflows, and newsletter-driven personal branding for career growth." +intro: "What does it take to thrive as a remote data engineer — building IoT platforms, navigating international career moves, and leveraging writing for professional growth? In this episode, José María Sánchez Salas — a computer scientist turned data engineer and newsletter author — shares his journey from Spain to Norway and the realities of remote IoT platform work.

We explore the daily life of remote data engineering: work routines, wellness strategies, and Norway's unique hiring landscape with geographic constraints around Oslo, Bergen, and Trondheim. José breaks down IoT platform engineering fundamentals — treating platforms as an 'operating system' for sensors, sensor onboarding workflows, real-time data processing, and solving common IoT challenges like remote diagnostics and business context integration. The conversation covers data exploration patterns, ETL pipeline design, stakeholder communication, and how José uses his newsletter as both a learning tool and career advancement strategy — translating complex technical work for broader audiences and building professional visibility. You'll get actionable insights on job searching across borders, data engineering learning paths, remote work legal considerations, and communication skills that matter for distributed teams. Listen to discover practical approaches for IoT system design, remote team management, and using content creation to accelerate your data engineering career." 
+topics: +- data engineering +- remote work +- personal brand +- career growth +dateadded: 2023-08-28 + +duration: PT00H58M01S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=0 + endOffset: 69 +- name: Episode Overview & Guest Introduction + startOffset: 69 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=69 + endOffset: 110 +- name: 'Background: Spain to Norway and Career Transition' + startOffset: 110 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=110 + endOffset: 261 +- name: 'Relocation Story: Moving for Partner’s Job' + startOffset: 261 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=261 + endOffset: 301 +- name: 'Remote Work Routine: Two Focused Work Blocks' + startOffset: 301 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=301 + endOffset: 435 +- name: Morning Routine & Productivity Habits + startOffset: 435 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=435 + endOffset: 493 +- name: Remote-First Hiring Landscape in Norway + startOffset: 493 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=493 + endOffset: 599 +- name: 'Role Overview: IoT Platform Responsibilities' + startOffset: 599 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=599 + endOffset: 749 +- name: 'IoT Platform Architecture: "Operating System" for Sensors' + startOffset: 749 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=749 + endOffset: 797 +- name: 'Geographic Hiring Constraints: Oslo, Bergen, Trondheim' + startOffset: 797 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=797 + endOffset: 931 +- name: 'Remote Work Challenges: Loneliness & Isolation' + startOffset: 931 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=931 + endOffset: 1097 +- name: 'Workspace Boundaries: Separating Home and Work' + startOffset: 1097 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1097 + endOffset: 1231 +- name: 'IoT Data Challenges: Remote Diagnostics & Context' + startOffset: 1231 + url: 
https://www.youtube.com/watch?v=UX7UShEioKc&t=1231 + endOffset: 1444 +- name: 'Turning Raw Data into Business Value: Understand the Why' + startOffset: 1444 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1444 + endOffset: 1654 +- name: Data Exploration, ETL, and Building Data Pipelines + startOffset: 1654 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1654 + endOffset: 1821 +- name: 'Internal Stakeholders: Platform Consumers & Users' + startOffset: 1821 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1821 + endOffset: 1864 +- name: Sensor Onboarding Workflow & Real-Time Processing + startOffset: 1864 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1864 + endOffset: 1937 +- name: 'Newsletter Purpose: Explaining Data to Non-Technical Audiences' + startOffset: 1937 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1937 + endOffset: 2002 +- name: 'Newsletter Origin: Writing as Communication for Introverts' + startOffset: 2002 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2002 + endOffset: 2157 +- name: 'Content Strategy: Inspiration and Daily Cadence' + startOffset: 2157 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2157 + endOffset: 2290 +- name: 'Personal Branding: Newsletter as Opportunity Driver' + startOffset: 2290 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2290 + endOffset: 2359 +- name: 'Newsletter Production Tactics: Idea Slicing & Repetition' + startOffset: 2359 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2359 + endOffset: 2420 +- name: 'Burnout Coping: Nature, Exercise, and Routine' + startOffset: 2420 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2420 + endOffset: 2801 +- name: 'Job Search Resources: finn.no, LinkedIn, Upwork' + startOffset: 2801 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2801 + endOffset: 2916 +- name: 'Learning Data Engineering: Software Foundations & Projects' + startOffset: 2916 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2916 + endOffset: 3132 +- name: 
Legal & Tax Basics for Remote Work in Norway + startOffset: 3132 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3132 + endOffset: 3211 +- name: 'Benefits of Remote Work: Location Flexibility & Time Savings' + startOffset: 3211 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3211 + endOffset: 3319 +- name: 'Personal Mobility: Partner Contracts and Remote Advantages' + startOffset: 3319 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3319 + endOffset: 3432 +- name: 'Recommendation: Develop Soft Skills, Especially Communication' + startOffset: 3432 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3432 + endOffset: 3522 +- name: Episode Closing & Final Remarks + startOffset: 3522 + url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3522 + endOffset: 3481 + transcript: - header: Podcast Introduction - header: Episode Overview & Guest Introduction @@ -1003,148 +1136,6 @@ transcript: sec: 3550 time: '59:10' who: Alexey -description: 'Master IoT platforms and data pipelines for remote work: hiring & onboarding - tips, sensor architecture, and personal branding tactics to advance your career.' -intro: 'How do you build reliable IoT platforms and end-to-end data pipelines while - hiring, onboarding, and staying visible as a remote data engineer? In this episode - José María Sánchez Salas — a computer scientist focused on data engineering and - author of a well-read data engineering newsletter — walks through practical answers - from his move from Spain to Norway to running IoT platform work remotely.

- We cover remote work routines and wellbeing, Norway’s remote-first hiring landscape - and geographic constraints (Oslo, Bergen, Trondheim), and the core responsibilities - of an IoT platform engineer: treating the platform as an “operating system” for - sensors, sensor onboarding workflows, real-time processing, and common IoT data - challenges like remote diagnostics and adding contextual business value. José explains - data exploration, ETL and pipeline patterns, stakeholder-driven platform design, - and tactics for translating technical work to non-technical audiences via a newsletter - — a tool he uses for personal branding and opportunity generation. You’ll also get - practical job-search resources, learning paths for data engineering, and tips on - legal/tax basics and communication skills for remote roles. Listen to learn concrete - strategies for building IoT systems, hiring and onboarding remotely, and using content - to advance your career.' -dateadded: '2023-08-28' -duration: PT00H58M01S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=0 - endOffset: 69 -- name: Episode Overview & Guest Introduction - startOffset: 69 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=69 - endOffset: 110 -- name: 'Background: Spain to Norway and Career Transition' - startOffset: 110 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=110 - endOffset: 261 -- name: 'Relocation Story: Moving for Partner’s Job' - startOffset: 261 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=261 - endOffset: 301 -- name: 'Remote Work Routine: Two Focused Work Blocks' - startOffset: 301 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=301 - endOffset: 435 -- name: Morning Routine & Productivity Habits - startOffset: 435 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=435 - endOffset: 493 -- name: Remote-First Hiring Landscape in Norway - startOffset: 493 - url: 
https://www.youtube.com/watch?v=UX7UShEioKc&t=493 - endOffset: 599 -- name: 'Role Overview: IoT Platform Responsibilities' - startOffset: 599 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=599 - endOffset: 749 -- name: 'IoT Platform Architecture: "Operating System" for Sensors' - startOffset: 749 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=749 - endOffset: 797 -- name: 'Geographic Hiring Constraints: Oslo, Bergen, Trondheim' - startOffset: 797 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=797 - endOffset: 931 -- name: 'Remote Work Challenges: Loneliness & Isolation' - startOffset: 931 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=931 - endOffset: 1097 -- name: 'Workspace Boundaries: Separating Home and Work' - startOffset: 1097 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1097 - endOffset: 1231 -- name: 'IoT Data Challenges: Remote Diagnostics & Context' - startOffset: 1231 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1231 - endOffset: 1444 -- name: 'Turning Raw Data into Business Value: Understand the Why' - startOffset: 1444 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1444 - endOffset: 1654 -- name: Data Exploration, ETL, and Building Data Pipelines - startOffset: 1654 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1654 - endOffset: 1821 -- name: 'Internal Stakeholders: Platform Consumers & Users' - startOffset: 1821 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1821 - endOffset: 1864 -- name: Sensor Onboarding Workflow & Real-Time Processing - startOffset: 1864 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1864 - endOffset: 1937 -- name: 'Newsletter Purpose: Explaining Data to Non-Technical Audiences' - startOffset: 1937 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=1937 - endOffset: 2002 -- name: 'Newsletter Origin: Writing as Communication for Introverts' - startOffset: 2002 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2002 - endOffset: 2157 -- name: 'Content Strategy: 
Inspiration and Daily Cadence' - startOffset: 2157 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2157 - endOffset: 2290 -- name: 'Personal Branding: Newsletter as Opportunity Driver' - startOffset: 2290 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2290 - endOffset: 2359 -- name: 'Newsletter Production Tactics: Idea Slicing & Repetition' - startOffset: 2359 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2359 - endOffset: 2420 -- name: 'Burnout Coping: Nature, Exercise, and Routine' - startOffset: 2420 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2420 - endOffset: 2801 -- name: 'Job Search Resources: finn.no, LinkedIn, Upwork' - startOffset: 2801 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2801 - endOffset: 2916 -- name: 'Learning Data Engineering: Software Foundations & Projects' - startOffset: 2916 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=2916 - endOffset: 3132 -- name: Legal & Tax Basics for Remote Work in Norway - startOffset: 3132 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3132 - endOffset: 3211 -- name: 'Benefits of Remote Work: Location Flexibility & Time Savings' - startOffset: 3211 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3211 - endOffset: 3319 -- name: 'Personal Mobility: Partner Contracts and Remote Advantages' - startOffset: 3319 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3319 - endOffset: 3432 -- name: 'Recommendation: Develop Soft Skills, Especially Communication' - startOffset: 3432 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3432 - endOffset: 3522 -- name: Episode Closing & Final Remarks - startOffset: 3522 - url: https://www.youtube.com/watch?v=UX7UShEioKc&t=3522 - endOffset: 3481 --- Links: diff --git a/_podcast/s05e05-researchers-vs-engineers.md b/_podcast/research-to-production-ml-systems-roadmap.md similarity index 97% rename from _podcast/s05e05-researchers-vs-engineers.md rename to _podcast/research-to-production-ml-systems-roadmap.md index 
7191b86b..936301bd 100644 --- a/_podcast/s05e05-researchers-vs-engineers.md +++ b/_podcast/research-to-production-ml-systems-roadmap.md @@ -1,12 +1,11 @@ --- -title: 'From Research to Production: Build Reproducible, Deployable Full-Stack ML - Systems' -short: What Researchers and Engineers Can Learn from Each Other -guests: -- mihaileric -image: images/podcast/s05e05-researchers-vs-engineers.jpg +title: "From Research to Production: Build Reproducible, Deployable Full-Stack ML Systems" +short: "What Researchers and Engineers Can Learn from Each Other" season: 5 episode: 5 +guests: +- mihaileric +image: images/podcast/research-to-production-ml-systems-roadmap.jpg ids: youtube: d9xVXqKq3sU anchor: What-Researchers-and-Engineers-Can-Learn-from-Each-Other---Mihail-Eric-e1854bj @@ -15,6 +14,133 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/What-Researchers-and-Engineers-Can-Learn-from-Each-Other---Mihail-Eric-e1854bj spotify: https://open.spotify.com/episode/0cJJCjK7nX5p1PdeMvGrVL apple: https://podcasts.apple.com/us/podcast/what-researchers-and-engineers-can-learn-from-each/id1541710331?i=1000537258362 + +description: "Learn to build reproducible, deployable full-stack ML systems: deploy models, bridge research-to-production, and master PyTorch, Docker & MLOps workflows." +intro: "How do you move ML work from research notebooks to reproducible, deployable full-stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that exact challenge. We trace Mihail’s path from academic NLP to self-driving and conversational AI, then into hybrid roles that blend hypothesis-driven research with production engineering.

Key topics include research infrastructure for data collection and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, role fluidity, code reviews for researchers, and practical skills swaps so researchers learn reproducibility and engineers learn experimental rigor.

Listeners will get concrete guidance on building end-to-end ML systems, improving reproducibility and model deployment, and actionable career advice (internships, reading papers, reproducing models). Tune in to learn practical steps and tools to bridge research to production for real-world ML systems." +topics: +- machine learning +- MLOps +- academia +- production +- career growth +dateadded: 2021-10-02 + +duration: PT01H01M36S + +quotableClips: +- name: Podcast Introduction + startOffset: 77 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=77 + endOffset: 112 +- name: 'Guest Overview: Mihail’s Roles and Work' + startOffset: 112 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=112 + endOffset: 120 +- name: 'Guest Background: Stanford NLP and Early Research' + startOffset: 120 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=120 + endOffset: 300 +- name: 'From NLP to Self-Driving: Shared Long-Tail Challenges' + startOffset: 300 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=300 + endOffset: 406 +- name: 'Transition to Industry: Building Engineering Foundations' + startOffset: 406 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=406 + endOffset: 514 +- name: 'Research Infrastructure: Data Collection and Prototyping' + startOffset: 514 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=514 + endOffset: 561 +- name: 'Hybrid Role at Amazon: Research Integrated with Production' + startOffset: 561 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=561 + endOffset: 652 +- name: 'Researcher Focus: Hypothesis-Driven Work and Benchmarks' + startOffset: 652 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=652 + endOffset: 770 +- name: 'Experimental Tooling: Notebooks, W&B, Fast Prototyping' + startOffset: 770 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=770 + endOffset: 885 +- name: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' + startOffset: 885 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=885 + endOffset: 
1055 +- name: 'ML Engineer Focus: Full ML Lifecycle and Production Systems' + startOffset: 1055 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1055 + endOffset: 1073 +- name: 'Engineering Tooling: PyTorch, Docker, Cloud, and Web Frameworks' + startOffset: 1073 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1073 + endOffset: 1225 +- name: 'Data Science Evolution: From Data Science 1.0 to Data Science 2.0' + startOffset: 1225 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1225 + endOffset: 1412 +- name: 'Skills Swap — Researchers Learn: Engineering Rigor and Reproducibility' + startOffset: 1412 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1412 + endOffset: 1730 +- name: 'Skills Swap — Engineers Learn: Handling Uncertainty and Experimental Rigor' + startOffset: 1730 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1730 + endOffset: 1816 +- name: 'Bridging the Gap: Cultural and Organizational Challenges' + startOffset: 1816 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1816 + endOffset: 2060 +- name: 'Embedded Teams vs. 
Handoffs: Avoiding the "Throw-It-Over-the-Wall" Trap' + startOffset: 2060 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2060 + endOffset: 2217 +- name: 'Breaking Silos: Leadership, Sprints, and Active Collaboration' + startOffset: 2217 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2217 + endOffset: 2348 +- name: 'Role Fluidity: Flexible Responsibilities in High-Performing Teams' + startOffset: 2348 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2348 + endOffset: 2433 +- name: 'Full-Stack Data Scientist: From Model Development to Deployment' + startOffset: 2433 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2433 + endOffset: 2676 +- name: 'Advice for Researchers: Build End-to-End Systems and Deploy' + startOffset: 2676 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2676 + endOffset: 2817 +- name: 'Code Reviews for Researchers: Rapid Engineering Skill Development' + startOffset: 2817 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2817 + endOffset: 2871 +- name: 'Advice for Engineers: Read Papers, Reproduce Models, Run Experiments' + startOffset: 2871 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2871 + endOffset: 3088 +- name: 'Practical Paper Reading: Tutorials, Code, and Researcher Collaboration' + startOffset: 3088 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3088 + endOffset: 3331 +- name: 'Choosing a Path: Internships, Masters, PhD — Try Both Early' + startOffset: 3331 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3331 + endOffset: 3536 +- name: 'Confetti.ai: Career Preparation and Learning Resources for ML Roles' + startOffset: 3536 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3536 + endOffset: 3700 +- name: 'Contact & Resources: Twitter, LinkedIn, and Confetti.ai' + startOffset: 3700 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3700 + endOffset: 3756 +- name: Episode Wrap-Up and Key Takeaways + startOffset: 3756 + url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3756 + endOffset: 
3696 + transcript: - header: Podcast Introduction - line: This week, we'll talk about machine learning researchers and machine learning @@ -271,7 +397,7 @@ transcript: sec: 834 time: '13:54' who: Mihail -- header: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' +- header: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' - line: Where do these open-ended questions come from? Do you have to come up with them yourself? Does your professor tell you about them or you work with companies from the industry to find them? How do you come up with these problems? @@ -1124,139 +1250,6 @@ transcript: sec: 3773 time: '1:02:53' who: Alexey -description: 'Learn to build reproducible, deployable full-stack ML systems: deploy - models, bridge research-to-production, and master PyTorch, Docker & MLOps workflows.' -intro: How do you move ML work from research notebooks to reproducible, deployable - full‑stack systems? In this episode, Mihail Eric — founder of Pametan Data Innovation - and Confetti.ai, former Stanford NLP researcher with industry experience at RideOS - and Amazon Alexa, and author of papers in ACL, AAAI, and NeurIPS — tackles that - exact challenge. We trace Mihail’s path from academic NLP to self‑driving and conversational - AI, then into hybrid roles that blend hypothesis‑driven research with production - engineering.

Key topics include research infrastructure for data collection - and prototyping, experimental tooling (notebooks, Weights & Biases, fast prototyping), - engineering stacks for deployment (PyTorch, Docker, cloud, web frameworks), and - the full ML lifecycle. Mihail also breaks down cultural solutions — embedded teams, - role fluidity, code reviews for researchers, and practical skills swaps so researchers - learn reproducibility and engineers learn experimental rigor.

Listeners - will get concrete guidance on building end‑to‑end ML systems, improving reproducibility - and model deployment, and actionable career advice (internships, reading papers, - reproducing models). Tune in to learn practical steps and tools to bridge research - to production for real‑world ML systems. -dateadded: '2021-10-02' -duration: PT01H01M36S -quotableClips: -- name: Podcast Introduction - startOffset: 77 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=77 - endOffset: 112 -- name: 'Guest Overview: Mihail’s Roles and Work' - startOffset: 112 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=112 - endOffset: 120 -- name: 'Guest Background: Stanford NLP and Early Research' - startOffset: 120 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=120 - endOffset: 300 -- name: 'From NLP to Self-Driving: Shared Long-Tail Challenges' - startOffset: 300 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=300 - endOffset: 406 -- name: 'Transition to Industry: Building Engineering Foundations' - startOffset: 406 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=406 - endOffset: 514 -- name: 'Research Infrastructure: Data Collection and Prototyping' - startOffset: 514 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=514 - endOffset: 561 -- name: 'Hybrid Role at Amazon: Research Integrated with Production' - startOffset: 561 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=561 - endOffset: 652 -- name: 'Researcher Focus: Hypothesis-Driven Work and Benchmarks' - startOffset: 652 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=652 - endOffset: 770 -- name: 'Experimental Tooling: Notebooks, W&B, Fast Prototyping' - startOffset: 770 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=770 - endOffset: 885 -- name: 'Sourcing Research Questions: Surveys, Citations, and "Future Work"' - startOffset: 885 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=885 - endOffset: 1055 -- name: 'ML Engineer Focus: Full ML Lifecycle and Production Systems' 
- startOffset: 1055 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1055 - endOffset: 1073 -- name: 'Engineering Tooling: PyTorch, Docker, Cloud, and Web Frameworks' - startOffset: 1073 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1073 - endOffset: 1225 -- name: 'Data Science Evolution: From Data Science 1.0 to Data Science 2.0' - startOffset: 1225 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1225 - endOffset: 1412 -- name: 'Skills Swap — Researchers Learn: Engineering Rigor and Reproducibility' - startOffset: 1412 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1412 - endOffset: 1730 -- name: 'Skills Swap — Engineers Learn: Handling Uncertainty and Experimental Rigor' - startOffset: 1730 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1730 - endOffset: 1816 -- name: 'Bridging the Gap: Cultural and Organizational Challenges' - startOffset: 1816 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=1816 - endOffset: 2060 -- name: 'Embedded Teams vs. Handoffs: Avoiding the "Throw-It-Over-the-Wall" Trap' - startOffset: 2060 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2060 - endOffset: 2217 -- name: 'Breaking Silos: Leadership, Sprints, and Active Collaboration' - startOffset: 2217 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2217 - endOffset: 2348 -- name: 'Role Fluidity: Flexible Responsibilities in High-Performing Teams' - startOffset: 2348 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2348 - endOffset: 2433 -- name: 'Full-Stack Data Scientist: From Model Development to Deployment' - startOffset: 2433 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2433 - endOffset: 2676 -- name: 'Advice for Researchers: Build End-to-End Systems and Deploy' - startOffset: 2676 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2676 - endOffset: 2817 -- name: 'Code Reviews for Researchers: Rapid Engineering Skill Development' - startOffset: 2817 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2817 - endOffset: 2871 -- 
name: 'Advice for Engineers: Read Papers, Reproduce Models, Run Experiments' - startOffset: 2871 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=2871 - endOffset: 3088 -- name: 'Practical Paper Reading: Tutorials, Code, and Researcher Collaboration' - startOffset: 3088 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3088 - endOffset: 3331 -- name: 'Choosing a Path: Internships, Masters, PhD — Try Both Early' - startOffset: 3331 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3331 - endOffset: 3536 -- name: 'Confetti.ai: Career Preparation and Learning Resources for ML Roles' - startOffset: 3536 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3536 - endOffset: 3700 -- name: 'Contact & Resources: Twitter, LinkedIn, and Confetti.ai' - startOffset: 3700 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3700 - endOffset: 3756 -- name: Episode Wrap-Up and Key Takeaways - startOffset: 3756 - url: https://www.youtube.com/watch?v=d9xVXqKq3sU&t=3756 - endOffset: 3696 --- Links: diff --git a/_podcast/s10e09-responsible-and-explainable-ai.md b/_podcast/responsible-explainable-ai-bias-detection.md similarity index 95% rename from _podcast/s10e09-responsible-and-explainable-ai.md rename to _podcast/responsible-explainable-ai-bias-detection.md index 5b178f3b..0b52097a 100644 --- a/_podcast/s10e09-responsible-and-explainable-ai.md +++ b/_podcast/responsible-explainable-ai-bias-detection.md @@ -1,20 +1,139 @@ --- +title: "Responsible & Explainable AI: Practical Guide to Bias Detection, Fairness & Governance" +short: "Responsible and Explainable AI" +season: 10 episode: 9 guests: - supreetkaur +image: images/podcast/responsible-explainable-ai-bias-detection.jpg ids: anchor: Responsible-and-Explainable-AI---Supreet-Kaur-e1o6mgj youtube: 8Eb5mG-pC3o -image: images/podcast/s10e09-responsible-and-explainable-ai.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Responsible-and-Explainable-AI---Supreet-Kaur-e1o6mgj apple: 
https://podcasts.apple.com/us/podcast/responsible-and-explainable-ai-supreet-kaur/id1541710331?i=1000581178150 spotify: https://open.spotify.com/episode/0xCSjSCG6tTiMSGfUJrMmO youtube: https://www.youtube.com/watch?v=8Eb5mG-pC3o -season: 10 -short: Responsible and Explainable AI -title: 'Responsible & Explainable AI: Practical Guide to Bias Detection, Fairness - & Governance' + +description: "Discover Responsible AI & Explainable AI tactics for bias detection, fairness checks and governance, practical tools to build trustworthy, compliant ML models" +intro: "How do you detect bias, enforce fairness, and govern AI systems in production without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia and Rutgers — walks through a practical roadmap for responsible AI and explainable AI grounded in real-world examples.

We define responsible AI and contrast it with post-hoc explainability, then unpack a credit decision bias case to show disparate outcomes in practice. Supreet outlines glass-box explainability techniques, data-level fairness checks (skewness, missingness, coverage), and EDA methods for bias detection. She covers PII handling, feature necessity assessments with SMEs and compliance, and automating data quality and monitoring. You’ll hear tool recommendations — What-If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local interpretability, drift and feedback-loop detection, and trade-offs between accuracy and interpretability.

Listeners will gain actionable guidance on bias detection, model interpretability, AI governance structures, and managing AutoML and regulated-industry risks — practical steps to make AI systems more fair, transparent, and accountable." +topics: +- responsible AI +- explainable AI +- bias detection +- fairness +- governance +- tools +dateadded: 2022-10-02 + +duration: PT00H58M56S + +quotableClips: +- name: 'Episode Introduction: Responsible and Explainable AI' + startOffset: 0 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=0 + endOffset: 134 +- name: 'Career Journey: Master''s, Consulting, and Founding DataBuzz' + startOffset: 134 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=134 + endOffset: 234 +- name: 'Data Strategy Role: Building AI Products at Morgan Stanley' + startOffset: 234 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=234 + endOffset: 283 +- name: 'Responsible AI: Definition, Trust, and Stakeholder Collaboration' + startOffset: 283 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=283 + endOffset: 402 +- name: 'Credit Decision Bias Example: Explaining Disparate Outcomes' + startOffset: 402 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=402 + endOffset: 500 +- name: 'Explainable vs Responsible AI: Post-mortem Tools vs Governance Mindset' + startOffset: 500 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=500 + endOffset: 630 +- name: 'Glass-Box Approach: Explainable AI Techniques Overview' + startOffset: 630 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=630 + endOffset: 696 +- name: 'Data-Level Fairness Checks: Skewness, Missingness, and Coverage' + startOffset: 696 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=696 + endOffset: 768 +- name: Exploratory Data Analysis for Bias Detection + startOffset: 768 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=768 + endOffset: 879 +- name: 'PII Handling: Age, Gender, Masking, and Use-case Justification' + startOffset: 879 + url: 
https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=879 + endOffset: 1040 +- name: 'Feature Necessity: Product, SME, and Compliance Decisioning' + startOffset: 1040 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1040 + endOffset: 1107 +- name: 'Automating Data Quality: DQ Tools, Alerts, and Monitoring' + startOffset: 1107 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1107 + endOffset: 1143 +- name: 'Model Explainability Tools: What-If, Skater, and AI Explainability 360' + startOffset: 1143 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1143 + endOffset: 1404 +- name: 'Local Interpretability: LIME, SHAP, and Surrogate Models' + startOffset: 1404 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1404 + endOffset: 1462 +- name: 'Ethics vs Profitability: Balancing Fairness and Business Objectives' + startOffset: 1462 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1462 + endOffset: 1658 +- name: 'Cross-Functional Governance: SMEs, Compliance, and Leadership Roles' + startOffset: 1658 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1658 + endOffset: 1949 +- name: 'Accuracy vs Interpretability: Managing Model Complexity Trade-offs' + startOffset: 1949 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1949 + endOffset: 2128 +- name: 'Human-in-the-Loop: Limits of Automation and Responsible Oversight' + startOffset: 2128 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2128 + endOffset: 2251 +- name: 'Detecting Drift & Feedback Loops: Demographics, Overfitting, and KS Tests' + startOffset: 2251 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2251 + endOffset: 2559 +- name: 'Regulated Industry Perspectives: Finance, Pharma, and Risk Sensitivity' + startOffset: 2559 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2559 + endOffset: 2647 +- name: 'Hiring Tool Case Study: Historical Bias and Remediation Lessons' + startOffset: 2647 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2647 + endOffset: 3017 +- name: 'AutoML Risks: 
Democratization, Oversight, and Responsible Usage' + startOffset: 3017 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3017 + endOffset: 3128 +- name: 'Community & Mentorship: DataBuzz Resources and Networking' + startOffset: 3128 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3128 + endOffset: 3230 +- name: 'Data Career Landscape: Analyst, MLOps, Consultant, and Strategist Roles' + startOffset: 3230 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3230 + endOffset: 3404 +- name: 'Ethics Training: Professional Responsibility for Data Practitioners' + startOffset: 3404 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3404 + endOffset: 3567 +- name: 'Closing Remarks: Follow-up, Links, and Contact Information' + startOffset: 3567 + url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3567 + endOffset: 3536 + transcript: - header: 'Episode Introduction: Responsible and Explainable AI' - line: This week, we'll talk about responsible and Explainable AI. We have a special @@ -151,7 +270,7 @@ transcript: sec: 426 time: '7:06' who: Supreet -- header: 'Explainable vs Responsible AI: Post‑mortem Tools vs Governance Mindset' +- header: 'Explainable vs Responsible AI: Post-mortem Tools vs Governance Mindset' - line: What is the relationship between responsible AI and explainable AI? From what I hear now – if we want to feel confident in the predictions, we need to be able to explain them. Does this mean that responsible AI is explainable AI? Or what's @@ -187,7 +306,7 @@ transcript: sec: 629 time: '10:29' who: Supreet -- header: 'Glass‑Box Approach: Explainable AI Techniques Overview' +- header: 'Glass-Box Approach: Explainable AI Techniques Overview' - line: You mentioned tools and you mentioned a framework. So how do we do this? 
sec: 630 time: '10:30' @@ -210,7 +329,7 @@ transcript: sec: 681 time: '11:21' who: Alexey -- header: 'Data‑Level Fairness Checks: Skewness, Missingness, and Coverage' +- header: 'Data-Level Fairness Checks: Skewness, Missingness, and Coverage' - line: '[laughs] Yeah. I will start with the data level. First, when we talk about the data level, we talk about fairness and bias testing. There, you have a few data quality checks that I feel every data scientist does. They do some sort of @@ -260,7 +379,7 @@ transcript: sec: 796 time: '13:16' who: Supreet -- header: 'PII Handling: Age, Gender, Masking, and Use‑case Justification' +- header: 'PII Handling: Age, Gender, Masking, and Use-case Justification' - line: So basically you need to, as a human – as an analyst or as a data scientist – you need to get your dataset from your database, CSV file, whatever, and just spend enough time trying to understand what's happening there. Right? Should we @@ -354,7 +473,7 @@ transcript: sec: 1117 time: '18:37' who: Supreet -- header: 'Model Explainability Tools: What‑If, Skater, and AI Explainability 360' +- header: 'Model Explainability Tools: What-If, Skater, and AI Explainability 360' - line: So what about this model part? sec: 1143 time: '19:03' @@ -523,7 +642,7 @@ transcript: sec: 1644 time: '27:24' who: Supreet -- header: 'Cross‑Functional Governance: SMEs, Compliance, and Leadership Roles' +- header: 'Cross-Functional Governance: SMEs, Compliance, and Leadership Roles' - line: So what kind of people do we need to have in this room to be able to have these fruitful discussions? You said that we need, perhaps, data scientists, analysts, and people from compliance. Who else should we have? 
@@ -622,7 +741,7 @@ transcript: sec: 1916 time: '31:56' who: Supreet -- header: 'Accuracy vs Interpretability: Managing Model Complexity Trade‑offs' +- header: 'Accuracy vs Interpretability: Managing Model Complexity Trade-offs' - line: Actually, we have a question from Shivam that is exactly about that. The question is, “How to manage the trade-off between model complexity and explainability? Complex models do not necessarily have good explainability, so how do we manage @@ -672,7 +791,7 @@ transcript: sec: 2066 time: '34:26' who: Alexey -- header: 'Human‑in‑the‑Loop: Limits of Automation and Responsible Oversight' +- header: 'Human-in-the-Loop: Limits of Automation and Responsible Oversight' - line: There is a question from Raquel, “What does ‘you need a human touch’ mean?” I think this is related to our discussion, where the first step is always a human analyzing the data. Then the question goes on “Does this mean that responsible @@ -1133,7 +1252,7 @@ transcript: sec: 3550 time: '59:10' who: Supreet -- header: 'Closing Remarks: Follow‑up, Links, and Contact Information' +- header: 'Closing Remarks: Follow-up, Links, and Contact Information' - line: I think that the time is up. So thanks for joining us. Maybe before we wrap up, is there anything you want to mention that maybe you forgot? sec: 3567 @@ -1163,131 +1282,6 @@ transcript: sec: 3630 time: '1:00:30' who: Alexey -description: Discover Responsible AI & Explainable AI tactics for bias detection, - fairness checks and governance, practical tools to build trustworthy, compliant - ML models. -intro: How do you detect bias, enforce fairness, and govern AI systems in production - without sacrificing business outcomes? In this episode, Supreet Kaur — AVP on Morgan - Stanley’s Data Strategy and Products team, founder of DataBuzz, and mentor at Columbia - and Rutgers — walks through a practical roadmap for responsible AI and explainable - AI grounded in real-world examples.

We define responsible AI and contrast - it with post‑hoc explainability, then unpack a credit decision bias case to show - disparate outcomes in practice. Supreet outlines glass‑box explainability techniques, - data‑level fairness checks (skewness, missingness, coverage), and EDA methods for - bias detection. She covers PII handling, feature necessity assessments with SMEs - and compliance, and automating data quality and monitoring. You’ll hear tool recommendations - — What‑If, Skater, AI Explainability 360, LIME, SHAP — plus approaches to local - interpretability, drift and feedback‑loop detection, and trade‑offs between accuracy - and interpretability.

Listeners will gain actionable guidance on bias detection, - model interpretability, AI governance structures, and managing AutoML and regulated‑industry - risks — practical steps to make AI systems more fair, transparent, and accountable. -dateadded: '2022-10-02' -duration: PT00H58M56S -quotableClips: -- name: 'Episode Introduction: Responsible and Explainable AI' - startOffset: 0 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=0 - endOffset: 134 -- name: 'Career Journey: Master''s, Consulting, and Founding DataBuzz' - startOffset: 134 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=134 - endOffset: 234 -- name: 'Data Strategy Role: Building AI Products at Morgan Stanley' - startOffset: 234 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=234 - endOffset: 283 -- name: 'Responsible AI: Definition, Trust, and Stakeholder Collaboration' - startOffset: 283 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=283 - endOffset: 402 -- name: 'Credit Decision Bias Example: Explaining Disparate Outcomes' - startOffset: 402 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=402 - endOffset: 500 -- name: 'Explainable vs Responsible AI: Post‑mortem Tools vs Governance Mindset' - startOffset: 500 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=500 - endOffset: 630 -- name: 'Glass‑Box Approach: Explainable AI Techniques Overview' - startOffset: 630 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=630 - endOffset: 696 -- name: 'Data‑Level Fairness Checks: Skewness, Missingness, and Coverage' - startOffset: 696 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=696 - endOffset: 768 -- name: Exploratory Data Analysis for Bias Detection - startOffset: 768 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=768 - endOffset: 879 -- name: 'PII Handling: Age, Gender, Masking, and Use‑case Justification' - startOffset: 879 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=879 - endOffset: 1040 -- name: 'Feature Necessity: Product, SME, and Compliance 
Decisioning' - startOffset: 1040 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1040 - endOffset: 1107 -- name: 'Automating Data Quality: DQ Tools, Alerts, and Monitoring' - startOffset: 1107 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1107 - endOffset: 1143 -- name: 'Model Explainability Tools: What‑If, Skater, and AI Explainability 360' - startOffset: 1143 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1143 - endOffset: 1404 -- name: 'Local Interpretability: LIME, SHAP, and Surrogate Models' - startOffset: 1404 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1404 - endOffset: 1462 -- name: 'Ethics vs Profitability: Balancing Fairness and Business Objectives' - startOffset: 1462 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1462 - endOffset: 1658 -- name: 'Cross‑Functional Governance: SMEs, Compliance, and Leadership Roles' - startOffset: 1658 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1658 - endOffset: 1949 -- name: 'Accuracy vs Interpretability: Managing Model Complexity Trade‑offs' - startOffset: 1949 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=1949 - endOffset: 2128 -- name: 'Human‑in‑the‑Loop: Limits of Automation and Responsible Oversight' - startOffset: 2128 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2128 - endOffset: 2251 -- name: 'Detecting Drift & Feedback Loops: Demographics, Overfitting, and KS Tests' - startOffset: 2251 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2251 - endOffset: 2559 -- name: 'Regulated Industry Perspectives: Finance, Pharma, and Risk Sensitivity' - startOffset: 2559 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2559 - endOffset: 2647 -- name: 'Hiring Tool Case Study: Historical Bias and Remediation Lessons' - startOffset: 2647 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=2647 - endOffset: 3017 -- name: 'AutoML Risks: Democratization, Oversight, and Responsible Usage' - startOffset: 3017 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3017 - 
endOffset: 3128 -- name: 'Community & Mentorship: DataBuzz Resources and Networking' - startOffset: 3128 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3128 - endOffset: 3230 -- name: 'Data Career Landscape: Analyst, MLOps, Consultant, and Strategist Roles' - startOffset: 3230 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3230 - endOffset: 3404 -- name: 'Ethics Training: Professional Responsibility for Data Practitioners' - startOffset: 3404 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3404 - endOffset: 3567 -- name: 'Closing Remarks: Follow‑up, Links, and Contact Information' - startOffset: 3567 - url: https://www.youtube.com/watch?v=8Eb5mG-pC3o&t=3567 - endOffset: 3536 --- Links: diff --git a/_podcast/s16e04-from-marketing-to-product-owner-in-search.md b/_podcast/s16e04-from-marketing-to-product-owner-in-search.md deleted file mode 100644 index 900c5c52..00000000 --- a/_podcast/s16e04-from-marketing-to-product-owner-in-search.md +++ /dev/null @@ -1,1076 +0,0 @@ ---- -episode: 4 -guests: -- lerakaimashnikova -ids: - anchor: atatalksclub/episodes/From-Marketing-to-Product-Owner-in-Search---Lera-Kaimashnkova-e2b33qt - youtube: -HbQQ_bVdfE -image: images/podcast/s16e04-from-marketing-to-product-owner-in-search.jpg -links: - anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/From-Marketing-to-Product-Owner-in-Search---Lera-Kaimashnkova-e2b33qt - apple: https://podcasts.apple.com/us/podcast/from-marketing-to-product-owner-in-search-lera-kaimashn%D1%96kova/id1541710331?i=1000633617858 - spotify: https://open.spotify.com/episode/540Mzul8eaulfqettzAHJH?si=OJWEa8NqSIaviV3zMyzL6Q - youtube: https://www.youtube.com/watch?v=-HbQQ_bVdfE -season: 16 -short: From Marketing to Product Owner in Search -title: 'From Marketing to Product Owner: Build E-commerce Search with Elasticsearch - & NLP' -transcript: -- header: Podcast Introduction & Guest Welcome -- line: This week, we'll talk about transitioning from marketing to being a product - 
owner in search. And we have a very special guest today Valeria. Valeria is a - product owner with a focus on e-commerce, site search optimization, analytics, - team management, and product development – and I think there are more things that - I omitted because your biography is quite long and extensive. Today, Valeria will - share her experience with us. Welcome! - sec: 79 - time: '1:19' - who: Alexey -- line: Thank you so much. Thank you for joining. - sec: 108 - time: '1:48' - who: Lera -- header: 'Background: Transition from performance marketing to product roles' -- line: Before we go into our main topic of transitioning from marketing to being - a product owner, let's start with your background. Can you tell us about your - career journey so far? - sec: 111 - time: '1:51' - who: Alexey -- line: What do you mean by career journey? - sec: 121 - time: '2:01' - who: Lera -- line: Your journey of how you ended up in the place where you are right now. What - did you do before? - sec: 126 - time: '2:06' - who: Alexey -- header: 'Marketing Experience: B2B e‑commerce, lead acquisition, branding' -- line: Oh, okay – fine. Well, as you learned before, I started with marketing. Actually, - I was always focused on IT. Working in Product was my vision of my career, but - to start being a product manager/product owner, you need to start from somewhere. - Basically, there was an opportunity to join Ringostat, which is a SaaS that provides - analytics and call-tracking for marketers. They had an open position of marketer - – it was like performance marketing, so I was in charge of doing promotions and - doing some events. My responsibility was to acquire leads – to generate leads - and to make them our customers. - sec: 134 - time: '2:14' - who: Lera -- line: For example, some very interesting stuff that I did – our customers were B2B - (other companies) and marketing the work for e-commerce as well. 
I did some interesting - events, for example, there was a rating of PPC agencies. That's how I grabbed - the attention of all our customers and provided something of value for them. For - this rating stuff, we also acquired leads. My first step in Performance Marketing - was acquiring leads and making promotions and all this advertising stuff. So after - Ringostat, I went to another company. It was a logistic holding. We have eight - companies inside this holding. Basically, it was about transportation, about oil - (we have gas stations) and they also have e-commerce that sells car parts. That - was a little bit of a journey for my career. - sec: 134 - time: '2:14' - who: Lera -- line: Basically, it was a small marketing department and we also did pretty much - everything. I was also in charge of Performance Marketing and we did some promotions - for each company. For example, we did a promotion for a logistics company, made - events, and so on. Also, it was like internal outsourcing marketing for all these - companies in the holding. The other great task was to make branding for gas stations. - I also loved this task because I was analyzing and making a “job to be done” framework, - creating great messaging in our branding, and we released this branding. The other - task was to launch a website that sells car parts. We were limited in our assortment - – we just sold tires. This e-commerce was also B2B because we sold to logistic - companies. It was stuff for trucks. We sold batteries for huge trucks, tires for - them, and oil, and all this stuff. I worked there for one year. It was based in - Odesa, Ukraine. - sec: 134 - time: '2:14' - who: Lera -- line: As you know, in 2020, the war in Ukraine started. Because I was in marketing, - it's become… I realized I needed to move to Europe, because of the situation there. - That's how I left my job at the previous company. 
I realized it was marketing - for the internal market in Ukraine, and so I would need to change my whole career - – to change everything I was doing – and to move to another country to focus more - on the international position. This was where the challenge began. Basically, - I didn’t really have experience being a product owner. I didn't have much experience - in e-commerce because my e-commerce experience was for B2B users. It was not about - promotions and stuff. I didn't have much of this experience. That's where the - journey was about breaking barriers in your head. - sec: 134 - time: '2:14' - who: Lera -- line: So, I was trying to find a new job because I lost mine. I did several interviews, - and while I was doing interviews, I actually started learning to code. I was learning, - for example, JavaScript, HTML, and CSS, while I was in this transition period - – I wanted to learn to code more. I really had time for this, too. It was mind-blowing - for me at first. It was hard. I really struggled to learn how to code. But basically, - I ended up in a position where I could code a website, for example. When I was - looking for a new job I used this time to learn new stuff – it was hard stuff - for me because it was completely different from what I did before. - sec: 134 - time: '2:14' - who: Lera -- line: How I joined AUTODOC is a great story. I just tried different channels of - how to find a new job and I wrote to my colleague, with whom we used to work at - Ringostat. I just wrote to him, “How are you doing? How are you doing with this - whole situation in Ukraine? Where are you now?” And he said, “I'm working at AUTODOC - right now.” And asked, “Yeah, actually I'm looking for jobs. Maybe you have some - open positions, guys?” And he said “Yeah, why not? I should ask people.” The recruiter - sent me a job description and it had nothing to actually do with my knowledge - – it was so different. 
- sec: 134 - time: '2:14' - who: Lera -- header: Landing Product Owner Role at AUTODOC Despite Non‑traditional Fit -- line: It was the product owner position, right? - sec: 566 - time: '9:26' - who: Alexey -- line: Yeah. Yeah, it was a product owner position. The requirements were something - like being a product owner in e-commerce for two years, knowing a lot about car - parts, and everything like that. [cross-talk] - sec: 568 - time: '9:28' - who: Lera -- line: But you knew something about car parts already from your previous logistics - experience. [Lera agrees] So it was still a match, to some extent, right? - sec: 580 - time: '9:40' - who: Alexey -- line: Yes. I use this experience to sell myself [chuckles] and to be more relevant. - We went to some interviews and I think they liked me. So that's how I joined AUTODOC. - From the beginning, at AUTODOC, I can share my challenges regarding how to transform - a new position from the lack of… [cross-talk] - sec: 587 - time: '9:47' - who: Lera -- line: Yeah, I definitely have questions about that. - sec: 620 - time: '10:20' - who: Alexey -- line: Yeah. Okay. - sec: 622 - time: '10:22' - who: Lera -- header: 'Relocation: Moving from Ukraine to Germany during 2020' -- line: I'm just curious. When the war started and you left Ukraine, did he go immediately - to Berlin? Or did you go to some other place? - sec: 623 - time: '10:23' - who: Alexey -- line: No, no. It was hard to find an apartment, actually. Maybe you know something - about this. We started from a small town in Germany, but then I realized it was - too small to live there, so we went to Breslau. And from Breslau, then I moved - to Berlin. But I moved to Berlin while already having been employed at AUTODOC. - sec: 631 - time: '10:31' - who: Lera -- line: Aha, so you have offices in both Breslau and Berlin. - sec: 658 - time: '10:58' - who: Alexey -- line: We don't have offices in Breslau, it's remote. - sec: 662 - time: '11:02' - who: Lera -- line: Okay. 
So you lived in Breslau, you contacted your ex-colleague from the previous - company, and he forwarded you a job description. You didn't really fit this job - description, but you still tried to interview and got this job, and then eventually - moved to Berlin. - sec: 666 - time: '11:06' - who: Alexey -- line: Yeah, that's true. - sec: 682 - time: '11:22' - who: Lera -- line: Okay. I think I interrupted you. You wanted to tell us about the challenges, - right? Your prior experience was in performance marketing. [Lera agrees] You were - talking about things like PPC campaigns, which is… What does it mean, actually? - PPC? - sec: 684 - time: '11:24' - who: Alexey -- line: Pay per click. - sec: 703 - time: '11:43' - who: Lera -- header: 'Transition Challenges: Learning product ownership on the job' -- line: Pay per click! Exactly. And now you're a product owner, right? Which is… I - don't know how related it is to Performance Marketing. Maybe there is some relation, - maybe not. But I'm curious, what were the challenges? How did you transition? - What did you need to do for this? - sec: 704 - time: '11:44' - who: Alexey -- line: Yeah, that's an interesting part. The first challenge was that I needed to - learn a lot. I spent… I worked a lot. I worked until late-late evening to become - an expert in this field. I learned from very different perspectives. For example, - first, I needed to learn what product ownership is. I took a bunch of courses. - For example, there was a special course for product ownership in Ukraine – I wanted - to take a deep dive into this role and what it actually means and… [cross-talk] - sec: 726 - time: '12:06' - who: Lera -- line: But you were already employed. [Lera agrees] So you already got hired and - you thought, “Okay, what is this product owner thing?” [chuckles] “Let me check.” - sec: 775 - time: '12:55' - who: Alexey -- line: '[chuckles] Yeah, that''s a funny part. But I really did a lot to learn. I - read so many books. 
For example, there is a book called Professional Product Owner. - It describes what the role of product owner is. Basically, the product owner is - a role in Scrum. But in our company, the product owner is more of a product manager. - It''s just a title, but we do pretty much everything that the product manager - does. Currently, we are in a tech transformation, and our titles would actually - be product managers. Really, the first thing I did was take a deep dive into what - the product owner role is and what this person does. I went to courses, and I - was reading books – I’ve done a lot of this. I have some knowledge on how… For - example, in my previous companies [of employment], people were doing the same - thing.' - sec: 784 - time: '13:04' - who: Lera -- line: At Ringostat, we had pretty much the same positions. It was the Visionary - Officer and the Project Manager. I basically grabbed some patterns from those - people and put those patterns into doing this role. Then I took a really deeper - dive into this role. The second challenge… I also wanted to say that, when you - transition from another position, one thing that would be super helpful is to - build connections with your colleagues and your coworkers because those people - will help you. They will teach you. My tip for this is to learn from everyone. - I learned a lot from Quality Assurance Engineers. They taught me about the product. - I also learned a lot from engineers – from them, I learned the logic of the product - and the search. I also learned from business analysts and I learned a lot from - data analysts. - sec: 784 - time: '13:04' - who: Lera -- line: I communicate with a cross-functional team and I spend really, really quality - time in one-on-one meetings. So build connections, learn from them, and deeply - understand the product. 
If I were to give a tip to myself back when I started, - it would be to communicate more – to do more one-on-ones and communicate with - different people, at different levels. They will all generate new insights – they - will teach you how to be more insightful and more productive. Learn internal stuff - in the company to understand the context. What else do I need to share? Basically, - about how I learned data – for example, data analysis. - sec: 784 - time: '13:04' - who: Lera -- header: 'One‑on‑Ones: Structuring meetings to align with engineers and QA' -- line: I'm curious about a few things. You mentioned you had two problems. The first - problem was that you needed to learn a lot. What you did to solve that was take - courses, and worked into late evenings – you tried to immerse yourself in the - job to learn as much as possible. Then the second thing was building connections. - You would speak with pretty much everyone in the company to learn from them – - how they work, what they work on, what kind of problems they have, and learn about - the company. I think this thing (this communication) these one-on-one meetings - are really important for everyone, regardless of the role, be it product manager, - product owner, data scientist, software engineer – anyone. So I'm just curious, - for these one-on-one meetings – let's say you want to speak with a QA engineer - or a business analyst or a data analyst. - sec: 996 - time: '16:36' - who: Alexey -- line: How do you structure this meeting? First, I guess you need to approach a person - saying, “Hey, Martin, (or Hey, Theresa). I want to have a one-on-one meeting with - you.” Right? So you agree to a meeting. But then, what happens next? How do you - structure this meeting? How do you get the most out of this meeting? Is it just - free-form or do you have some structure? - sec: 996 - time: '16:36' - who: Alexey -- line: Yeah, I want to share a specific case. 
When I joined, they already had development - going on – they had a specific feature to deliver. Basically, I realized that - we kind of had a miscommunication with one developer – we just didn't hear each - other. I understood that we could not find common ground – I said one thing, he - said another, and we were losing time. We did communications in JIRA and the communication - just wasn't aligned. So I decided to set up a one-on-one meeting to fix the situation - and find common ground. - sec: 1075 - time: '17:55' - who: Lera -- line: With the developer, right? - sec: 1132 - time: '18:52' - who: Alexey -- line: 'With the developer – right. The structure was like this: I will try to remember - the questions I asked him. For example, I asked him what his vision of the best - product owner was – what did he expect from me, being the product owner? The second - question was, “Who do you consider to be the best product owner in the company?” - He described what he expected from me and he described a great product owner who - was already in our company – whom I needed to learn from.' - sec: 1133 - time: '18:53' - who: Lera -- line: What I learned from him was that a product owner was supposed to be more confident - and pushier – to be a leader – and he said that was lacking for me. This was great - feedback. Developers need a person (a product owner) who will be a great leader. - Also, we were making some small talk – we talked about things outside of the job. - I actually learned that he loved tennis and I also play tennis, so we made some - small talk about tennis. - sec: 1133 - time: '18:53' - who: Lera -- line: You connected on this basis. - sec: 1223 - time: '20:23' - who: Alexey -- line: '[chuckles] Sorry?' - sec: 1226 - time: '20:26' - who: Lera -- line: You connected because you share the same hobby. So you made a connection. - sec: 1227 - time: '20:27' - who: Alexey -- line: Yeah. 
I asked him to give me some tips on how to improve my job and what I - do [for him]. He shared his opinion. I also asked how we could improve our work - process and what he thought our gaps were. I just wanted to collect his feedback. - This feedback was really important to me, naturally, and I really wanted him to - just share his concerns and everything. After everything, after this meeting, - this person said, “Yeah, okay. I understand you.” He wasn't angry anymore. - sec: 1230 - time: '20:30' - who: Lera -- line: He understood that I just came there and I was a newbie. He said, “Okay, you - will learn. Okay, Valerie – I'm fine with you. It's okay.” So in one hour, you - can build the connection and that's how you take it step-by-step and become a - better specialist – more mature, more confident. You learn more. Now, if you compare - me with the person I was in the beginning – it’s two different people. [chuckles] - sec: 1230 - time: '20:30' - who: Lera -- line: So you needed to find common ground with the developer and you couldn't find - it. Maybe this is more like a “step back” question because we did not really discuss - what product owners do. I was wondering, why was it important to have good communication - with this developer? What's the role of a product owner? - sec: 1313 - time: '21:53' - who: Alexey -- line: Yeah, we didn’t discuss this. [chuckles] - sec: 1337 - time: '22:17' - who: Lera -- line: You said that you took a lot of courses to figure out what your job actually - is. So, what is your job? - sec: 1339 - time: '22:19' - who: Alexey -- header: 'Product Owner Scope: Monitoring, roadmap, prioritization, team operations' -- line: '[chuckles] It''s a funny one. The funniest thing is how [different] product - owners are from what they’re considered to be in literature and how they are in - real life. I think it''s kind of different. I will share with you the reality - of e-commerce, actually. 
As I said before, I''m in charge of search, which is - when you type something and search in e-commerce. One part of that is monitoring - the metrics. Basically, I''m in charge of keeping an eye on the existing products. - We are e-commerce, so we already have our search working. The first thing I need - to do is monitor our day-to-day metrics – our conversion rate, the search popularity, - and all this stuff. This is one part of my job. If there are some critical bugs, - I need to fix them somehow – to find a developer and to avoid making the company - losing money.' - sec: 1347 - time: '22:27' - who: Lera -- line: I'm in charge of making sure that the functionality works fine. Sometimes - we do have some issues, for example, with updating products. I also need to control. - This is simply one part. The other part (the real product owner/product manager - part) is we need to have a clear vision of where we are going – where our product - is going. You need to have a detailed roadmap with those JIRA Epics and User Stories. - The product owner prioritizes… The product owner is a value maximizer – this person - needs to prioritize the most valuable features and say, “We’re doing this first, - and the next feature would be this one.” Basically, it’s roadmapping, planning, - defining the vision of the product and the strategy – that’s basically the product - owner’s role. - sec: 1347 - time: '22:27' - who: Lera -- line: The other thing I need to do is team management. I think I can say that I - sometimes perform the role of a Scrum Master or Project Manager, which is how - you just organize the work to deliver some features. Frankly speaking, we don’t - have unstoppable development – what I want to say is that we have some initiatives - that we deliver. For example, this year, there was some time when we didn't actively - work on search because the company had different initiatives, such as migrating - from a regular website to an adaptive website. 
I was taking on the responsibility - of transitioning the existing website to an adaptive website. I didn't participate - as a Scrum Master in this kind of initiative – I was doing a different role. - sec: 1347 - time: '22:27' - who: Lera -- line: Sometimes we have initiatives that are very dedicated to search, for example, - delivering filters, or a new auto-complete. For these kinds of parts, I take on - the role of Scrum Master to organize our development process. I also do this kind - of stuff. I mentioned roadmapping, but I didn't mention that I do research as - well. Basically, I monitor competitors, I read a lot about search – I was very - surprised that search is such a deep, deep topic. You need to know about machine - learning, natural [language] processing, you need to know about algorithms, about - Elasticsearch, and how to do the autocomplete, you need to know about product - structure and all the attributes of the searches. - sec: 1347 - time: '22:27' - who: Lera -- line: Also, you need to know the users, actually – what's the business context of - search, what problems are users trying to solve by searching for car parts, for - example? You need to know different query types. For example, users can search - in different ways – they can search by part numbers (OEM numbers) or they can - search by part name, or they just type something like, “I want brake pads Brembo - on my Audi a4,” and you need to know how to make your search understand all the - semantics. - sec: 1347 - time: '22:27' - who: Lera -- line: That’s a lot. [Lera agrees] You’ve been talking for 10 minutes and I'm taking - notes – the entire page is filled with what you do. [chuckles] - sec: 1664 - time: '27:44' - who: Alexey -- line: Yeah, I'm jumping a lot. I want to structure it a little bit. First, it's - to monitor how things are going right now, under the current project. 
Second, - you need to do research and understand users, understand competitors, and understand - patterns of users’ searches. The other part is team management – how to organize - the work. [cross-talk] - sec: 1672 - time: '27:52' - who: Lera -- line: You follow Scrum, right? All these estimating meetings, daily stand-ups, retrospectives - – you organize all that, right? - sec: 1710 - time: '28:30' - who: Alexey -- header: 'Process Choices: Scrum for deliveries vs Kanban for investigations' -- line: Yes, depending on the initiatives we have. If we, for example, have to deliver - filters – we need two developers and two QA engineers, and that's pretty much - it. For this kind of development process, for example, we did stand-ups, planning, - and retrospectives. But for the other initiative, we needed to investigate new - technologies, so you don't really need all these ceremonies. You just do Kanban, - where it's “needs to be done,” “in progress,” “done”. For different initiatives, - it’s different types of… - sec: 1727 - time: '28:47' - who: Lera -- line: From what I understood, you work with multiple teams, right? - sec: 1787 - time: '29:47' - who: Alexey -- header: 'Team Building: Forming a dedicated e‑commerce search team' -- line: Yeah, that's the interesting part. Because right now, we don't really have - a super-dedicated search team. But I'm looking forward to having one. Before we - just took developers for some initiatives. For example, for some kind of initiative, - you just grab some people – to deliver this feature, you need those people and - you acquire those people – you have this initiative and you deliver it. But we - are going to have dedicated teams – for example, dedicated to search – because - now, in our roadmap, we have natural language processing, we have machine learning, - and I think it requires a lot of context understanding. That's why I’m trying - to form a team around search right now. 
- sec: 1790 - time: '29:50' - who: Lera -- line: So, right now, in addition to all these things you mentioned, you're also - building a search team. - sec: 1855 - time: '30:55' - who: Alexey -- line: Yeah! [chuckles] - sec: 1860 - time: '31:00' - who: Lera -- line: Okay, I just want to summarize what you said. First, you monitor business - health (search health) if I can say that. [Lera agrees] The second thing is, you're - doing research, you're talking with users, you're watching what competitors are - doing so you know if you need any new features or things like that. Then you do - this team operational stuff, which is Scrum/Kanban – all these processes and rituals. - Then you mentioned a more strategic part, or defining a clear vision of where - you're going, and then from that vision, building a roadmap – that's another thing. - sec: 1862 - time: '31:02' - who: Alexey -- line: Then, I remember you talked about actually learning all these things – learning - about NLP, learning about machine learning. And now I think I understand why – - because you need to build a team and you want to know what kind of things they - need to know. Right? What kind of experience you need in the team, what kind of - knowledge you need in the team – you need to know that in order to build the team. - sec: 1862 - time: '31:02' - who: Alexey -- line: Yeah. For example, I need to know natural language processing is data science - stuff. Actually, it's not just me. You might have the impression that I do all - this stuff and it's like a T-shaped person. But we still have, for example, the - research team – at AUTODOC, there is the research department and they do in-depth - analytics and all this stuff. So I don't do it by myself. I don't do user interviews, - as you said – we have a special department for that. 
And also we have… [cross-talk] - sec: 1936 - time: '32:16' - who: Lera -- line: You still need to know that, “There was this interview, and this is the outcome - of this interview. These were the questions.” And perhaps you even watch the videos - of the interview to see how users actually use the app and then you see, “Okay, - something is wrong here. Maybe we should change the flow of the search.” Right? - sec: 1977 - time: '32:57' - who: Alexey -- line: Yeah, that’s true. My message was that I don't do it by myself, but we have - a special department for this. But I still need to read these studies and everything. - sec: 1994 - time: '33:14' - who: Lera -- line: It’s just that you don't code yourself, but you need to translate what the - researchers found into what actually needs to happen. You’re this glue that kind - of links these departments. Right? - sec: 2007 - time: '33:27' - who: Alexey -- line: Yeah. I say that being a product manager is like being a mini-CEO of the product. - Maybe you've heard this and it's true – you’re kind of responsible for everything - and if you have some kind of issues with the product, you need to react. That's - a tricky position, I know. But it's still very interesting. Some people like this - – more generalists and T-shaped people – I like to learn from different perspectives. - So I think this is fine for me, to not be a super-narrow specialist, but more - of a wide person. - sec: 2025 - time: '33:45' - who: Lera -- header: 'Search Expertise: Relevant Search book, Elasticsearch, and relevance as - business context' -- line: How do you keep up with all that? You mentioned that, in addition to all that, - you also need to learn about all this machine learning stuff. Actually, this is - how I found you. There was a post that you made about Relevant Search – the book. - Duke, the author, liked the post and it appeared in my feed. Duke was already - a guest multiple times at DataTalks.Club. 
I saw his reaction and then I read the - post and I thought, “Hmm… Interesting. I should invite Valeria.” So why did you… - sec: 2072 - time: '34:32' - who: Alexey -- line: The book is called Relevant Search and, as far as I remember, the book is - about – it's a very technical book. [Lera agrees] It's about using Elasticsearch. - It's Elasticsearch, right? It’s a search engine for building searches. So how - did you come across this book? Why did you decide to read it? - sec: 2072 - time: '34:32' - who: Alexey -- line: I think it's good to mention. It’s because I see that I need to understand - all this stuff to communicate with developers, for example. I was in some communities - of Elastic, and I saw that people were sharing this book. Actually, this book - is promoted to be a good one – like the Bible of Search or something. I found - that it's pretty useful to read about search. Regarding why I wrote this post - – because, in the introduction of this book, they just pointed out some of my - pain points. They just described the importance of search and how hard search - is. [chuckles] - sec: 2128 - time: '35:28' - who: Lera -- line: The main point is relevance, which is very connected to the business context. - You need to educate developers to understand the business context because you - cannot just build a relevant search from scratch – you need to tune the search - to be relevant. You also need to give the developers this context – what is relevant - for users, for example. I really like this thought, this expression. We need to - work on relevance. You cannot just grab some universal search and it will be super - fine. You need to tune it for your business. That's what I wanted to post on LinkedIn - – this thought that I really liked. The second part of the post was about cross-functional - collaboration. 
So it's not about requirement-driven development, it's more about - a product mindset – everyone should understand, as I said, the business context - and why it's important for users to get these kinds of results. - sec: 2128 - time: '35:28' - who: Lera -- line: I expect my developers to have a deep understanding of the business context. - I actually create those meetings, where I try to explain to developers why we're - doing this, what the problems of the users we’re solving, and what search actually - means. Even today, I will have this meeting and I want to show them, for example, - that the users can search by catalog – they can navigate by catalog, and it’s - a different way to search for car parts by text. I want them to understand this, - that it's not about just text, it's about users solving their problems. - sec: 2128 - time: '35:28' - who: Lera -- line: I imagine, if we’re talking about this domain of car parts, it's a pretty - complex one. Let's say you have a specific car model/make – Volkswagen, for example - (I'm not really into cars) – I don't know, some specific model. - sec: 2313 - time: '38:33' - who: Alexey -- line: It’s okay. That's fine. [chuckles] - sec: 2327 - time: '38:47' - who: Lera -- line: And then you need a specific part that would fit this exact car, right? You - not only need to know the name of this car part, but also the model and make of - the car for which you need the part. I can imagine that it becomes pretty complex - at some point, right? - sec: 2329 - time: '38:49' - who: Alexey -- header: 'User Journey: Vehicle selector, part fitment, and contextual search flows' -- line: Well, for this point, we have the user journey. The users need to… For example, - we have this cool feature, where you just insert your license plate number and - our vehicle selector will identify the vehicle by this license plate. For some - countries, we have this feature, and I think it's super user-friendly. You just - type 6 points. 
Now, when your car is identified on our website, you can just search - for stuff and you will have only the car parts suitable for your car. It works - like this. You first identify your car and then you can search for stuff and the - products that our website shows you fit your car. - sec: 2348 - time: '39:08' - who: Lera -- line: Well, you need to have this business context – you need to know this business - context in order to arrive at this solution, right? Without it, you would come - up with just a general search bar and you will be like, “Oh, I don't know. I need - a compressor for…” I don't know if there is such a thing. I know that there are - compressors in fridges. [chuckles] Whatever part name for whatever model and then, - “Okay, why is it not working? Why is it showing me this part for another Volkswagen?” - Right? - sec: 2411 - time: '40:11' - who: Alexey -- line: '[chuckles] Yeah. Right now, at AUTODOC, we still have some gaps. But there’s - a roadmap to fill those gaps. [chuckles] For example, you can buy a product without - identifying the car – you can’t do it right now. And it''s our task to solve – - to make it more user-friendly, to provide users the ability… We need to make a - website to make it clearer and understand users need to pick a car to find the - right one. It''s quite a common pattern. You can search, for example, “brake disc - for Audi” and that''s it, without specifying which Audi. We are now making machine - learning features with natural language processing that will identify the car - part maker or model in a search query and offer the user more information about - the car to make the whole journey easier. We have this feature in development.' - sec: 2441 - time: '40:41' - who: Lera -- line: This is quite a technical feature. It includes knowing what natural language - processing is, what parsing is, and extracting things from there. How technical - do you need to be? 
How much do you, as a product owner, need to know about that? - Is it more like a “nice to have” or is it actually a very important skill in your - case? - sec: 2539 - time: '42:19' - who: Alexey -- line: Yeah. I also went to… maybe you know this conference – it is about search, - called Haystack. - sec: 2559 - time: '42:39' - who: Lera -- line: Yeah. Is it also in Berlin? - sec: 2568 - time: '42:48' - who: Alexey -- header: 'Technical Upskilling: NLP, information retrieval, Haystack conference, - and ChatGPT' -- line: Yeah! It's in Berlin. It's a really technical conference. It’s for developers. - and I also go there to understand the technical aspects. You know what helps? - ChatGPT helps a lot, actually. What I do is, for example, I'm reading a technical - book and I don't really understand what information retrieval is. So I just go - to ChatGPT and say, “Hey, ChatGPT! What is information retrieval in the context - of car part search?” Ah, inverted index! Not information retrieval – inverted - index! “What does inverted index mean in the context of car part search? Explain - it to me like I’m 5.” [chuckles] I'm just kidding. - sec: 2571 - time: '42:51' - who: Lera -- line: That’s a very important part, right? “Explain like I’m five.” [chuckles] - sec: 2620 - time: '43:40' - who: Alexey -- line: Yeah, so it gives a simple explanation – ChatGPT gives me very simple examples - I can understand. That's a cool part. I think the more technical of a person you - are, the better. I'm trying to dig deep into technical stuff – I really try to - understand what natural language processing is from a technical standpoint. I - think the more you are in tech, the better – but still, the basics help you to - communicate with people and to give them context, for general understanding. - sec: 2623 - time: '43:43' - who: Lera -- line: For example, I need to know what a natural language processing task does. 
- I know it's spelling correction, I know it's named entity recognition – the natural - processing makes tokenization of the query, whether it's a brand name or a car - part name or its number – this task calls to natural language processing. So yeah, - I think it’s mandatory to learn the basics but the more you are in tech, the better. - sec: 2623 - time: '43:43' - who: Lera -- line: I guess it also depends on the field of your work. Since you work in e-commerce - search, for you, it makes sense to go and learn about search. If somebody works - in some other domain, maybe they would need to learn about some other things. - But still, for a product owner, it's important to know the technical parts of… - I'm trying to think of an example. - sec: 2709 - time: '45:09' - who: Alexey -- line: Let's say that it’s a product owner in the moderation team and the moderation - team uses machine learning to identify things that shouldn't be posted on the - website. For them, it's probably important to know how machine learning can be - used for this and what machine learning actually is. This is similar to your case. - [Lera agrees] You mentioned you found this book when you were in a search community. - [Lera agrees] So you were already a part of technical search communities, and - you came across this book. - sec: 2709 - time: '45:09' - who: Alexey -- line: Yeah, in Telegram, they have an Elasticsearch group, and I am part of this - group. Actually, we are always looking forward to developers and that's why. There - are people sharing and this is how I learned. On GitHub, I also saw a bunch of - books about Elasticsearch, and this book was the first on the list. So that's - how I realized that I really need to read this one because there’s so much social - proof on it. [chuckles] - sec: 2767 - time: '46:07' - who: Lera -- header: 'Recruiting: Remote roles, office hubs, and open developer positions' -- line: Yeah. You said you're looking for developers right now? 
- sec: 2802 - time: '46:42' - who: Alexey -- line: Yeah, we are. [chuckles] - sec: 2807 - time: '46:47' - who: Lera -- line: So what's the profile that you're looking for? You mentioned that it's a fully - remote position, right? I heard in our community, in DataTalks.Club, many people - say, “Hey, I really want to have a fully remote job, but it's always US-based - (remote but in the US).” So what kind of remote…? Or is it not remote? - sec: 2809 - time: '46:49' - who: Alexey -- line: We have offices across Europe, across Ukraine, and even in other countries. - If you live in the city, you can go to the office – it's fine. We do have an office - in Berlin. Our tech hub is actually in Lisbon. This previous week, I was there - at a product event. You can work remotely, but you can also go to the office if - you live in the city [where there is one]. - sec: 2832 - time: '47:12' - who: Lera -- line: You lived in Breslau and you worked and you worked remotely, right? So that’s - also possible. - sec: 2863 - time: '47:43' - who: Alexey -- line: Yeah. What I did was… I actually invest, I think, in traveling around Europe - and seeing people offline and connecting with them. Because I think it's also - crucial to make these offline connections. I've pretty much been in some offices - at AUTODOC already. - sec: 2868 - time: '47:48' - who: Lera -- line: Well, please share the links to the job descriptions. Maybe there is somebody - who is an experienced search engineer, who's listening to us right now and would - love to join your team. - sec: 2895 - time: '48:15' - who: Alexey -- line: Yeah, yeah. Why not? - sec: 2906 - time: '48:26' - who: Lera -- header: 'Hiring Criteria: Why marketing backgrounds are valued for product roles' -- line: I see that we have some questions. The question is – maybe we already answered - that – “Did you have the Scrum Master experience and project management experience - before this job? 
Or did you pick it up on the job and learn as you went?” - sec: 2909 - time: '48:29' - who: Alexey -- line: Yeah, I learned as I went. But, as I mentioned before, I had a great pattern. - Even at the start, we had a great project manager – she was also a Scrum Master. - Before coming [to AUTODOC], I already knew all about Scrum ceremonies. I saw how - people did stand-ups, how people did retrospectives, how they did planning, and - I've seen it and I have great patterns of how it can be done. What I did was just - read the Scrum guide and I also went to Scrum training. - sec: 2926 - time: '48:46' - who: Lera -- line: I knew just the theoretical part of this and I tried to learn it on the fly. - You just need to be a great talker, I think. If you know the basics, you just - need to be a great public speaker for this role, because you always need to keep - your guys motivated and involved in the project. And you do it by being this energetic - person – being this serving leader. So you learn the basics and then you… Basically, - to sum it up, I have previous experience of how people did it and I saw how it - worked. Then I read books about Scrum – not just the Scrum guide. I went to Scrum - training. And I just started doing it. - sec: 2926 - time: '48:46' - who: Lera -- line: For me, the most interesting part is that they decided to hire you even though - you had no experience in these areas, which probably means that maybe being an - experienced Scrum Master is not important – and you already proved that – you - can pick the skills up on the job. But do you know what they actually looked at - when deciding to hire you? What kind of skills were they interested in when making - this decision? - sec: 3049 - time: '50:49' - who: Alexey -- line: Historically, at AUTODOC, people that are now in a product ownership position - came from marketing. My boss also came from marketing, and her boss came from - marketing. 
So that's why – those people always had a marketing background. It's - okay to transition from marketing to product ownership. Probably, if we had product - owners come from developers, they would never have actually hired me because they - would be biased about me. I think that played a big role. Because historically, - product owners at AUTODOC came from marketing. - sec: 3078 - time: '51:18' - who: Lera -- line: Which skills, that you already had from your marketing past, helped you in - your current role? - sec: 3129 - time: '52:09' - who: Alexey -- header: 'Marketing Strengths: User understanding, internal PR, and pitching roadmaps' -- line: The first one is understanding… I think what’s common in marketing and product - management is understanding the user. For example, doing “job to be done” frameworks, - understanding customer journeys, understanding customer likes, pains, cases, and - customer needs – all this customer development stuff is present in both marketing - and in product. Basically, I already had this kind of knowledge and it's cool. - The second one I want to share is very interesting – when you work in a super - huge company like AUTODOC (we have 3000 people) it's not the same as working at - a small startup. - sec: 3136 - time: '52:16' - who: Lera -- line: What the difference is – people don't know you, and people don't know about - your product either. I think the crucial role of a product manager in a big company - is to be a PR manager of your product – to be a marketer of your product, an internal - marketer. Make people learn about your product, and about the benefits of your - product. There is a certain department in a company that’s responsible for the - search. In a small company, everybody knows and it's obvious, but in a super-huge - company, it's not obvious. You just need to shine. That's why I post on LinkedIn, - that's why I traveled to communicate with other people. 
- sec: 3136 - time: '52:16' - who: Lera -- line: Because when it comes to working in a huge company, you need to scale your - brand awareness and the awareness of your project. That's where marketing helped - me, because I used my social media to talk about search, and used these techniques - to educate people about my product. We are all working remotely, so information… - We are a little bit isolated. For me, LinkedIn posts are like a virtual version - of communication in our office. - sec: 3136 - time: '52:16' - who: Lera -- line: That's why you post selfies, right? - sec: 3301 - time: '55:01' - who: Alexey -- line: Yeah, it's kind of my trick. I understand the importance. Across a big company, - you need to talk more about your product publicly. That generates… For example, - people from assortment come to me to solve certain problems. It generates new - connections and new ideas. That's why I think it's crucial. To grow in a big company, - you need to be a little bit of a PR manager of yourself, your product, and your - team as well. - sec: 3304 - time: '55:04' - who: Lera -- line: That's how your marketing skills helped, right? [Lera agrees] Because you - already knew how to market something, “Okay! Let's think about what I can do to - be more noticeable within the company.” Right? [Lera agrees] “Let’s run a PPC - campaign.” [laughs] - sec: 3353 - time: '55:53' - who: Alexey -- line: '[laughs] Just kidding. The other thing I wanted to add is that you also need - to sell your ideas. Because when the company is huge and there are so many initiatives, - the top management needs to pick some initiatives that will generate more revenue. - That’s how prioritization works. Sometimes you don''t really know how this feature - will deliver money in the future – you’re selling the future. For project management, - you need to be a great salesperson. You need to encourage and to believe in this - perfect future – to generate revenue and stuff.' 
- sec: 3369 - time: '56:09' - who: Lera -- line: Because you’re selling the future, sometimes it’s obvious what the outcome - of this feature will be. Making presentations helps. I took this from marketing. - Understanding what information should be shown, how you can encourage people to - listen to you – to listen to your new idea. Basically, make a pitch. It's all - about marketing – how you make a pitch. For me, it's the same as when I was working - in B2B marketing – it's how we pitched our product to the B2B users. We just have - this presentation describing what the benefits of this product are. That’s the - same thing I do in this company. I explain what the benefits from my product are. - sec: 3369 - time: '56:09' - who: Lera -- header: 'Recommended Resources: Communication, Professional Product Owner, strategy, - experimentation' -- line: Yeah, thanks. That's a very comprehensive answer. I see that we are almost - running out of time. I still wanted to ask you one thing. You told us that you - needed to learn a lot – you took a lot of courses and you read a lot of books. - We already talked about one book – Relevant Search. Are there any other resources - that you would recommend to listeners who want to learn more about product ownership - and this topic? - sec: 3486 - time: '58:06' - who: Alexey -- line: Actually, it won’t be a typical answer. But I think the crucial take away - from everything I talked about in this interview – the most crucial skill for - the product owner is communication. How you build connections and how you talk, - how good of a manager you are, and how you can encourage people. Basically, I - would recommend you read books about communication, if you haven't before. Books - like Dale Carnegie, or about negotiation – some books about… I don't really know - what it is in English, I read it in another language. It's like, “You Can Negotiate - Everything,” or something like that. Robert Cialdini – about how to influence. 
- All these books are about communication. I also read a book about communication - called Aikido. A funny one. - sec: 3516 - time: '58:36' - who: Lera -- line: So, I think you really need to master your communication skills. That’s the - first one. There are books related to product ownership – there is a book called - Professional Product Owner. It's recommended by Scrum. You can go to Scrum.org - and you will see this book. I read it and it's pretty valuable. It gives you a - picture of what the product owner is. Also, there's a special book for Scrum – - to understand the basics of Scrum – Philosophy of Scrum, I think. I think it's - also good to read this one. What else? - sec: 3516 - time: '58:36' - who: Lera -- line: Some books related to your specific topic. For example, I read about search. - I also remember one book about strategy called Strategize. It’s also very connected - to product ownership. User Story Mapping also goes to the product ownership basket. - Also some books related to, for example, e-commerce. It is like a book about experimentation - and A/B testing. It’s also a very interesting one. I haven't read a book about - car parts, actually. [laughs] - sec: 3516 - time: '58:36' - who: Lera -- line: Not yet, maybe. [chuckles] - sec: 3694 - time: '1:01:34' - who: Alexey -- line: Not yet. [chuckles] We actually have specialists that are more knowledgeable - in cars – you can grab them into a meeting and they will explain stuff to you. - I think you can divide some skills you will develop and grab some books for the - skills, like communication, product management by itself, some books about your - features (like search) and about e-commerce, and about your business domain. When - you’re working, it's also fine to have some knowledge. 
- sec: 3695 - time: '1:01:35' - who: Lera -- line: One other tip that you shared with us was learning from everyone – scheduling - meetings with engineers, QA engineers, business analysts – and just asking them… - What did you ask? “What do you expect from the product owner? What’s the best - product owner in the company?” Then, also make some small talk to build a connection - and, I guess, learn about what they do. I really liked that part. I took a note - on it. - sec: 3734 - time: '1:02:14' - who: Alexey -- line: '[chuckles] Really cool.' - sec: 3762 - time: '1:02:42' - who: Lera -- line: Yeah. I mean, I take a lot of notes. Four sheets here. Anyway, We should be - wrapping up. Is there anything you want to mention before we finish? - sec: 3764 - time: '1:02:44' - who: Alexey -- header: 'Closing Advice: Breaking mental barriers and committing to continuous learning' -- line: Yeah. I just want to give some general advice. For me, this is a story about - how the barriers in your head can be broken. Yeah, I didn't have the same experience - that I did before but it's possible to learn. It's possible. If you want to change - your career, if you want to be in a different position, it's possible. You just - need to break down the barriers in your head. It will be difficult for the first - half a year. Yes. But if you have this dream, you can do it. It's possible. - sec: 3775 - time: '1:02:55' - who: Lera -- line: So the main barrier is in your head. Right? [Lera agrees] Yeah. Okay! Thanks - a lot! I really enjoyed this interview. It was great. Thanks for coming. Thanks - for joining us. Thanks for sharing your experience with us. And thanks, everyone, - for joining and asking questions. It was amazing. Thank you. Have a great week, - everyone. Bye. - sec: 3818 - time: '1:03:38' - who: Alexey -description: Discover e-commerce search with Elasticsearch & NLP, plus product owner - tactics, roadmap prioritization, team-building and hiring tips to boost conversions. 
-intro: 'How do you move from performance marketing into a product role and build scalable - e-commerce search with Elasticsearch and NLP? In this episode, Lera Kaimashnіkova - — an e-commerce Product Owner focused on site search optimization, analytics, and - conversion — walks through that exact journey. She explains transitioning from B2B - marketing to product ownership, landing a PO role at AUTODOC, and relocating to - Germany while learning the craft on the job.

You''ll hear practical approaches - to structuring one‑on‑ones, owning monitoring, roadmaps, and prioritization, and - choosing Scrum for deliveries vs. Kanban for investigations. Lera covers forming - a dedicated e‑commerce search team and the technical side of relevance engineering: - Elasticsearch, autocomplete, search filters, vehicle selector and part fitment flows, - plus NLP and information retrieval learnings from Haystack and ChatGPT experimentation. - She also discusses recruiting remote developers, why marketing backgrounds are valuable - for product roles, and resources for communication, strategy, and experimentation. -

If you’re responsible for site search, product discovery, or making the - leap into product ownership, this episode delivers concrete tactics for improving - relevance, boosting conversion rates, and growing your technical and team capabilities.' -dateadded: '2023-11-05' -duration: PT01H02M19S -quotableClips: -- name: Podcast Introduction & Guest Welcome - startOffset: 0 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=0 - endOffset: 111 -- name: 'Background: Transition from performance marketing to product roles' - startOffset: 111 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=111 - endOffset: 134 -- name: 'Marketing Experience: B2B e‑commerce, lead acquisition, branding' - startOffset: 134 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=134 - endOffset: 566 -- name: Landing Product Owner Role at AUTODOC Despite Non‑traditional Fit - startOffset: 566 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=566 - endOffset: 623 -- name: 'Relocation: Moving from Ukraine to Germany during 2020' - startOffset: 623 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=623 - endOffset: 704 -- name: 'Transition Challenges: Learning product ownership on the job' - startOffset: 704 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=704 - endOffset: 996 -- name: 'One‑on‑Ones: Structuring meetings to align with engineers and QA' - startOffset: 996 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=996 - endOffset: 1347 -- name: 'Product Owner Scope: Monitoring, roadmap, prioritization, team operations' - startOffset: 1347 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1347 - endOffset: 1727 -- name: 'Process Choices: Scrum for deliveries vs Kanban for investigations' - startOffset: 1727 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1727 - endOffset: 1790 -- name: 'Team Building: Forming a dedicated e‑commerce search team' - startOffset: 1790 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=1790 - endOffset: 2072 -- name: 'Search Expertise: 
Relevant Search book, Elasticsearch, and relevance as business - context' - startOffset: 2072 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2072 - endOffset: 2348 -- name: 'User Journey: Vehicle selector, part fitment, and contextual search flows' - startOffset: 2348 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2348 - endOffset: 2571 -- name: 'Technical Upskilling: NLP, information retrieval, Haystack conference, and - ChatGPT' - startOffset: 2571 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2571 - endOffset: 2802 -- name: 'Recruiting: Remote roles, office hubs, and open developer positions' - startOffset: 2802 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2802 - endOffset: 2909 -- name: 'Hiring Criteria: Why marketing backgrounds are valued for product roles' - startOffset: 2909 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=2909 - endOffset: 3136 -- name: 'Marketing Strengths: User understanding, internal PR, and pitching roadmaps' - startOffset: 3136 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3136 - endOffset: 3486 -- name: 'Recommended Resources: Communication, Professional Product Owner, strategy, - experimentation' - startOffset: 3486 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3486 - endOffset: 3775 -- name: 'Closing Advice: Breaking mental barriers and committing to continuous learning' - startOffset: 3775 - url: https://www.youtube.com/watch?v=-HbQQ_bVdfE&t=3775 - endOffset: 3739 ---- - -Links: - -* [Post](https://www.linkedin.com/posts/leracaiman_elasticsearch-ecommerce-activity-7106615081588674560-5WQO){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s10e05-growing-data-engineering-team-in-scale-up.md b/_podcast/scaling-data-engineering-teams-self-service-platforms.md similarity index 95% rename from _podcast/s10e05-growing-data-engineering-team-in-scale-up.md rename to _podcast/scaling-data-engineering-teams-self-service-platforms.md index cf338f28..9ab71fce 100644 --- 
a/_podcast/s10e05-growing-data-engineering-team-in-scale-up.md +++ b/_podcast/scaling-data-engineering-teams-self-service-platforms.md @@ -1,20 +1,130 @@ --- +title: "Scale Data Engineering Teams: Build Self-Service Data Platforms, Hire Senior Engineers & Use Kafka" +short: "Growing Data Engineering Team in a Scale-Up" +season: 10 episode: 5 guests: - mehdiouazza +image: images/podcast/scaling-data-engineering-teams-self-service-platforms.jpg ids: anchor: Growing-Data-Engineering-Team-in-a-Scale-Up---Mehdi-OUAZZA-e1mq8et youtube: acJ6sVqKOUk -image: images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Growing-Data-Engineering-Team-in-a-Scale-Up---Mehdi-OUAZZA-e1mq8et apple: https://podcasts.apple.com/us/podcast/growing-data-engineering-team-in-a-scale-up-mehdi-ouazza/id1541710331?i=1000577461365 spotify: https://open.spotify.com/episode/5DkuaYQpbJ13sU9bknFZnk?si=RtQnTHHYQb-ytMEw8J3e8g youtube: https://www.youtube.com/watch?v=acJ6sVqKOUk -season: 10 -short: Growing Data Engineering Team in a Scale-Up -title: 'Scale Data Engineering Teams: Build Self‑Service Data Platforms, Hire Senior - Engineers & Use Kafka' + +description: "Master scaling data engineering teams: build self-service data platforms, hire senior engineers, deploy Kafka best practices to boost velocity, onboarding." +topics: +- data engineering +- data governance +- tools +- open-source +- career development +intro: "How do you scale data engineering teams during hypergrowth without sacrificing quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, orchestration, infrastructure and analytics — walks through practical approaches to scale data engineering teams, build self-service data platforms, hire senior engineers and adopt Kafka-based event streaming.

We cover what “scale-up” looks like in practice (rapid hiring, product launches, US expansion), the data platform’s role in enabling self-service onboarding and scalability, and a platform anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi also digs into event streaming: Kafka, schema registries and data contracts, plus hiring-for-scale tactics — prioritizing senior experts and niche tech experience — and assessment strategies like reverse interviews. You’ll hear about balancing platform engineering and use-case pipelines, cultivating culture shifts, creating junior learning paths, and growing toward senior roles through proactivity and cross-team impact.

Listen for concrete guidance on building a self-service data platform, practical Kafka practices, and hiring strategies that help teams move fast while staying reliable." +dateadded: 2022-08-29 + +duration: PT01H01M25S + +quotableClips: +- name: 'Episode Introduction: Growing Data Engineering Team & Guest Mehdi' + startOffset: 117 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=117 + endOffset: 162 +- name: 'Guest background: BI, on-prem Big Data to staff data engineer (career highlights)' + startOffset: 162 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=162 + endOffset: 341 +- name: 'Defining scale-up: hypergrowth, funding, hiring surge, speed vs quality' + startOffset: 341 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=341 + endOffset: 621 +- name: 'Hypergrowth challenges: product launches, US expansion, operational strain' + startOffset: 621 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=621 + endOffset: 750 +- name: 'Data platform role: enabling self-service, onboarding, and scalability' + startOffset: 750 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=750 + endOffset: 1042 +- name: 'Data platform anatomy: Airflow, conventions, playbooks, and best practices' + startOffset: 1042 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1042 + endOffset: 1213 +- name: 'Hiring for scale: prioritize senior experts and niche technology experience' + startOffset: 1213 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1213 + endOffset: 1406 +- name: 'Event streaming practices: Kafka, schemas, schema registry, and data contracts' + startOffset: 1406 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1406 + endOffset: 1625 +- name: 'Velocity vs growth: managing fast pace while ensuring personal growth' + startOffset: 1625 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1625 + endOffset: 1867 +- name: 'Culture shift: evolving processes and influencing company norms' + startOffset: 1867 + url: 
https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1867 + endOffset: 2105 +- name: 'Career trade-offs: scale-up vs enterprise vs FAANG' + startOffset: 2105 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2105 + endOffset: 2292 +- name: 'Assessment tactics: reverse interviews to evaluate team workload and culture' + startOffset: 2292 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2292 + endOffset: 2342 +- name: 'Junior opportunities: rapid learning, promotions, and exposure in scale-ups' + startOffset: 2342 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2342 + endOffset: 2451 +- name: 'Talent sourcing: employer brand, community contributions, and open source' + startOffset: 2451 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2451 + endOffset: 2804 +- name: 'Technical content: writing, OSS contributions, and getting external feedback' + startOffset: 2804 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2804 + endOffset: 2946 +- name: 'Community engagement: reader outreach, calls, and mentorship benefits' + startOffset: 2946 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2946 + endOffset: 3017 +- name: 'Role evolution: generalist to specialist as teams and projects mature' + startOffset: 3017 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3017 + endOffset: 3175 +- name: 'Work balance: platform engineering vs use-case pipelines (~50/50)' + startOffset: 3175 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3175 + endOffset: 3271 +- name: 'Path to senior: proactivity, broader impact, and cross-team collaboration' + startOffset: 3271 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3271 + endOffset: 3394 +- name: 'Casual segment: light banter about music, caps, and hobbies' + startOffset: 3394 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3394 + endOffset: 3468 +- name: 'Creator spotlight: MehdiO DataTV, DataCreators.club, and content channels' + startOffset: 3468 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3468 + 
endOffset: 3612 +- name: 'Content production: time investment, process improvements, and persistence' + startOffset: 3612 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3612 + endOffset: 3713 +- name: 'Video editing tips: multi-take filming, lighting consistency, and tricks' + startOffset: 3713 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3713 + endOffset: 3776 +- name: 'Episode close: key takeaways, resources, and links' + startOffset: 3776 + url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3776 + endOffset: 3685 + transcript: - header: 'Episode Introduction: Growing Data Engineering Team & Guest Mehdi' - line: This week, we will talk about growing a data engineering team in a scale-up. @@ -36,7 +146,7 @@ transcript: sec: 155 time: '2:35' who: Alexey -- header: 'Guest background: BI, on‑prem Big Data to staff data engineer (career highlights)' +- header: 'Guest background: BI, on-prem Big Data to staff data engineer (career highlights)' - line: Yeah, sure. I started, as you mentioned, about eight years ago in the data world, doing classic BI with Microsoft tooling, and mostly click, and drag-and-drop tooling. And then I had quite quickly the opportunity to jump early on a Big Data @@ -85,7 +195,7 @@ transcript: sec: 325 time: '5:25' who: Mehdi -- header: 'Defining scale‑up: hypergrowth, funding, hiring surge, speed vs quality' +- header: 'Defining scale-up: hypergrowth, funding, hiring surge, speed vs quality' - line: '[laughs] So what does it mean to be a scale-up? I know what a startup is – a startup is a company that just started up. I also know more or less what an enterprise is – it’s a huge company with a lot of people working there. So what @@ -203,7 +313,7 @@ transcript: sec: 709 time: '11:49' who: Mehdi -- header: 'Data platform role: enabling self‑service, onboarding, and scalability' +- header: 'Data platform role: enabling self-service, onboarding, and scalability' - line: And what do data engineers do in a scale-up environment? 
How is this world different from your typical enterprise? sec: 750 @@ -570,7 +680,7 @@ transcript: sec: 2001 time: '33:21' who: Mehdi -- header: 'Career trade‑offs: scale‑up vs enterprise vs FAANG' +- header: 'Career trade-offs: scale-up vs enterprise vs FAANG' - line: So what do you recommend to somebody that’s a senior and that person is considering multiple offers – and one of these offers is for a scale-up company. Let's say another offer is for an enterprise. Would you suggest that the person goes with @@ -633,7 +743,7 @@ transcript: sec: 2295 time: '38:15' who: Mehdi -- header: 'Junior opportunities: rapid learning, promotions, and exposure in scale‑ups' +- header: 'Junior opportunities: rapid learning, promotions, and exposure in scale-ups' - line: Would your answer be different for a junior specialist? Somebody who is just entering the field of data engineering and maybe has less than one year of experience? For them it may be just too boring to work nine to five? @@ -840,7 +950,7 @@ transcript: sec: 3039 time: '50:39' who: Mehdi -- header: 'Work balance: platform engineering vs use‑case pipelines (~50/50)' +- header: 'Work balance: platform engineering vs use-case pipelines (~50/50)' - line: At the beginning, you also mentioned that there are different kinds of data engineers. One type of data engineers are those that work on platforms, and the other kind works more on use cases. The question that we have here is, “What is @@ -867,7 +977,7 @@ transcript: sec: 3269 time: '54:29' who: Mehdi -- header: 'Path to senior: proactivity, broader impact, and cross‑team collaboration' +- header: 'Path to senior: proactivity, broader impact, and cross-team collaboration' - line: What would you say is the most important attribute for a data engineer who wants to get promoted from a mid-level role to a senior role? 
sec: 3271 @@ -1048,7 +1158,7 @@ transcript: sec: 3710 time: '1:01:50' who: Mehdi -- header: 'Video editing tips: multi‑take filming, lighting consistency, and tricks' +- header: 'Video editing tips: multi-take filming, lighting consistency, and tricks' - line: How did you do this? You filmed three different things and then you kind of stitched it through a program? sec: 3713 @@ -1084,124 +1194,6 @@ transcript: sec: 3802 time: '1:03:22' who: Mehdi -description: 'Master scaling data engineering teams: build self-service data platforms, - hire senior engineers, deploy Kafka best practices to boost velocity, onboarding.' -intro: 'How do you scale data engineering teams during hypergrowth without sacrificing - quality or developer velocity? In this episode, Mehdi OUAZZA — a data engineer and - entrepreneur with 7+ years working on streaming and batch pipelines, data modeling, - orchestration, infrastructure and analytics — walks through practical approaches - to scale data engineering teams, build self‑service data platforms, hire senior - engineers and adopt Kafka-based event streaming.

We cover what “scale‑up” - looks like in practice (rapid hiring, product launches, US expansion), the data - platform’s role in enabling self‑service onboarding and scalability, and a platform - anatomy that includes Airflow, conventions, playbooks and best practices. Mehdi - also digs into event streaming: Kafka, schema registries and data contracts, plus - hiring-for-scale tactics — prioritizing senior experts and niche tech experience - — and assessment strategies like reverse interviews. You’ll hear about balancing - platform engineering and use‑case pipelines, cultivating culture shifts, creating - junior learning paths, and growing toward senior roles through proactivity and cross‑team - impact.

Listen for concrete guidance on building a self‑service data platform, - practical Kafka practices, and hiring strategies that help teams move fast while - staying reliable.' -dateadded: '2022-08-29' -duration: PT01H01M25S -quotableClips: -- name: 'Episode Introduction: Growing Data Engineering Team & Guest Mehdi' - startOffset: 117 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=117 - endOffset: 162 -- name: 'Guest background: BI, on‑prem Big Data to staff data engineer (career highlights)' - startOffset: 162 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=162 - endOffset: 341 -- name: 'Defining scale‑up: hypergrowth, funding, hiring surge, speed vs quality' - startOffset: 341 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=341 - endOffset: 621 -- name: 'Hypergrowth challenges: product launches, US expansion, operational strain' - startOffset: 621 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=621 - endOffset: 750 -- name: 'Data platform role: enabling self‑service, onboarding, and scalability' - startOffset: 750 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=750 - endOffset: 1042 -- name: 'Data platform anatomy: Airflow, conventions, playbooks, and best practices' - startOffset: 1042 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1042 - endOffset: 1213 -- name: 'Hiring for scale: prioritize senior experts and niche technology experience' - startOffset: 1213 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1213 - endOffset: 1406 -- name: 'Event streaming practices: Kafka, schemas, schema registry, and data contracts' - startOffset: 1406 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1406 - endOffset: 1625 -- name: 'Velocity vs growth: managing fast pace while ensuring personal growth' - startOffset: 1625 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1625 - endOffset: 1867 -- name: 'Culture shift: evolving processes and influencing company norms' - startOffset: 1867 - url: 
https://www.youtube.com/watch?v=acJ6sVqKOUk&t=1867 - endOffset: 2105 -- name: 'Career trade‑offs: scale‑up vs enterprise vs FAANG' - startOffset: 2105 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2105 - endOffset: 2292 -- name: 'Assessment tactics: reverse interviews to evaluate team workload and culture' - startOffset: 2292 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2292 - endOffset: 2342 -- name: 'Junior opportunities: rapid learning, promotions, and exposure in scale‑ups' - startOffset: 2342 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2342 - endOffset: 2451 -- name: 'Talent sourcing: employer brand, community contributions, and open source' - startOffset: 2451 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2451 - endOffset: 2804 -- name: 'Technical content: writing, OSS contributions, and getting external feedback' - startOffset: 2804 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2804 - endOffset: 2946 -- name: 'Community engagement: reader outreach, calls, and mentorship benefits' - startOffset: 2946 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=2946 - endOffset: 3017 -- name: 'Role evolution: generalist to specialist as teams and projects mature' - startOffset: 3017 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3017 - endOffset: 3175 -- name: 'Work balance: platform engineering vs use‑case pipelines (~50/50)' - startOffset: 3175 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3175 - endOffset: 3271 -- name: 'Path to senior: proactivity, broader impact, and cross‑team collaboration' - startOffset: 3271 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3271 - endOffset: 3394 -- name: 'Casual segment: light banter about music, caps, and hobbies' - startOffset: 3394 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3394 - endOffset: 3468 -- name: 'Creator spotlight: MehdiO DataTV, DataCreators.club, and content channels' - startOffset: 3468 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3468 - 
endOffset: 3612 -- name: 'Content production: time investment, process improvements, and persistence' - startOffset: 3612 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3612 - endOffset: 3713 -- name: 'Video editing tips: multi‑take filming, lighting consistency, and tricks' - startOffset: 3713 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3713 - endOffset: 3776 -- name: 'Episode close: key takeaways, resources, and links' - startOffset: 3776 - url: https://www.youtube.com/watch?v=acJ6sVqKOUk&t=3776 - endOffset: 3685 --- Links: diff --git a/_podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.md b/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md similarity index 97% rename from _podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.md rename to _podcast/scaling-enterprise-ai-mlops-data-first-strategy.md index e2099c61..8c1bab0b 100644 --- a/_podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.md +++ b/_podcast/scaling-enterprise-ai-mlops-data-first-strategy.md @@ -1,20 +1,121 @@ --- +title: "Scale Enterprise AI: Data-First Strategies, MLOps Best Practices & Realistic Experiments" +short: "Lessons Learned About Data & AI at Enterprises" +season: 10 episode: 4 guests: - alexanderhendorf +image: images/podcast/scaling-enterprise-ai-mlops-data-first-strategy.jpg ids: anchor: Lessons-Learned-About-Data--AI-at-Enterprises---Alexander-Hendorf-e1milm0/a-a8d08ua youtube: Vms29u9xC3k -image: images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Lessons-Learned-About-Data--AI-at-Enterprises---Alexander-Hendorf-e1milm0/a-a8d08ua apple: https://podcasts.apple.com/us/podcast/lessons-learned-about-data-ai-at-enterprises-alexander/id1541710331?i=1000576622709 spotify: https://open.spotify.com/episode/5t3SwzH17mFjxEoDUx9i5c?si=gaTfOoFnQ7muVkBiYuMxuA youtube: https://www.youtube.com/watch?v=Vms29u9xC3k -season: 10 -short: Lessons Learned About Data & AI at 
Enterprises -title: 'Scale Enterprise AI: Data-First Strategies, MLOps Best Practices & Realistic - Experiments' + +description: "Discover data-first Enterprise AI strategies and MLOps best practices—learn realistic experiments, CI/CD, governance, and align ML to business impact" +topics: +- MLOps +- AI +- machine learning +- data engineering +- open-source +intro: "How do you move from proof-of-concept to scaled enterprise AI without over-investing in hype? In this episode, Alexander Hendorf — head of data and AI at KÖNIGSWEG, PyData chair and Python Software Foundation/EuroPython fellow — walks through pragmatic, data-first strategies for scaling AI across organizations.

We cover how to align AI initiatives with company goals, run realistic experiments (and why transparent evaluation matters), and set expectations about AI’s limits (the “Beethoven” example). Alexander breaks down a data-first architecture — data lake, BI vs. ML vs. deep learning splits — and explains productionization needs like retraining, feedback loops, and MLOps automation. He shares MLOps best practices: standardization, CI/CD, governance, reproducibility, and warnings about vendor lock-in and consultancy pitfalls. You’ll also hear advice on prioritization over perfection, timing innovation, and choosing platforms that fit long-term team maturity.

Listen for actionable guidance on experiment design, model evaluation, and building repeatable pipelines so you can scale enterprise AI responsibly and sustainably. Ideal for data leaders, ML engineers, and product owners implementing production ML and MLOps." +dateadded: 2022-08-19 + +duration: PT01H01M25S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=0 + endOffset: 122 +- name: 'Guest Overview: Alexander Hendorf — Königsweg partner & PyData chair' + startOffset: 122 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=122 + endOffset: 199 +- name: 'Career Path: from law and DJing to programming and machine learning' + startOffset: 199 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=199 + endOffset: 307 +- name: 'Partner Role: team leadership, strategy, and client selection' + startOffset: 307 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=307 + endOffset: 576 +- name: 'Community Engagement: PyData, cross-domain learning, and meetups' + startOffset: 576 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=576 + endOffset: 693 +- name: 'Conference Organizing: becoming chair, scaling events, and organizer summit' + startOffset: 693 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=693 + endOffset: 991 +- name: 'Public Speaking: generating talk ideas and learning through presentations' + startOffset: 991 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=991 + endOffset: 1256 +- name: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' + startOffset: 1256 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1256 + endOffset: 1471 +- name: 'Communicating AI to Business: simplification, open source, and stakeholder + buy-in' + startOffset: 1471 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1471 + endOffset: 1878 +- name: 'Enterprise AI Strategy: aligning initiatives, experiments, and company goals' + startOffset: 1878 + url: 
https://www.youtube.com/watch?v=Vms29u9xC3k&t=1878 + endOffset: 2210 +- name: 'Experimentation Reality: evaluation, transparency, and avoiding hype-driven + bets' + startOffset: 2210 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2210 + endOffset: 2242 +- name: 'AI Limitations Illustrated: realistic expectations (Beethoven example)' + startOffset: 2242 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2242 + endOffset: 2568 +- name: 'Innovation Patience: retrospectives, avoiding over-engineering, and timing' + startOffset: 2568 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2568 + endOffset: 2763 +- name: 'Prioritization Over Perfection: "good enough" engineering and impact focus' + startOffset: 2763 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2763 + endOffset: 2950 +- name: 'Data-First Approach: data lake concept, BI vs. ML vs. deep learning split' + startOffset: 2950 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2950 + endOffset: 3132 +- name: 'Productionization Needs: retraining, feedback loops, and MLOps automation' + startOffset: 3132 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3132 + endOffset: 3214 +- name: 'MLOps Best Practices: standardization, CI/CD, governance, and reproducibility' + startOffset: 3214 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3214 + endOffset: 3335 +- name: 'MLOps Hype vs. 
Reality: buzzword caution and consultancy pitfalls' + startOffset: 3335 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3335 + endOffset: 3531 +- name: 'Platform Selection & Longevity: vendor lock-in, long-term planning, and team + maturity' + startOffset: 3531 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3531 + endOffset: 3667 +- name: 'How to Reach Alexander: LinkedIn, Twitter, and PyData events' + startOffset: 3667 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3667 + endOffset: 3754 +- name: Episode Wrap-Up and Upcoming Conferences + startOffset: 3754 + url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3754 + endOffset: 3685 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Alexander Hendorf — Königsweg partner & PyData chair' @@ -346,7 +447,7 @@ transcript: sec: 1251 time: '20:51' who: Alexey -- header: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' +- header: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' - line: It's an important topic to say, “Hey, there's the index. It's a very important structure when you work with the data. Actually, you can do really cool things for that as well, which are really useful and big timesavers.” That was then. @@ -1029,113 +1130,6 @@ transcript: sec: 3807 time: '1:03:27' who: Alexander -description: Discover data-first Enterprise AI strategies and MLOps best practices—learn - realistic experiments, CI/CD, governance, and align ML to business impact. -intro: 'How do you move from proof-of-concept to scaled enterprise AI without over-investing - in hype? In this episode, Alexander Hendorf — head of data and AI at KÖNIGSWEG, PyData - chair and Python Software Foundation/EuroPython fellow — walks through pragmatic, - data-first strategies for scaling AI across organizations.

We cover how - to align AI initiatives with company goals, run realistic experiments (and why transparent - evaluation matters), and set expectations about AI’s limits (the “Beethoven” example). - Alexander breaks down a data-first architecture — data lake, BI vs. ML vs. deep - learning splits — and explains productionization needs like retraining, feedback - loops, and MLOps automation. He shares MLOps best practices: standardization, CI/CD, - governance, reproducibility, and warnings about vendor lock-in and consultancy pitfalls. - You’ll also hear advice on prioritization over perfection, timing innovation, and - choosing platforms that fit long-term team maturity.

Listen for actionable - guidance on experiment design, model evaluation, and building repeatable pipelines - so you can scale enterprise AI responsibly and sustainably. Ideal for data leaders, - ML engineers, and product owners implementing production ML and MLOps.' -dateadded: '2022-08-19' -duration: PT01H01M25S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=0 - endOffset: 122 -- name: 'Guest Overview: Alexander Hendorf — Königsweg partner & PyData chair' - startOffset: 122 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=122 - endOffset: 199 -- name: 'Career Path: from law and DJing to programming and machine learning' - startOffset: 199 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=199 - endOffset: 307 -- name: 'Partner Role: team leadership, strategy, and client selection' - startOffset: 307 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=307 - endOffset: 576 -- name: 'Community Engagement: PyData, cross-domain learning, and meetups' - startOffset: 576 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=576 - endOffset: 693 -- name: 'Conference Organizing: becoming chair, scaling events, and organizer summit' - startOffset: 693 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=693 - endOffset: 991 -- name: 'Public Speaking: generating talk ideas and learning through presentations' - startOffset: 991 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=991 - endOffset: 1256 -- name: 'Technical Talks: Pandas deep dives and "Deep Learning for Fun & Profit"' - startOffset: 1256 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1256 - endOffset: 1471 -- name: 'Communicating AI to Business: simplification, open source, and stakeholder - buy-in' - startOffset: 1471 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=1471 - endOffset: 1878 -- name: 'Enterprise AI Strategy: aligning initiatives, experiments, and company goals' - startOffset: 1878 - url: 
https://www.youtube.com/watch?v=Vms29u9xC3k&t=1878 - endOffset: 2210 -- name: 'Experimentation Reality: evaluation, transparency, and avoiding hype-driven - bets' - startOffset: 2210 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2210 - endOffset: 2242 -- name: 'AI Limitations Illustrated: realistic expectations (Beethoven example)' - startOffset: 2242 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2242 - endOffset: 2568 -- name: 'Innovation Patience: retrospectives, avoiding over-engineering, and timing' - startOffset: 2568 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2568 - endOffset: 2763 -- name: 'Prioritization Over Perfection: "good enough" engineering and impact focus' - startOffset: 2763 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2763 - endOffset: 2950 -- name: 'Data-First Approach: data lake concept, BI vs. ML vs. deep learning split' - startOffset: 2950 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=2950 - endOffset: 3132 -- name: 'Productionization Needs: retraining, feedback loops, and MLOps automation' - startOffset: 3132 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3132 - endOffset: 3214 -- name: 'MLOps Best Practices: standardization, CI/CD, governance, and reproducibility' - startOffset: 3214 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3214 - endOffset: 3335 -- name: 'MLOps Hype vs. 
Reality: buzzword caution and consultancy pitfalls' - startOffset: 3335 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3335 - endOffset: 3531 -- name: 'Platform Selection & Longevity: vendor lock-in, long-term planning, and team - maturity' - startOffset: 3531 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3531 - endOffset: 3667 -- name: 'How to Reach Alexander: LinkedIn, Twitter, and PyData events' - startOffset: 3667 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3667 - endOffset: 3754 -- name: Episode Wrap-Up and Upcoming Conferences - startOffset: 3754 - url: https://www.youtube.com/watch?v=Vms29u9xC3k&t=3754 - endOffset: 3685 --- Links: diff --git a/_podcast/s13e05-se4ml-software-engineering-for-machine-learning.md b/_podcast/software-engineering-for-machine-learning.md similarity index 97% rename from _podcast/s13e05-se4ml-software-engineering-for-machine-learning.md rename to _podcast/software-engineering-for-machine-learning.md index 046cc645..1879b404 100644 --- a/_podcast/s13e05-se4ml-software-engineering-for-machine-learning.md +++ b/_podcast/software-engineering-for-machine-learning.md @@ -1,20 +1,130 @@ --- +title: "Software Engineering for ML: Prevent Hidden Technical Debt with MLOps, Documentation & Team Alignment" +short: "Software Engineering for ML" +season: 13 episode: 5 guests: - nadianahar +image: images/podcast/software-engineering-for-machine-learning.jpg ids: anchor: ow/datatalksclub/episodes/SE4ML---Software-Engineering-for-Machine-Learning---Nadia-Nahar-e20svmn youtube: 35Ch8xL2SA8 -image: images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/SE4ML---Software-Engineering-for-Machine-Learning---Nadia-Nahar-e20svmn apple: https://podcasts.apple.com/us/podcast/se4ml-software-engineering-for-machine-learning-nadia/id1541710331?i=1000605782433 spotify: 
https://open.spotify.com/episode/6ElyurOyGfRiCwLGUWOG7f?si=6k0i3XNUSPWd31vsZv4pfA youtube: https://www.youtube.com/watch?v=35Ch8xL2SA8 -season: 13 -short: SE4ML - Software Engineering for Machine Learning -title: 'Prevent Hidden Technical Debt in ML Systems: Software Engineering, MLOps, - Docs & Responsible AI' + +description: "Learn how to prevent hidden technical debt in ML systems with MLOps, documentation and responsible AI— improve reliability, tests, and team alignment" +intro: "How do teams prevent hidden technical debt in ML systems before it derails production? In this episode, Nadia Nahar, a PhD student in Software Engineering at Carnegie Mellon University, walks through the software-engineering challenges unique to machine learning and practical steps to reduce long-term costs.

We cover defining software engineering for ML systems, differences from traditional software (uncertainty, data workflows, monitoring), and the “hidden technical debt” scope. Nadia describes an artifact analysis of open-source ML products (~300 repos), common failure modes (discontinuation, unmet requirements, poor data, deployment gaps), and research methods combining manual review with commit/code scripts. Key topics include requirements alignment, team structures and integration patterns, CRISP-DM vs Agile mismatches, MLOps and engineering support, plus documentation practices (Model Cards, Datasheets, factsheets, checklists). We also discuss responsible AI use cases—explainability needs in healthcare and education, including a classroom game predicting smoking risk—and governance approaches for product-centric fairness.

Listen to learn concrete remedies—workshops, shared vocabularies, documentation standards, and how to involve ML practitioners from requirements through testing—to prevent hidden technical debt in ML systems" +topics: +- software engineering +- machine learning +- MLOps +dateadded: 2023-03-27 + +duration: PT00H58M25S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=0 + endOffset: 96 +- name: 'Guest Background: Nadia Nahar (PhD, software engineering)' + startOffset: 96 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=96 + endOffset: 254 +- name: Academia–Industry Collaboration in Software Engineering + startOffset: 254 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=254 + endOffset: 418 +- name: Defining Software Engineering for Machine Learning Systems + startOffset: 418 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=418 + endOffset: 462 +- name: 'ML vs Traditional Software: uncertainty, data workflows, monitoring' + startOffset: 462 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=462 + endOffset: 612 +- name: 'System-Centric Perspective: "Hidden Technical Debt" and scope' + startOffset: 612 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=612 + endOffset: 654 +- name: 'Industry Pain Points: requirements, unrealistic expectations, data access' + startOffset: 654 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=654 + endOffset: 832 +- name: 'Communication & Alignment: vocabulary, expectation setting, documentation' + startOffset: 832 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=832 + endOffset: 917 +- name: 'Artifact Analysis: building an open-source ML product dataset' + startOffset: 917 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=917 + endOffset: 1145 +- name: 'Open-Source ML Products: dataset size (~300 repos) and availability issues' + startOffset: 1145 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1145 + endOffset: 1314 +- name: 'Product 
Criteria: distinguishing ML products from models and APIs' + startOffset: 1314 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1314 + endOffset: 1443 +- name: 'Dataset Research Questions: development order, collaboration, testing, ops, + responsible AI' + startOffset: 1443 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1443 + endOffset: 1562 +- name: 'Analysis Approach: manual review augmented by scripts (commits & code)' + startOffset: 1562 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1562 + endOffset: 1782 +- name: 'Failure Modes: discontinuation, unmet requirements, poor data, deployment + gaps' + startOffset: 1782 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1782 + endOffset: 2062 +- name: 'Process Gap: CRISP-DM, Agile mismatch, and the need for integrated ML+SW + processes' + startOffset: 2062 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2062 + endOffset: 2188 +- name: 'Team Structures & Integration Patterns: siloing, APIs, all-in-one teams, + ML engineers' + startOffset: 2188 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2188 + endOffset: 2345 +- name: 'Practical Remedies: workshops, shared vocabularies, documentation, engineering + support (MLOps)' + startOffset: 2345 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2345 + endOffset: 2567 +- name: 'Documentation Practices: Model Cards, Datasheets, factsheets, and checklists' + startOffset: 2567 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2567 + endOffset: 2836 +- name: 'Responsible AI Research: explainability requirements in healthcare and education' + startOffset: 2836 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2836 + endOffset: 3003 +- name: 'Explainability Use Case: classroom game predicting smoking risk and stakeholder + needs' + startOffset: 3003 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3003 + endOffset: 3256 +- name: 'Responsible AI Governance: product-centric fairness and team accountability' + startOffset: 3256 + url: 
https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3256 + endOffset: 3415 +- name: 'Agile Integration: involving ML practitioners from requirements through testing' + startOffset: 3415 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3415 + endOffset: 3601 +- name: Closing Remarks & Resources + startOffset: 3601 + url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3601 + endOffset: 3505 + transcript: - header: Podcast Introduction - header: 'Guest Background: Nadia Nahar (PhD, software engineering)' @@ -1092,126 +1202,6 @@ transcript: sec: 3601 time: '1:00:01' who: Nadia -description: Learn how to prevent hidden technical debt in ML systems with MLOps, - documentation and responsible AI— improve reliability, tests, and team alignment. -intro: How do teams prevent hidden technical debt in ML systems before it derails - production? In this episode, Nadia Nahar, a PhD student in Software Engineering at - Carnegie Mellon University, walks through the software-engineering challenges unique - to machine learning and practical steps to reduce long-term costs.

We cover - defining software engineering for ML systems, differences from traditional software - (uncertainty, data workflows, monitoring), and the “hidden technical debt” scope. - Nadia describes an artifact analysis of open-source ML products (~300 repos), common - failure modes (discontinuation, unmet requirements, poor data, deployment gaps), - and research methods combining manual review with commit/code scripts. Key topics - include requirements alignment, team structures and integration patterns, CRISP-DM - vs Agile mismatches, MLOps and engineering support, plus documentation practices - (Model Cards, Datasheets, factsheets, checklists). We also discuss responsible AI - use cases—explainability needs in healthcare and education, including a classroom - game predicting smoking risk—and governance approaches for product-centric fairness. -

Listen to learn concrete remedies—workshops, shared vocabularies, documentation - standards, and how to involve ML practitioners from requirements through testing—to - prevent hidden technical debt in ML systems. -dateadded: '2023-03-27' -duration: PT00H58M25S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=0 - endOffset: 96 -- name: 'Guest Background: Nadia Nahar (PhD, software engineering)' - startOffset: 96 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=96 - endOffset: 254 -- name: Academia–Industry Collaboration in Software Engineering - startOffset: 254 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=254 - endOffset: 418 -- name: Defining Software Engineering for Machine Learning Systems - startOffset: 418 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=418 - endOffset: 462 -- name: 'ML vs Traditional Software: uncertainty, data workflows, monitoring' - startOffset: 462 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=462 - endOffset: 612 -- name: 'System-Centric Perspective: "Hidden Technical Debt" and scope' - startOffset: 612 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=612 - endOffset: 654 -- name: 'Industry Pain Points: requirements, unrealistic expectations, data access' - startOffset: 654 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=654 - endOffset: 832 -- name: 'Communication & Alignment: vocabulary, expectation setting, documentation' - startOffset: 832 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=832 - endOffset: 917 -- name: 'Artifact Analysis: building an open-source ML product dataset' - startOffset: 917 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=917 - endOffset: 1145 -- name: 'Open-Source ML Products: dataset size (~300 repos) and availability issues' - startOffset: 1145 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1145 - endOffset: 1314 -- name: 'Product Criteria: distinguishing ML products from models and APIs' - 
startOffset: 1314 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1314 - endOffset: 1443 -- name: 'Dataset Research Questions: development order, collaboration, testing, ops, - responsible AI' - startOffset: 1443 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1443 - endOffset: 1562 -- name: 'Analysis Approach: manual review augmented by scripts (commits & code)' - startOffset: 1562 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1562 - endOffset: 1782 -- name: 'Failure Modes: discontinuation, unmet requirements, poor data, deployment - gaps' - startOffset: 1782 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=1782 - endOffset: 2062 -- name: 'Process Gap: CRISP-DM, Agile mismatch, and the need for integrated ML+SW - processes' - startOffset: 2062 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2062 - endOffset: 2188 -- name: 'Team Structures & Integration Patterns: siloing, APIs, all-in-one teams, - ML engineers' - startOffset: 2188 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2188 - endOffset: 2345 -- name: 'Practical Remedies: workshops, shared vocabularies, documentation, engineering - support (MLOps)' - startOffset: 2345 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2345 - endOffset: 2567 -- name: 'Documentation Practices: Model Cards, Datasheets, factsheets, and checklists' - startOffset: 2567 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2567 - endOffset: 2836 -- name: 'Responsible AI Research: explainability requirements in healthcare and education' - startOffset: 2836 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=2836 - endOffset: 3003 -- name: 'Explainability Use Case: classroom game predicting smoking risk and stakeholder - needs' - startOffset: 3003 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3003 - endOffset: 3256 -- name: 'Responsible AI Governance: product-centric fairness and team accountability' - startOffset: 3256 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3256 - endOffset: 
3415 -- name: 'Agile Integration: involving ML practitioners from requirements through testing' - startOffset: 3415 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3415 - endOffset: 3601 -- name: Closing Remarks & Resources - startOffset: 3601 - url: https://www.youtube.com/watch?v=35Ch8xL2SA8&t=3601 - endOffset: 3505 --- Links: diff --git a/_podcast/s05e04-introducing-data-science-in-startups.md b/_podcast/solopreneur-data-scientist.md similarity index 96% rename from _podcast/s05e04-introducing-data-science-in-startups.md rename to _podcast/solopreneur-data-scientist.md index c92d1e3e..3405ebea 100644 --- a/_podcast/s05e04-introducing-data-science-in-startups.md +++ b/_podcast/solopreneur-data-scientist.md @@ -1,11 +1,11 @@ --- -title: 'Solo Data Scientist Playbook: 90-Day Roadmap, Pipelines, A/B Tests & Prioritization' -short: Introducing Data Science in Startups -guests: -- mariannadiachuk -image: images/podcast/s05e04-introducing-data-science-in-startups.jpg +title: "Solo Data Scientist Playbook: 90-Day Roadmap, Pipelines, A/B Tests & Prioritization" +short: "Introducing Data Science in Startups" season: 5 episode: 4 +guests: +- mariannadiachuk +image: images/podcast/solopreneur-data-scientist.jpg ids: youtube: KMSE9GkU2mE anchor: Introducing-Data-Science-in-Startups---Marianna-Diachuk-e17rc4i @@ -14,6 +14,130 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Introducing-Data-Science-in-Startups---Marianna-Diachuk-e17rc4i spotify: https://open.spotify.com/episode/0kGFYX12RgkmZC2lMml6S4 apple: https://podcasts.apple.com/us/podcast/introducing-data-science-in-startups-marianna-diachuk/id1541710331?i=1000536525162 + +description: "Master the solo data scientist 90-day roadmap: prioritize projects, run A/B tests, align stakeholders and deploy models for fast business impact." +intro: "How can a solo data scientist deliver measurable impact in the first 90 days? 
In this episode, Marianna Diachuk — data scientist at Restream, former DataRobot engineer and fintech team lead, and Data Science Lead/mentor with Women Who Code — walks through a practical Solo Data Scientist playbook. You'll hear a clear 90-day roadmap covering first-week stakeholder interviews and data exploration, first-month research and proofs-of-concept, and first-quarter priorities: building data pipelines, deployment, methodology, and A/B testing. Marianna breaks down company prerequisites (pipelines, engineers, analytics), the experience needed for end-to-end projects, and how to translate business problems into data science work through proactive outreach and prioritization by feasibility, impact, and stakeholder alignment. Topics include churn workflows, reuse and automation to speed iterations, metrics and KPIs for solution selection, experiment design and safe rollouts, plus communicating results through reports and tech talks. Listen for actionable guidance on transitioning from engineering, when to stop projects, hiring signals, an interview readiness checklist, and resources to learn faster and educate your organization." 
+topics: +- data science +- startups +- career transition +- software engineering +- communication +- career growth +dateadded: 2021-09-25 + +duration: PT00H58M04S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=0 + endOffset: 2 +- name: Guest Background & Career Path in Data Science + startOffset: 2 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2 + endOffset: 222 +- name: 'Solo Data Scientist: Freedom, Influence & Responsibility' + startOffset: 222 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=222 + endOffset: 493 +- name: 'Company Prerequisites: Data Pipelines, Engineers & Analytics' + startOffset: 493 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=493 + endOffset: 653 +- name: 'Experience Required: Mid-Senior, End-to-End Project Skills' + startOffset: 653 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=653 + endOffset: 753 +- name: 'Problem Discovery: Translating Business Needs to Data Science' + startOffset: 753 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=753 + endOffset: 865 +- name: Proactive Outreach & Building a Data Science Roadmap + startOffset: 865 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=865 + endOffset: 961 +- name: 'Prioritization: Feasibility, Impact & Stakeholder Alignment' + startOffset: 961 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=961 + endOffset: 1267 +- name: 'First Week: Stakeholder Interviews and Data Exploration' + startOffset: 1267 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1267 + endOffset: 1345 +- name: 'First Month: Early Research, Insights or Proof-of-Concept' + startOffset: 1345 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1345 + endOffset: 1447 +- name: 'First Quarter: Pipelines, Methodology, Deployment & A/B Testing' + startOffset: 1447 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1447 + endOffset: 1540 +- name: 'Managing Expectations: Data Science as Iterative Inquiry' + startOffset: 1540 + url: 
https://youtube.com/watch?v=KMSE9GkU2mE&t=1540 + endOffset: 1687 +- name: 'Start Small: Exploratory Analysis, Dashboards vs. Machine Learning' + startOffset: 1687 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1687 + endOffset: 1811 +- name: 'Churn Workflows: Analysis to Model to Marketing Collaboration' + startOffset: 1811 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1811 + endOffset: 1974 +- name: 'Project Timelines: Reuse, Automation & Faster Iterations' + startOffset: 1974 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1974 + endOffset: 2063 +- name: 'Solution Selection: Define Metrics and Measure Outcomes' + startOffset: 2063 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2063 + endOffset: 2149 +- name: 'Evaluating Performance: KPIs, Experiments & Delivering Insights' + startOffset: 2149 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2149 + endOffset: 2365 +- name: 'When You Get Stuck: Networks, Communities & Learning Resources' + startOffset: 2365 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2365 + endOffset: 2459 +- name: 'Communicating Results: Reports, Visualizations & Tech Talks' + startOffset: 2459 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2459 + endOffset: 2636 +- name: 'Transitioning from Engineering: Mindset, Deployment & Monitoring' + startOffset: 2636 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2636 + endOffset: 2747 +- name: 'Scaling the Team: Signals to Hire More Data Scientists' + startOffset: 2747 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2747 + endOffset: 2882 +- name: 'Stopping Projects: Prioritize, Cut Losses & Reallocate Effort' + startOffset: 2882 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2882 + endOffset: 3017 +- name: 'Interview Checklist: Questions to Assess Company Readiness' + startOffset: 3017 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3017 + endOffset: 3255 +- name: 'Assessing Readiness: Pipelines, Analytics Dept. 
& Expectations' + startOffset: 3255 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3255 + endOffset: 3318 +- name: 'Research to Production: Silent Mode, A/B Tests & Safe Rollout' + startOffset: 3318 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3318 + endOffset: 3435 +- name: 'Closing Advice: Learn Fast and Educate Your Organization' + startOffset: 3435 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3435 + endOffset: 3472 +- name: Contact Info & Episode Wrap-up + startOffset: 3472 + url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3472 + endOffset: 3484 + transcript: - header: Podcast Introduction - header: Guest Background & Career Path in Data Science @@ -179,7 +303,7 @@ transcript: sec: 652 time: '10:52' who: Marianna -- header: 'Experience Required: Mid‑Senior, End‑to‑End Project Skills' +- header: 'Experience Required: Mid-Senior, End-to-End Project Skills' - line: How much experience would I need to have in order to join a company as the only data scientist? Do I need to be very experienced, like a senior person? Or if I'm just switching careers, would it be a good idea to join such a company? @@ -365,7 +489,7 @@ transcript: sec: 1291 time: '21:31' who: Marianna -- header: 'First Month: Early Research, Insights or Proof‑of‑Concept' +- header: 'First Month: Early Research, Insights or Proof-of-Concept' - line: That's the first week, you said. Let’s say, you’re there for a month already – what should you do in the first month? Do you already need to have some sort of POC or are you still in the exploratory phase? I guess it depends on the case, @@ -946,134 +1070,6 @@ transcript: sec: 3486 time: '58:06' who: Alexey -description: 'Master the solo data scientist 90-day roadmap: prioritize projects, - run A/B tests, align stakeholders and deploy models for fast business impact.' -intro: 'How can a solo data scientist deliver measurable impact in the first 90 days? 
- In this episode, Marianna Diachuk — data scientist at Restream, former DataRobot - engineer and fintech team lead, and Data Science Lead/mentor with Women Who Code - — walks through a practical Solo Data Scientist playbook. You''ll hear a clear 90-day - roadmap covering first-week stakeholder interviews and data exploration, first-month - research and proofs-of-concept, and first-quarter priorities: building data pipelines, - deployment, methodology, and A/B testing. Marianna breaks down company prerequisites - (pipelines, engineers, analytics), the experience needed for end-to-end projects, - and how to translate business problems into data science work through proactive - outreach and prioritization by feasibility, impact, and stakeholder alignment. Topics - include churn workflows, reuse and automation to speed iterations, metrics and KPIs - for solution selection, experiment design and safe rollouts, plus communicating - results through reports and tech talks. Listen for actionable guidance on transitioning - from engineering, when to stop projects, hiring signals, an interview readiness - checklist, and resources to learn faster and educate your organization.' 
-dateadded: '2021-09-25' -duration: PT00H58M04S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=0 - endOffset: 2 -- name: Guest Background & Career Path in Data Science - startOffset: 2 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2 - endOffset: 222 -- name: 'Solo Data Scientist: Freedom, Influence & Responsibility' - startOffset: 222 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=222 - endOffset: 493 -- name: 'Company Prerequisites: Data Pipelines, Engineers & Analytics' - startOffset: 493 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=493 - endOffset: 653 -- name: 'Experience Required: Mid‑Senior, End‑to‑End Project Skills' - startOffset: 653 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=653 - endOffset: 753 -- name: 'Problem Discovery: Translating Business Needs to Data Science' - startOffset: 753 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=753 - endOffset: 865 -- name: Proactive Outreach & Building a Data Science Roadmap - startOffset: 865 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=865 - endOffset: 961 -- name: 'Prioritization: Feasibility, Impact & Stakeholder Alignment' - startOffset: 961 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=961 - endOffset: 1267 -- name: 'First Week: Stakeholder Interviews and Data Exploration' - startOffset: 1267 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1267 - endOffset: 1345 -- name: 'First Month: Early Research, Insights or Proof‑of‑Concept' - startOffset: 1345 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1345 - endOffset: 1447 -- name: 'First Quarter: Pipelines, Methodology, Deployment & A/B Testing' - startOffset: 1447 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1447 - endOffset: 1540 -- name: 'Managing Expectations: Data Science as Iterative Inquiry' - startOffset: 1540 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1540 - endOffset: 1687 -- name: 'Start Small: Exploratory Analysis, Dashboards vs. 
Machine Learning' - startOffset: 1687 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1687 - endOffset: 1811 -- name: 'Churn Workflows: Analysis to Model to Marketing Collaboration' - startOffset: 1811 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1811 - endOffset: 1974 -- name: 'Project Timelines: Reuse, Automation & Faster Iterations' - startOffset: 1974 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=1974 - endOffset: 2063 -- name: 'Solution Selection: Define Metrics and Measure Outcomes' - startOffset: 2063 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2063 - endOffset: 2149 -- name: 'Evaluating Performance: KPIs, Experiments & Delivering Insights' - startOffset: 2149 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2149 - endOffset: 2365 -- name: 'When You Get Stuck: Networks, Communities & Learning Resources' - startOffset: 2365 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2365 - endOffset: 2459 -- name: 'Communicating Results: Reports, Visualizations & Tech Talks' - startOffset: 2459 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2459 - endOffset: 2636 -- name: 'Transitioning from Engineering: Mindset, Deployment & Monitoring' - startOffset: 2636 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2636 - endOffset: 2747 -- name: 'Scaling the Team: Signals to Hire More Data Scientists' - startOffset: 2747 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2747 - endOffset: 2882 -- name: 'Stopping Projects: Prioritize, Cut Losses & Reallocate Effort' - startOffset: 2882 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=2882 - endOffset: 3017 -- name: 'Interview Checklist: Questions to Assess Company Readiness' - startOffset: 3017 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3017 - endOffset: 3255 -- name: 'Assessing Readiness: Pipelines, Analytics Dept. 
& Expectations' - startOffset: 3255 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3255 - endOffset: 3318 -- name: 'Research to Production: Silent Mode, A/B Tests & Safe Rollout' - startOffset: 3318 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3318 - endOffset: 3435 -- name: 'Closing Advice: Learn Fast and Educate Your Organization' - startOffset: 3435 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3435 - endOffset: 3472 -- name: Contact Info & Episode Wrap-up - startOffset: 3472 - url: https://youtube.com/watch?v=KMSE9GkU2mE&t=3472 - endOffset: 3484 --- Links: diff --git a/_podcast/s06e01-solopreneur.md b/_podcast/solopreneur-developer-and-data-professional.md similarity index 98% rename from _podcast/s06e01-solopreneur.md rename to _podcast/solopreneur-developer-and-data-professional.md index c3a8006f..536fce0b 100644 --- a/_podcast/s06e01-solopreneur.md +++ b/_podcast/solopreneur-developer-and-data-professional.md @@ -1,14 +1,11 @@ --- -title: 'Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs' -short: 'Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs' -guests: -- noahgift -description: Discover solopreneur tactics to build a side-gig tunnel, diversify income - mix with courses, teaching and consulting, and quit corporate on your terms. 
-image: images/podcast/s06e01-solopreneur.jpg -date: 2025-11-07 +title: "Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs" +short: "Solopreneur Guide: Diversify Income with Courses, Consulting, Books & Side-Gigs" season: 6 episode: 1 +guests: +- noahgift +image: images/podcast/solopreneur-developer-and-data-professional.jpg ids: youtube: gCLUY37HGtw anchor: Becoming-a-Solopreneur-in-Data---Noah-Gift-e19gqbr @@ -17,6 +14,118 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/Becoming-a-Solopreneur-in-Data---Noah-Gift-e19gqbr spotify: https://open.spotify.com/episode/264kr8rkSV71NwlU3kphHm apple: https://podcasts.apple.com/us/podcast/becoming-a-solopreneur-in-data-noah-gift/id1541710331?i=1000540908616 + +description: "Discover solopreneur tactics to build a side-gig tunnel, diversify income mix with courses, teaching and consulting, and quit corporate on your terms" +intro: "How do you build a sustainable solopreneur business that doesn't rely on VC funding—while diversifying income across courses, consulting, books, and side-gigs? In this episode, Noah Gift, founder of Pragmatic AI Labs and a lecturer on machine learning and data science at Northwestern, Duke MIDS, UC Berkeley, UC Davis, and UNC Charlotte, walks through his transition to solo work (since 2017) and a repeatable income mix for intentional small-business ownership.

We cover defining solopreneurship, the practical income mix formula (online courses, university teaching, selective consulting, book publishing, apps, real estate, and investments) plus how to build a side-gig tunnel while employed. Noah shares work allocation strategies (exponential projects vs. consulting), publishing trade-offs, a book workflow (outline -> projects -> write), daily routines, time-and-cost tactics, and signals for financial readiness to quit full-time work.

If you're planning to diversify income streams with online courses, consulting, or writing, this episode gives actionable steps, publishing considerations, and networking advice to help you transition deliberately and scale revenue without sacrificing control" +topics: +- solopreneurship +- entrepreneurship +- career growth +- career transition +- consulting +dateadded: 2021-11-06 +date: 2025-11-07 + +duration: PT00H58M56S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=0 + endOffset: 74 +- name: 'Episode Topic & Guest Overview: Becoming a Solopreneur with Noah Gift' + startOffset: 74 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=74 + endOffset: 157 +- name: 'Early Career: TV, Caltech, Disney and Film Pipeline Experience' + startOffset: 157 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=157 + endOffset: 376 +- name: 'Transition to Independent Work: Solopreneur Since 2017' + startOffset: 376 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=376 + endOffset: 402 +- name: 'Defining Solopreneurship: Intentional Smallness and Revenue Diversification' + startOffset: 402 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=402 + endOffset: 585 +- name: 'Small-Business Philosophy: Alternatives to Venture-Backed Growth' + startOffset: 585 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=585 + endOffset: 821 +- name: 'Daily Routine: Exercise, Prioritization, and Task Queues' + startOffset: 821 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=821 + endOffset: 987 +- name: 'Work Allocation Strategy: Exponential Projects vs. 
Consulting' + startOffset: 987 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=987 + endOffset: 1301 +- name: 'Long-Term Escape Plan: Incremental Transition Out of Corporate Roles' + startOffset: 1301 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1301 + endOffset: 1505 +- name: 'Income Mix Formula: Courses, University Teaching, and Select Consulting' + startOffset: 1505 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1505 + endOffset: 1672 +- name: 'Goal Criteria: Scale, Ethics, and Asynchronous Work Preferences' + startOffset: 1672 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1672 + endOffset: 1860 +- name: 'Maintaining Motivation: Deadlines, Enjoyment, and Accepting Imperfection' + startOffset: 1860 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1860 + endOffset: 1991 +- name: 'Pressure Management: Demand as a Signal of Success' + startOffset: 1991 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1991 + endOffset: 2144 +- name: 'Publishing Options: Tradeoffs Between Traditional and Self-Publishing' + startOffset: 2144 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2144 + endOffset: 2288 +- name: 'Book Workflow: Outlines First, Build Projects, Then Write' + startOffset: 2288 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2288 + endOffset: 2494 +- name: 'Writing Discipline: Treating Book Projects Like Marathons' + startOffset: 2494 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2494 + endOffset: 2576 +- name: 'Distributed Income: Combining Books, Apps, Real Estate, and Investments' + startOffset: 2576 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2576 + endOffset: 2787 +- name: 'Side-Gig Strategy: Building the “Tunnel” While Employed' + startOffset: 2787 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2787 + endOffset: 2952 +- name: 'Time & Cost Tactics: Reduce Commute, Lower Expenses, and Save Cash' + startOffset: 2952 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2952 + endOffset: 3229 +- 
name: 'Financial Readiness: When to Quit Full-Time Employment' + startOffset: 3229 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3229 + endOffset: 3306 +- name: 'Networking for Independence: Deep Skill, Visibility, and Avoiding Management' + startOffset: 3306 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3306 + endOffset: 3504 +- name: 'University Teaching Path: Leverage Expertise, Professors, and Written Work' + startOffset: 3504 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3504 + endOffset: 3600 +- name: 'Contact & Resources: NoahGift.com and LinkedIn' + startOffset: 3600 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3600 + endOffset: 3610 +- name: 'Closing Remarks: Final Advice — Be Excellent and Bet on Yourself' + startOffset: 3610 + url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3610 + endOffset: 3536 + transcript: - header: Podcast Introduction - header: 'Episode Topic & Guest Overview: Becoming a Solopreneur with Noah Gift' @@ -1692,118 +1801,4 @@ transcript: sec: 3610 time: '1:00:10' who: Alexey -intro: How do you build a sustainable solopreneur business that doesn't rely on VC - funding—while diversifying income across courses, consulting, books, and side-gigs? - In this episode, Noah Gift, founder of Pragmatic AI Labs and a lecturer on machine - learning and data science at Northwestern, Duke MIDS, UC Berkeley, UC Davis, and - UNC Charlotte, walks through his transition to solo work (since 2017) and a repeatable - income mix for intentional small-business ownership.

We cover defining - solopreneurship, the practical income mix formula (online courses, university teaching, - selective consulting, book publishing, apps, real estate, and investments) plus - how to build a side-gig tunnel while employed. Noah shares work allocation strategies - (exponential projects vs. consulting), publishing trade-offs, a book workflow (outline - -> projects -> write), daily routines, time-and-cost tactics, and signals for financial - readiness to quit full-time work.

If you're planning to diversify income - streams with online courses, consulting, or writing, this episode gives actionable - steps, publishing considerations, and networking advice to help you transition deliberately - and scale revenue without sacrificing control. -dateadded: '2021-11-06' -duration: PT00H58M56S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=0 - endOffset: 74 -- name: 'Episode Topic & Guest Overview: Becoming a Solopreneur with Noah Gift' - startOffset: 74 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=74 - endOffset: 157 -- name: 'Early Career: TV, Caltech, Disney and Film Pipeline Experience' - startOffset: 157 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=157 - endOffset: 376 -- name: 'Transition to Independent Work: Solopreneur Since 2017' - startOffset: 376 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=376 - endOffset: 402 -- name: 'Defining Solopreneurship: Intentional Smallness and Revenue Diversification' - startOffset: 402 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=402 - endOffset: 585 -- name: 'Small-Business Philosophy: Alternatives to Venture-Backed Growth' - startOffset: 585 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=585 - endOffset: 821 -- name: 'Daily Routine: Exercise, Prioritization, and Task Queues' - startOffset: 821 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=821 - endOffset: 987 -- name: 'Work Allocation Strategy: Exponential Projects vs. 
Consulting' - startOffset: 987 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=987 - endOffset: 1301 -- name: 'Long-Term Escape Plan: Incremental Transition Out of Corporate Roles' - startOffset: 1301 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1301 - endOffset: 1505 -- name: 'Income Mix Formula: Courses, University Teaching, and Select Consulting' - startOffset: 1505 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1505 - endOffset: 1672 -- name: 'Goal Criteria: Scale, Ethics, and Asynchronous Work Preferences' - startOffset: 1672 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1672 - endOffset: 1860 -- name: 'Maintaining Motivation: Deadlines, Enjoyment, and Accepting Imperfection' - startOffset: 1860 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1860 - endOffset: 1991 -- name: 'Pressure Management: Demand as a Signal of Success' - startOffset: 1991 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=1991 - endOffset: 2144 -- name: 'Publishing Options: Tradeoffs Between Traditional and Self-Publishing' - startOffset: 2144 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2144 - endOffset: 2288 -- name: 'Book Workflow: Outlines First, Build Projects, Then Write' - startOffset: 2288 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2288 - endOffset: 2494 -- name: 'Writing Discipline: Treating Book Projects Like Marathons' - startOffset: 2494 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2494 - endOffset: 2576 -- name: 'Distributed Income: Combining Books, Apps, Real Estate, and Investments' - startOffset: 2576 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2576 - endOffset: 2787 -- name: 'Side-Gig Strategy: Building the “Tunnel” While Employed' - startOffset: 2787 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2787 - endOffset: 2952 -- name: 'Time & Cost Tactics: Reduce Commute, Lower Expenses, and Save Cash' - startOffset: 2952 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=2952 - endOffset: 3229 -- 
name: 'Financial Readiness: When to Quit Full-Time Employment' - startOffset: 3229 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3229 - endOffset: 3306 -- name: 'Networking for Independence: Deep Skill, Visibility, and Avoiding Management' - startOffset: 3306 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3306 - endOffset: 3504 -- name: 'University Teaching Path: Leverage Expertise, Professors, and Written Work' - startOffset: 3504 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3504 - endOffset: 3600 -- name: 'Contact & Resources: NoahGift.com and LinkedIn' - startOffset: 3600 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3600 - endOffset: 3610 -- name: 'Closing Remarks: Final Advice — Be Excellent and Bet on Yourself' - startOffset: 3610 - url: https://www.youtube.com/watch?v=gCLUY37HGtw&t=3610 - endOffset: 3536 --- diff --git a/_podcast/s11e09-teaching-and-mentoring-in-data-analytics.md b/_podcast/teaching-mentoring-data-analytics-fintech.md similarity index 97% rename from _podcast/s11e09-teaching-and-mentoring-in-data-analytics.md rename to _podcast/teaching-mentoring-data-analytics-fintech.md index 3ecf924e..89d8e31f 100644 --- a/_podcast/s11e09-teaching-and-mentoring-in-data-analytics.md +++ b/_podcast/teaching-mentoring-data-analytics-fintech.md @@ -1,20 +1,144 @@ --- +title: "Designing FinTech Data Analytics Curriculum: Fraud Detection, BigQuery Labs & Mentoring" +short: "Teaching and Mentoring in Data Analytics" +season: 11 episode: 9 guests: - irinabrudaru +image: images/podcast/teaching-mentoring-data-analytics-fintech.jpg ids: anchor: Teaching-and-Mentoring-in-Data-Analytics---Irina-Brudaru-e1rihm1 youtube: saaRRzgHsmE -image: images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Teaching-and-Mentoring-in-Data-Analytics---Irina-Brudaru-e1rihm1 apple: 
https://podcasts.apple.com/us/podcast/teaching-and-mentoring-in-data-analytics-irina-brudaru/id1541710331?i=1000588551445 spotify: https://open.spotify.com/episode/0ES2N4yIu61bUB3dY9oxgQ?si=_KFHPXOUQVap8oSBp6AJgA youtube: https://www.youtube.com/watch?v=saaRRzgHsmE -season: 11 -short: Teaching and Mentoring in Data Analytics -title: 'Designing FinTech Data Analytics Curriculum: Fraud Detection, BigQuery Labs - & Mentoring' + +description: "Discover FinTech data analytics curriculum: fraud detection, BigQuery labs & mentoring—gain hands-on cloud skills, chargeback modeling, SQL and career guidance." +intro: "How do you design a FinTech data analytics curriculum that teaches fraud detection, chargeback modeling, and real-world cloud skills while also mentoring diverse learners? In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google data leader, and long-time mentor and teacher — walks through building practical FinTech courses informed by industry experience across Berlin, Amsterdam and the Bay Area.

We cover curriculum components you can reuse: rule-based vs neural approaches to fraud detection, chargeback modeling, deploying ML in production, and essential business skills for analysts. Irina explains hands-on BigQuery labs, student cloud access strategies, and how to demystify Google Cloud for analysts. She shares mentoring methods (visual explanations, learner-centered teaching), instructor sourcing and storytelling for classroom impact, cohort analysis for product metrics, recruiting women to zoomcamps, and securing technical reviewers.

Listen to gain actionable guidance on structuring FinTech analytics training, designing cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring practices that help career changers and underrepresented learners succeed in data analytics." +topics: +- data analytics +- fintech +- mentoring +- teaching +- data science +dateadded: 2022-12-03 + +duration: PT01H46S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=0 + endOffset: 68 +- name: 'Guest Overview: Irina Brudaru — teacher, curriculum developer, mentor in + data analytics' + startOffset: 68 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=68 + endOffset: 133 +- name: 'Career Origins: early computing, Romania education, Max Planck research' + startOffset: 133 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=133 + endOffset: 221 +- name: 'Industry Transition: data consulting, BI, Google and product analytics experience' + startOffset: 221 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=221 + endOffset: 376 +- name: 'International Roles & Management: San Francisco, Netherlands, Berlin; leading + data teams' + startOffset: 376 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=376 + endOffset: 537 +- name: 'Early Mentoring Wins: mentoring family, interns, and career pivot stories' + startOffset: 537 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=537 + endOffset: 574 +- name: 'Mentoring Methods: visual explanations, learner-centered teaching techniques' + startOffset: 574 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=574 + endOffset: 597 +- name: 'Community Teaching: NGOs, bootcamps, and FrauenLoop volunteer work' + startOffset: 597 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=597 + endOffset: 798 +- name: 'Curriculum Design for FinTech: AI Guild program planning and certification + integration' + startOffset: 798 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=798 + endOffset: 896 
+- name: 'Curriculum Components: fraud, chargeback, ML in production, and business + skills' + startOffset: 896 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=896 + endOffset: 1107 +- name: 'Instructor Sourcing & Storytelling: finding teachers and teaching data storytelling' + startOffset: 1107 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1107 + endOffset: 1334 +- name: 'Fraud Detection & Chargeback Modeling: rule-based vs neural approaches in + FinTech' + startOffset: 1334 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1334 + endOffset: 1543 +- name: 'Hands-on Cloud Teaching: BigQuery labs, student cloud access, demystifying + Google Cloud' + startOffset: 1543 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1543 + endOffset: 1734 +- name: 'Overcoming Cloud Reluctance: focusing on essential cloud skills for analysts' + startOffset: 1734 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1734 + endOffset: 1791 +- name: 'Managerial Scope: balancing analytics, data engineering, and technical credibility' + startOffset: 1791 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1791 + endOffset: 1910 +- name: 'Cohort Analysis Explained: retention metrics, product analytics visualization' + startOffset: 1910 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1910 + endOffset: 2134 +- name: 'Path to Formal Teaching: outreach, invitations, and joining teaching programs' + startOffset: 2134 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2134 + endOffset: 2329 +- name: 'Gender Diversity Research: plans to analyze company data for inclusion insights' + startOffset: 2329 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2329 + endOffset: 2476 +- name: 'Recruiting Women to Zoomcamps: targeted outreach, partnerships, and scheduling + considerations' + startOffset: 2476 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2476 + endOffset: 2724 +- name: 'Securing Technical Feedback: finding reviewers, advocating for code review + processes' + 
startOffset: 2724 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2724 + endOffset: 2979 +- name: 'Learning Antipatterns: ML hype, overengineering, and tool-centric approaches' + startOffset: 2979 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2979 + endOffset: 3286 +- name: 'Career Transition Advice: moving into data science from non-technical roles' + startOffset: 3286 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3286 + endOffset: 3488 +- name: 'Core Analyst Fundamentals: SQL, data visualization, soft skills, and product + tracking' + startOffset: 3488 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3488 + endOffset: 3632 +- name: 'Community Partnerships: collaborating with Women in Tech groups and volunteer + orgs' + startOffset: 3632 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3632 + endOffset: 3666 +- name: Episode Wrap-up and Contact Info + startOffset: 3666 + url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3666 + endOffset: 3646 + transcript: - header: Podcast Introduction - header: 'Guest Overview: Irina Brudaru — teacher, curriculum developer, mentor in @@ -1445,138 +1569,6 @@ transcript: sec: 3714 time: '1:01:54' who: Irina -description: 'Discover FinTech data analytics curriculum: fraud detection, BigQuery - labs & mentoring—gain hands-on cloud skills, chargeback modeling, SQL and career - guidance.' -intro: 'How do you design a FinTech data analytics curriculum that teaches fraud detection, - chargeback modeling, and real-world cloud skills while also mentoring diverse learners? - In this episode, Irina Brudaru — Head of Data & Analytics at Finlex, former Google - data leader, and long-time mentor and teacher — walks through building practical - FinTech courses informed by industry experience across Berlin, Amsterdam and the - Bay Area.

We cover curriculum components you can reuse: rule‑based vs neural - approaches to fraud detection, chargeback modeling, deploying ML in production, - and essential business skills for analysts. Irina explains hands‑on BigQuery labs, - student cloud access strategies, and how to demystify Google Cloud for analysts. - She shares mentoring methods (visual explanations, learner‑centered teaching), instructor - sourcing and storytelling for classroom impact, cohort analysis for product metrics, - recruiting women to zoomcamps, and securing technical reviewers.

Listen - to gain actionable guidance on structuring FinTech analytics training, designing - cloud labs, teaching fraud detection and chargeback workflows, and adopting mentoring - practices that help career changers and underrepresented learners succeed in data - analytics.' -dateadded: '2022-12-03' -duration: PT01H46S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=0 - endOffset: 68 -- name: 'Guest Overview: Irina Brudaru — teacher, curriculum developer, mentor in - data analytics' - startOffset: 68 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=68 - endOffset: 133 -- name: 'Career Origins: early computing, Romania education, Max Planck research' - startOffset: 133 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=133 - endOffset: 221 -- name: 'Industry Transition: data consulting, BI, Google and product analytics experience' - startOffset: 221 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=221 - endOffset: 376 -- name: 'International Roles & Management: San Francisco, Netherlands, Berlin; leading - data teams' - startOffset: 376 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=376 - endOffset: 537 -- name: 'Early Mentoring Wins: mentoring family, interns, and career pivot stories' - startOffset: 537 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=537 - endOffset: 574 -- name: 'Mentoring Methods: visual explanations, learner-centered teaching techniques' - startOffset: 574 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=574 - endOffset: 597 -- name: 'Community Teaching: NGOs, bootcamps, and FrauenLoop volunteer work' - startOffset: 597 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=597 - endOffset: 798 -- name: 'Curriculum Design for FinTech: AI Guild program planning and certification - integration' - startOffset: 798 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=798 - endOffset: 896 -- name: 'Curriculum Components: fraud, chargeback, ML in production, 
and business - skills' - startOffset: 896 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=896 - endOffset: 1107 -- name: 'Instructor Sourcing & Storytelling: finding teachers and teaching data storytelling' - startOffset: 1107 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1107 - endOffset: 1334 -- name: 'Fraud Detection & Chargeback Modeling: rule-based vs neural approaches in - FinTech' - startOffset: 1334 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1334 - endOffset: 1543 -- name: 'Hands-on Cloud Teaching: BigQuery labs, student cloud access, demystifying - Google Cloud' - startOffset: 1543 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1543 - endOffset: 1734 -- name: 'Overcoming Cloud Reluctance: focusing on essential cloud skills for analysts' - startOffset: 1734 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1734 - endOffset: 1791 -- name: 'Managerial Scope: balancing analytics, data engineering, and technical credibility' - startOffset: 1791 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1791 - endOffset: 1910 -- name: 'Cohort Analysis Explained: retention metrics, product analytics visualization' - startOffset: 1910 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=1910 - endOffset: 2134 -- name: 'Path to Formal Teaching: outreach, invitations, and joining teaching programs' - startOffset: 2134 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2134 - endOffset: 2329 -- name: 'Gender Diversity Research: plans to analyze company data for inclusion insights' - startOffset: 2329 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2329 - endOffset: 2476 -- name: 'Recruiting Women to Zoomcamps: targeted outreach, partnerships, and scheduling - considerations' - startOffset: 2476 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2476 - endOffset: 2724 -- name: 'Securing Technical Feedback: finding reviewers, advocating for code review - processes' - startOffset: 2724 - url: 
https://www.youtube.com/watch?v=saaRRzgHsmE&t=2724 - endOffset: 2979 -- name: 'Learning Antipatterns: ML hype, overengineering, and tool-centric approaches' - startOffset: 2979 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=2979 - endOffset: 3286 -- name: 'Career Transition Advice: moving into data science from non-technical roles' - startOffset: 3286 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3286 - endOffset: 3488 -- name: 'Core Analyst Fundamentals: SQL, data visualization, soft skills, and product - tracking' - startOffset: 3488 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3488 - endOffset: 3632 -- name: 'Community Partnerships: collaborating with Women in Tech groups and volunteer - orgs' - startOffset: 3632 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3632 - endOffset: 3666 -- name: Episode Wrap-up and Contact Info - startOffset: 3666 - url: https://www.youtube.com/watch?v=saaRRzgHsmE&t=3666 - endOffset: 3646 --- Links: diff --git a/_podcast/s12e04-doing-software-engineering-in-academia.md b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md similarity index 97% rename from _podcast/s12e04-doing-software-engineering-in-academia.md rename to _podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md index 395287b2..72cce4ce 100644 --- a/_podcast/s12e04-doing-software-engineering-in-academia.md +++ b/_podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.md @@ -1,20 +1,153 @@ --- +title: "Teaching Open Science & Reproducible Research: Research Software Engineering Practices for Academia" +short: "Teaching Open Science & Reproducible Research" +season: 12 episode: 4 guests: - johannabayer +image: images/podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.jpg ids: anchor: Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb youtube: K0PdQITQzVQ -image: 
images/podcast/s12e04-doing-software-engineering-in-academia.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Doing-Software-Engineering-in-Academia---Johanna-Bayer-e1snqcb apple: https://podcasts.apple.com/us/podcast/doing-software-engineering-in-academia-johanna-bayer/id1541710331?i=1000594351759 spotify: https://open.spotify.com/episode/3ol91Xt0A6VBbPgFxGh5N6?si=QDcjMCJ7SOG6eJjjYbyEcg youtube: https://www.youtube.com/watch?v=K0PdQITQzVQ -season: 12 -short: Doing Software Engineering in Academia -title: 'Teach Reproducible Research: RSE Practices for Neuroimaging, Packaging, MLflow - & Data Sharing' + +description: "Master reproducible research for neuroimaging: packaging, MLflow & data sharing to publish reproducible manuscripts, boost citations and career visibility." +intro: "How do you teach reproducible research and practical research software engineering (RSE) skills to neuroimaging students and researchers? In this episode, Johanna Bayer — a psychologist-turned-computational neuroscientist completing a PhD in machine learning for clinical neuroimaging at the University of Melbourne and an open science advocate — walks through concrete approaches for teaching reproducible research. We cover course design (Carpentries-style curricula, Git introductions, and reproducible manuscripts with embedded code), guided onboarding to open source (small repos, pull requests, cookiecutter templates), and core coding practices to teach: packaging, environments, formatting, testing, branching and versioning. Johanna also discusses experiment tracking with MLflow, treating software as a research output (DOIs and toolboxes), data sharing realities and sensitive-data practices, and strategies for culture change in labs via hackathons and grassroots efforts. 
Listeners will gain practical teaching tactics, tooling recommendations, and considerations for infrastructure and academic-industry tensions — plus pointers to resources like The Turing Way, The Carpentries, and the ML Solutions Handbook to help implement reproducible research and RSE practices in neuroimaging projects." +topics: +- open science +- software engineering +- academia +- teaching +dateadded: 2023-01-14 + +duration: PT00H58M10S + +quotableClips: +- name: Podcast Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=0 + endOffset: 68 +- name: 'Guest Background: Johanna Bayer — Psychology to Machine Learning in Neuroimaging' + startOffset: 68 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=68 + endOffset: 144 +- name: 'Academic Journey: Studies in Germany, Zurich and Move to Melbourne' + startOffset: 144 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=144 + endOffset: 327 +- name: 'Teaching Open Science: Intro to Git, Homework Support and Course Structure' + startOffset: 327 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=327 + endOffset: 459 +- name: Carpentries & Structured Beginner Curriculum for Reproducible Research + startOffset: 459 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=459 + endOffset: 510 +- name: 'Open Science Curriculum: Reproducible Manuscripts with Embedded Code' + startOffset: 510 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=510 + endOffset: 652 +- name: 'Guided Onboarding to Open Source: Small Repos, Pull Requests & Turing Book' + startOffset: 652 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=652 + endOffset: 730 +- name: 'What RSE Means: Software-Focused Research Outputs and Practices' + startOffset: 730 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=730 + endOffset: 850 +- name: 'Academic RSE Roles: PhD Students, Methods Papers and Toolboxes' + startOffset: 850 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=850 + endOffset: 996 +- name: 'Software as 
Research Output: DOIs, Toolboxes and Publishing Code' + startOffset: 996 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=996 + endOffset: 1030 +- name: 'Culture Change in Labs: Convincing Supervisors & Grassroots Hackathons' + startOffset: 1030 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1030 + endOffset: 1205 +- name: 'Industry Lessons for Academia: Programming Expectations & Tool Adoption' + startOffset: 1205 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1205 + endOffset: 1332 +- name: 'Experiment Tracking in Research: MLflow and Reproducibility Tools' + startOffset: 1332 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1332 + endOffset: 1336 +- name: 'Barriers to Teaching Software Skills: Time, Expertise and Fear of Scrutiny' + startOffset: 1336 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1336 + endOffset: 1434 +- name: 'Infrastructure Gaps: Hosting Interactive Reproducible Papers and Costs' + startOffset: 1434 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1434 + endOffset: 1658 +- name: 'Core Coding Practices to Teach: Packaging, Environments, Formatting & Tests' + startOffset: 1658 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1658 + endOffset: 1698 +- name: 'Learning by Doing: Brainhack, Hackathons, Community Contributions' + startOffset: 1698 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1698 + endOffset: 1844 +- name: 'Formal Courses vs Self-Learning: Structure, Discipline and Freelancing' + startOffset: 1844 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1844 + endOffset: 1984 +- name: 'Collaboration & Code Review: Working Alone vs Community Feedback' + startOffset: 1984 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1984 + endOffset: 2165 +- name: 'Benefits of Open Code: Citations, Collaboration and Career Visibility' + startOffset: 2165 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2165 + endOffset: 2221 +- name: 'Data Sharing Reality: "Data Upon Request", Access Controls and 
Consortia' + startOffset: 2221 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2221 + endOffset: 2330 +- name: 'Project Case Study: Normative Brain Model — Folder Structure & Cookiecutter' + startOffset: 2330 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2330 + endOffset: 2367 +- name: 'Applied Engineering Practices: Branching, Formatting, Versioning & MLflow' + startOffset: 2367 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2367 + endOffset: 2542 +- name: 'Sensitive Data Practices: De-identification and Controlled Access' + startOffset: 2542 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2542 + endOffset: 2724 +- name: Balancing Open Source, Hackathons and Full-Time Research Commitments + startOffset: 2724 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2724 + endOffset: 2862 +- name: 'Discovering Projects: GitHub Trending, Social Media & Community Platforms' + startOffset: 2862 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2862 + endOffset: 2986 +- name: 'Contributing to Repositories: Readme, Contributing Guides, Issues & Communication' + startOffset: 2986 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2986 + endOffset: 3142 +- name: 'Open Publishing vs Industry IP: Academic Openness and Commercial Concerns' + startOffset: 3142 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3142 + endOffset: 3312 +- name: 'Recommended Resources: The Turing Way, The Carpentries & ML Solutions Handbook' + startOffset: 3312 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3312 + endOffset: 3483 +- name: Episode Conclusion and Closing Remarks + startOffset: 3483 + url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3483 + endOffset: 3490 + transcript: - header: Podcast Introduction - header: 'Guest Background: Johanna Bayer — Psychology to Machine Learning in Neuroimaging' @@ -1153,147 +1286,6 @@ transcript: sec: 3558 time: '59:18' who: Alexey -description: 'Master reproducible research for neuroimaging: packaging, MLflow & data - 
sharing to publish reproducible manuscripts, boost citations and career visibility.' -intro: 'How do you teach reproducible research and practical research software engineering - (RSE) skills to neuroimaging students and researchers? In this episode, Johanna Bayer - — a psychologist-turned-computational neuroscientist completing a PhD in machine - learning for clinical neuroimaging at the University of Melbourne and an open science - advocate — walks through concrete approaches for teaching reproducible research. - We cover course design (Carpentries-style curricula, Git introductions, and reproducible - manuscripts with embedded code), guided onboarding to open source (small repos, - pull requests, cookiecutter templates), and core coding practices to teach: packaging, - environments, formatting, testing, branching and versioning. Johanna also discusses - experiment tracking with MLflow, treating software as a research output (DOIs and - toolboxes), data sharing realities and sensitive-data practices, and strategies - for culture change in labs via hackathons and grassroots efforts. Listeners will - gain practical teaching tactics, tooling recommendations, and considerations for - infrastructure and academic-industry tensions — plus pointers to resources like - The Turing Way, The Carpentries, and the ML Solutions Handbook to help implement - reproducible research and RSE practices in neuroimaging projects.' 
-dateadded: '2023-01-14' -duration: PT00H58M10S -quotableClips: -- name: Podcast Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=0 - endOffset: 68 -- name: 'Guest Background: Johanna Bayer — Psychology to Machine Learning in Neuroimaging' - startOffset: 68 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=68 - endOffset: 144 -- name: 'Academic Journey: Studies in Germany, Zurich and Move to Melbourne' - startOffset: 144 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=144 - endOffset: 327 -- name: 'Teaching Open Science: Intro to Git, Homework Support and Course Structure' - startOffset: 327 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=327 - endOffset: 459 -- name: Carpentries & Structured Beginner Curriculum for Reproducible Research - startOffset: 459 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=459 - endOffset: 510 -- name: 'Open Science Curriculum: Reproducible Manuscripts with Embedded Code' - startOffset: 510 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=510 - endOffset: 652 -- name: 'Guided Onboarding to Open Source: Small Repos, Pull Requests & Turing Book' - startOffset: 652 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=652 - endOffset: 730 -- name: 'What RSE Means: Software-Focused Research Outputs and Practices' - startOffset: 730 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=730 - endOffset: 850 -- name: 'Academic RSE Roles: PhD Students, Methods Papers and Toolboxes' - startOffset: 850 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=850 - endOffset: 996 -- name: 'Software as Research Output: DOIs, Toolboxes and Publishing Code' - startOffset: 996 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=996 - endOffset: 1030 -- name: 'Culture Change in Labs: Convincing Supervisors & Grassroots Hackathons' - startOffset: 1030 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1030 - endOffset: 1205 -- name: 'Industry Lessons for Academia: Programming Expectations & Tool 
Adoption' - startOffset: 1205 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1205 - endOffset: 1332 -- name: 'Experiment Tracking in Research: MLflow and Reproducibility Tools' - startOffset: 1332 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1332 - endOffset: 1336 -- name: 'Barriers to Teaching Software Skills: Time, Expertise and Fear of Scrutiny' - startOffset: 1336 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1336 - endOffset: 1434 -- name: 'Infrastructure Gaps: Hosting Interactive Reproducible Papers and Costs' - startOffset: 1434 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1434 - endOffset: 1658 -- name: 'Core Coding Practices to Teach: Packaging, Environments, Formatting & Tests' - startOffset: 1658 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1658 - endOffset: 1698 -- name: 'Learning by Doing: Brainhack, Hackathons, Community Contributions' - startOffset: 1698 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1698 - endOffset: 1844 -- name: 'Formal Courses vs Self-Learning: Structure, Discipline and Freelancing' - startOffset: 1844 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1844 - endOffset: 1984 -- name: 'Collaboration & Code Review: Working Alone vs Community Feedback' - startOffset: 1984 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=1984 - endOffset: 2165 -- name: 'Benefits of Open Code: Citations, Collaboration and Career Visibility' - startOffset: 2165 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2165 - endOffset: 2221 -- name: 'Data Sharing Reality: "Data Upon Request", Access Controls and Consortia' - startOffset: 2221 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2221 - endOffset: 2330 -- name: 'Project Case Study: Normative Brain Model — Folder Structure & Cookiecutter' - startOffset: 2330 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2330 - endOffset: 2367 -- name: 'Applied Engineering Practices: Branching, Formatting, Versioning & MLflow' - startOffset: 2367 - url: 
https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2367 - endOffset: 2542 -- name: 'Sensitive Data Practices: De-identification and Controlled Access' - startOffset: 2542 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2542 - endOffset: 2724 -- name: Balancing Open Source, Hackathons and Full-Time Research Commitments - startOffset: 2724 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2724 - endOffset: 2862 -- name: 'Discovering Projects: GitHub Trending, Social Media & Community Platforms' - startOffset: 2862 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2862 - endOffset: 2986 -- name: 'Contributing to Repositories: Readme, Contributing Guides, Issues & Communication' - startOffset: 2986 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=2986 - endOffset: 3142 -- name: 'Open Publishing vs Industry IP: Academic Openness and Commercial Concerns' - startOffset: 3142 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3142 - endOffset: 3312 -- name: 'Recommended Resources: The Turing Way, The Carpentries & ML Solutions Handbook' - startOffset: 3312 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3312 - endOffset: 3483 -- name: Episode Conclusion and Closing Remarks - startOffset: 3483 - url: https://www.youtube.com/watch?v=K0PdQITQzVQ&t=3483 - endOffset: 3490 --- Links: diff --git a/_podcast/s02e01-writing.md b/_podcast/technical-writing-for-data-scientists.md similarity index 89% rename from _podcast/s02e01-writing.md rename to _podcast/technical-writing-for-data-scientists.md index 2457edf0..d1f15318 100644 --- a/_podcast/s02e01-writing.md +++ b/_podcast/technical-writing-for-data-scientists.md @@ -1,17 +1,11 @@ --- -title: 'Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career' -short: 'Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career' -guests: -- eugeneyan -image: images/podcast/s02e01-writing.jpg -description: 'Master technical writing for data science with a practical 7-day workflow: - 
outline-first cadence, portfolio tips, docs & distribution to accelerate your career.' -keywords: technical writing, data science career, ML engineer writing, documentation - skills, technical communication, data science blog, career growth, writing process, - Amazon data scientist, Eugene Yan, technical documentation, data science portfolio, - ML career advice, technical writing tips, data science writing +title: "Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career" +short: "Master Technical Writing: 7-Day Workflow to Accelerate Your Data Science Career" season: 2 episode: 1 +guests: +- eugeneyan +image: images/podcast/technical-writing-for-data-scientists.jpg ids: youtube: vXWGd7olv3c anchor: The-Importance-of-Writing-in-a-Tech-Career---Eugene-Yan-ep17du @@ -20,23 +14,18 @@ links: anchor: https://anchor.fm/datatalksclub/episodes/The-Importance-of-Writing-in-a-Tech-Career---Eugene-Yan-ep17du spotify: TODO apple: TODO -intro: How can technical writing accelerate your data science career in just one week? - In this episode, Eugene Yan — an Applied Scientist at Amazon who previously led data - science teams at Lazada and uCare.ai and writes about ML in production and career - growth — walks through a practical, repeatable 7-day workflow for technical writing - tailored to data scientists.

We cover Eugene’s career transition and first - public writing, motivations for sharing work, and how to target readers (peers, - future teammates, and hiring managers). He frames writing as a product with a weekly - shipping cadence, explains the outline-first method for filtering ideas, and outlines - a realistic time budget and editing limits. You’ll get concrete guidance on idea - sourcing, title crafting, article length, blogging tools (Medium, Substack, WordPress, - Jekyll), writing habits, distribution via Twitter and LinkedIn, and writing at work - (press releases, design docs, decision logs). Practical portfolio advice — clear - README, quick start, repo tour — and tips to iterate outlines and ship weekly round - out the episode.

Listen to learn a concrete 7-day workflow, documentation - and portfolio best practices, and distribution tactics to boost your technical writing - and advance your data science career. -dateadded: '2021-02-23' + +description: "Master technical writing for data science with a practical 7-day workflow: outline-first cadence, portfolio tips, docs & distribution to accelerate your career." +topics: +- software engineering +- tools +- practices +- communication +- career transition +intro: "How can technical writing accelerate your data science career in just one week? In this episode, Eugene Yan — an Applied Scientist at Amazon who previously led data science teams at Lazada and uCare.ai and writes about ML in production and career growth — walks through a practical, repeatable 7-day workflow for technical writing tailored to data scientists.

We cover Eugene’s career transition and first public writing, motivations for sharing work, and how to target readers (peers, future teammates, and hiring managers). He frames writing as a product with a weekly shipping cadence, explains the outline-first method for filtering ideas, and outlines a realistic time budget and editing limits. You’ll get concrete guidance on idea sourcing, title crafting, article length, blogging tools (Medium, Substack, WordPress, Jekyll), writing habits, distribution via Twitter and LinkedIn, and writing at work (press releases, design docs, decision logs). Practical portfolio advice — clear README, quick start, repo tour — and tips to iterate outlines and ship weekly round out the episode.

Listen to learn a concrete 7-day workflow, documentation and portfolio best practices, and distribution tactics to boost your technical writing and advance your data science career." +dateadded: 2021-02-23 + + quotableClips: - name: Podcast Introduction startOffset: 0 @@ -118,6 +107,8 @@ quotableClips: startOffset: 3630 url: https://www.youtube.com/watch?v=vXWGd7olv3c&t=3630 endOffset: 3630 + +keywords: technical writing, data science career, ML engineer writing, documentation skills, technical communication, data science blog, career growth, writing process, Amazon data scientist, Eugene Yan, technical documentation, data science portfolio, ML career advice, technical writing tips, data science writing --- Today we're discussing technical writing, logging, documentation, and more. Our special guest is [Eugene Yan](/people/eugeneyan). Eugene works at the intersection of machine learning and product, building pragmatic ML systems while writing and speaking about effective data science, ML in production, and career growth. 
diff --git a/_podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md similarity index 95% rename from _podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md rename to _podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md index 110d3518..c35fe327 100644 --- a/_podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.md +++ b/_podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.md @@ -1,20 +1,116 @@ --- +title: "From Theme Parks to Tesla: Building Data Products Through Applied ML and Full-Stack Engineering" +short: "From Theme Parks to Tesla: Building Data Products That Work" +season: 21 episode: 9 guests: - abouzarabbaspour +image: images/podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.jpg ids: anchor: datatalksclub/episodes/From-Theme-Parks-to-Tesla-Building-Data-Products-That-Work-e395qme youtube: gXvVMvhfrIY -image: images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg links: anchor: https://creators.spotify.com/pod/profile/datatalksclub/episodes/From-Theme-Parks-to-Tesla-Building-Data-Products-That-Work-e395qme apple: https://podcasts.apple.com/us/podcast/from-theme-parks-to-tesla-building-data-products-that-work/id1541710331?i=1000731198436 spotify: https://open.spotify.com/episode/5dpBs4xr3zMkBDw6cTYHQE?si=pivilqeDTHOiNCBb1bFHdA youtube: https://www.youtube.com/watch?v=gXvVMvhfrIY -season: 21 -short: 'From Theme Parks to Tesla: Building Data Products That Work' -title: Optimize Visitor Flow with Theme Park Crowd Modeling, Queue Prediction & Real-Time - Recommendations +description: "Discover crowd modeling, queue prediction and real-time recommendations to optimize visitor flow, reduce wait times and boost engagement with smart routing." 
+topics: +- machine learning +- MLOps +- data engineering +- LLMs +- data science + +intro: "How can theme parks use data to cut wait times and guide visitors in real time? In this episode, Abouzar Abbaspour — an EngD-trained machine learning and data engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) and automotive (Tesla) — walks through building systems that optimize visitor flow using crowd modeling, queue prediction and real-time recommendations.

We cover the core problems of modeling crowd dynamics and ride capacity, designing a next-best-action visitor routing engine, and using behavioral route modeling and probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: incentivizing app adoption to collect data, validating recommendations with employee swiping experiments and A/B tests, and running streaming pipelines for live experiments and rollout (engagement metrics and accuracy measurement). He also touches on deployment concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines — and how these engineering choices affect measurement and user experience.

Listen to learn concrete approaches for queue prediction, visitor routing, real-time processing, and experimentation so you can design and validate systems that improve throughput and guest satisfaction." +dateadded: 2025-10-21 +duration: PT01H35S +quotableClips: +- name: Podcast Introduction & Event Info + startOffset: 0 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=0 + endOffset: 77 +- name: 'Early Career: Software Engineering to Data Science' + startOffset: 77 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=77 + endOffset: 126 +- name: 'Academic Path: Professional Doctorate & TU Berlin' + startOffset: 126 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=126 + endOffset: 288 +- name: 'Research Partnerships: Industry Projects and Applied Research' + startOffset: 288 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=288 + endOffset: 377 +- name: 'Efteling Insights: Theme Park Tech and Experience Design' + startOffset: 377 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=377 + endOffset: 456 +- name: 'Crowd Modeling: Queue Prediction and Ride Capacity' + startOffset: 456 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=456 + endOffset: 779 +- name: 'Visitor Routing: Next-Best-Action Recommendation System' + startOffset: 779 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=779 + endOffset: 890 +- name: App Adoption & Incentives for Data Collection + startOffset: 890 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=890 + endOffset: 1000 +- name: Behavioral Route Modeling & Probabilistic Recommendations + startOffset: 1000 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1000 + endOffset: 1109 +- name: 'E-commerce Recs: Bol.com Favorite-Brand Carousel' + startOffset: 1109 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1109 + endOffset: 1443 +- name: 'Recommendation Validation: Employee Swiping Experiment & A/B Testing' + startOffset: 1443 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1443 + endOffset: 1561 +- name: 
'Real-time Processing: Streaming for Live Experiments' + startOffset: 1561 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1561 + endOffset: 1879 +- name: 'Measurement & Rollout: Engagement Metrics and Accuracy Results' + startOffset: 1879 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1879 + endOffset: 2001 +- name: 'Role at Tesla: Data Engineering vs. ML Engineering Responsibilities' + startOffset: 2001 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2001 + endOffset: 2061 +- name: 'Full-Stack Data Work: Building Apps, Instrumentation, and Deployment' + startOffset: 2061 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2061 + endOffset: 2503 +- name: 'LLMs & AI-Assisted Development: Productivity Gains and Risks' + startOffset: 2503 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2503 + endOffset: 2766 +- name: 'On-Prem Inference Hardware: Raspberry Pi, Jetson Orin, Mac Mini' + startOffset: 2766 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2766 + endOffset: 2993 +- name: 'Models & Platforms: LLaMA, Code Models, and Replicate' + startOffset: 2993 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2993 + endOffset: 3243 +- name: 'Interview Preparation: Tesla Data Engineering Expectations (Architecture, + ETL, Scripting)' + startOffset: 3243 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3243 + endOffset: 3463 +- name: 'Career Strategy: Prioritization, Learning Opportunities, Underpromise & Overdeliver' + startOffset: 3463 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3463 + endOffset: 3610 +- name: Episode Closing & Key Takeaways + startOffset: 3610 + url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3610 + endOffset: 3635 transcript: - header: Podcast Introduction & Event Info - line: Hi everyone, welcome to our event. 
This event is brought to you by Data Docs @@ -1135,114 +1231,13 @@ transcript: sec: 3635 time: '1:00:35' who: Abouzar -description: Discover crowd modeling, queue prediction and real-time recommendations - to optimize visitor flow, reduce wait times and boost engagement with smart routing. -intro: 'How can theme parks use data to cut wait times and guide visitors in real - time? In this episode, Abouzar Abbaspour — an EngD-trained machine learning and data - engineer whose career spans telecom, e-commerce (bol.com), theme parks (Efteling) - and automotive (Tesla) — walks through building systems that optimize visitor flow - using crowd modeling, queue prediction and real-time recommendations.

We - cover the core problems of modeling crowd dynamics and ride capacity, designing - a next-best-action visitor routing engine, and using behavioral route modeling and - probabilistic recommendations to nudge guests. Abouzar explains practical trade-offs: - incentivizing app adoption to collect data, validating recommendations with employee - swiping experiments and A/B tests, and running streaming pipelines for live experiments - and rollout (engagement metrics and accuracy measurement). He also touches on deployment - concerns — from on-prem inference hardware to integrating LLMs and scalable pipelines - — and how these engineering choices affect measurement and user experience.

- Listen to learn concrete approaches for queue prediction, visitor routing, real-time - processing, and experimentation so you can design and validate systems that improve - throughput and guest satisfaction.' -dateadded: '2025-10-21' -duration: PT01H35S -quotableClips: -- name: Podcast Introduction & Event Info - startOffset: 0 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=0 - endOffset: 77 -- name: 'Early Career: Software Engineering to Data Science' - startOffset: 77 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=77 - endOffset: 126 -- name: 'Academic Path: Professional Doctorate & TU Berlin' - startOffset: 126 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=126 - endOffset: 288 -- name: 'Research Partnerships: Industry Projects and Applied Research' - startOffset: 288 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=288 - endOffset: 377 -- name: 'Efteling Insights: Theme Park Tech and Experience Design' - startOffset: 377 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=377 - endOffset: 456 -- name: 'Crowd Modeling: Queue Prediction and Ride Capacity' - startOffset: 456 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=456 - endOffset: 779 -- name: 'Visitor Routing: Next-Best-Action Recommendation System' - startOffset: 779 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=779 - endOffset: 890 -- name: App Adoption & Incentives for Data Collection - startOffset: 890 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=890 - endOffset: 1000 -- name: Behavioral Route Modeling & Probabilistic Recommendations - startOffset: 1000 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1000 - endOffset: 1109 -- name: 'E-commerce Recs: Bol.com Favorite-Brand Carousel' - startOffset: 1109 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1109 - endOffset: 1443 -- name: 'Recommendation Validation: Employee Swiping Experiment & A/B Testing' - startOffset: 1443 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1443 - endOffset: 1561 -- 
name: 'Real-time Processing: Streaming for Live Experiments' - startOffset: 1561 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1561 - endOffset: 1879 -- name: 'Measurement & Rollout: Engagement Metrics and Accuracy Results' - startOffset: 1879 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=1879 - endOffset: 2001 -- name: 'Role at Tesla: Data Engineering vs. ML Engineering Responsibilities' - startOffset: 2001 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2001 - endOffset: 2061 -- name: 'Full-Stack Data Work: Building Apps, Instrumentation, and Deployment' - startOffset: 2061 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2061 - endOffset: 2503 -- name: 'LLMs & AI-Assisted Development: Productivity Gains and Risks' - startOffset: 2503 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2503 - endOffset: 2766 -- name: 'On-Prem Inference Hardware: Raspberry Pi, Jetson Orin, Mac Mini' - startOffset: 2766 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2766 - endOffset: 2993 -- name: 'Models & Platforms: LLaMA, Code Models, and Replicate' - startOffset: 2993 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=2993 - endOffset: 3243 -- name: 'Interview Preparation: Tesla Data Engineering Expectations (Architecture, - ETL, Scripting)' - startOffset: 3243 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3243 - endOffset: 3463 -- name: 'Career Strategy: Prioritization, Learning Opportunities, Underpromise & Overdeliver' - startOffset: 3463 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3463 - endOffset: 3610 -- name: Episode Closing & Key Takeaways - startOffset: 3610 - url: https://www.youtube.com/watch?v=gXvVMvhfrIY&t=3610 - endOffset: 3635 +context: 'The episode’s central idea is the pragmatic, end-to-end translation of data + science and ML research into real-world product impact: owning the full stack from + instrumentation and data collection through real-time streaming inference, experimentation, + and rollout, while making 
practical engineering trade-offs (hardware, platforms, + team roles) and modeling human behavior and incentives to drive measurable engagement + and business outcomes.' --- - Links: * [Linkedin](https://www.linkedin.com/in/abouzar-abbaspour/){:target="_blank"} diff --git a/_podcast/s20e03-trends-in-data-engineering.md b/_podcast/trends-in-modern-data-engineering.md similarity index 89% rename from _podcast/s20e03-trends-in-data-engineering.md rename to _podcast/trends-in-modern-data-engineering.md index 91eb8204..fae50732 100644 --- a/_podcast/s20e03-trends-in-data-engineering.md +++ b/_podcast/trends-in-modern-data-engineering.md @@ -1,19 +1,139 @@ --- +title: "Modern Data Engineering: Iceberg, Delta Lake & AI-Powered Pipelines" +short: "Trends in Data Engineering" +season: 20 episode: 3 guests: - adrianbrudaru +image: images/podcast/trends-in-modern-data-engineering.jpg ids: - anchor: atalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae + anchor: datatalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae youtube: AlCFKbFIEM8 -image: images/podcast/s20e03-trends-in-data-engineering.jpg links: anchor: https://creators.spotify.com/pod/show/datatalksclub/episodes/Trends-in-Data-Engineering--Adrian-Brudaru-e2ui9ae apple: https://podcasts.apple.com/us/podcast/trends-in-data-engineering-adrian-brudaru/id1541710331?i=1000698294801 spotify: https://open.spotify.com/episode/35QbCW6Evqk1EPMKUDGGdv youtube: https://www.youtube.com/watch?v=AlCFKbFIEM8 -season: 20 -short: Trends in Data Engineering -title: 'Future-Proof Data Engineering: Adopt Apache Iceberg, DuckDB & AI-Powered Pipelines' +description: "Master Iceberg, Delta Lake and AI-powered pipelines to build scalable, governed data lakehouses—optimize ETL, boost real-time analytics and ML performance." 
+topics: +- data engineering +- data governance +- AI +- open-source +intro: "How can engineering teams build reliable, scalable lakehouse pipelines that combine transactional table formats with AI-driven automation? In this episode Adrian Brudaru—an economics-trained analyst turned freelance data practitioner and co-founder of a data company focused on open source tooling—joins us to explore the realities of modern data engineering.

Adrian draws on years of startup and freelance experience and a current mission to democratise data engineering through open source to discuss the practical trade-offs between Iceberg and Delta Lake, how table formats fit into a data lakehouse architecture, and where AI can augment pipeline development and observability. Key topics include selecting the right table format for versioning and governance, integrating AI-powered features into ETL/ELT workflows, and the role of open source tools in scaling data platforms.

Listen to gain grounded perspectives on Iceberg, Delta Lake, AI-powered pipelines, and data pipeline best practices—especially useful for data engineers, architects, and engineering managers evaluating lakehouse strategies or looking to adopt open source solutions." +dateadded: 2025-03-14 +duration: PT01H02M16S +quotableClips: +- name: Episode opening & guest introduction + startOffset: 1 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1 + endOffset: 143 +- name: Perspective on evolving data engineering challenges + startOffset: 143 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=143 + endOffset: 190 +- name: 'Career journey: startups, freelancing, founding DLT' + startOffset: 190 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=190 + endOffset: 243 +- name: DLT as a Python-based ingestion standard and market impact + startOffset: 243 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=243 + endOffset: 465 +- name: DLT Plus vision and partnership outreach for freelancers + startOffset: 465 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=465 + endOffset: 663 +- name: 'Industry shift toward specialization: governance, data quality, streaming' + startOffset: 663 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=663 + endOffset: 757 +- name: 'Early-career opportunities: AI projects and startup hiring' + startOffset: 757 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=757 + endOffset: 872 +- name: Modern data stack critique and open-source "postmodern" alternatives + startOffset: 872 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=872 + endOffset: 1000 +- name: '2025 trends: AI integration in data engineering and Apache Iceberg adoption' + startOffset: 1000 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1000 + endOffset: 1097 +- name: 'Apache Iceberg explained: table format, Parquet storage, vendor lock-in reduction' + startOffset: 1097 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1097 + endOffset: 1287 +- name: 'Database 
layers and catalog role: storage, compute, access, metadata & lineage' + startOffset: 1287 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1287 + endOffset: 1421 +- name: Metadata and catalog tooling overview (AWS Glue and peers) + startOffset: 1421 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1421 + endOffset: 1558 +- name: 'DuckDB impact: embeddable local OLAP and portable query engine' + startOffset: 1558 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1558 + endOffset: 1660 +- name: 'Cost-efficient pipelines: DuckDB with GitHub Actions and headless table formats' + startOffset: 1660 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1660 + endOffset: 1831 +- name: Headless table formats and DLT support for Delta Lake and Iceberg + startOffset: 1831 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1831 + endOffset: 1889 +- name: dbt's influence on engineering workflows and alternatives like SQLMesh + startOffset: 1889 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1889 + endOffset: 2137 +- name: 'Workflow orchestration options in 2025: Airflow, Prefect, Dagster, GitHub + Actions' + startOffset: 2137 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2137 + endOffset: 2282 +- name: 'AI engineering convergence: data engineers building AI agents' + startOffset: 2282 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2282 + endOffset: 2466 +- name: 'Beginner roadmap: SQL, Python, capturing business requirements, building + a portfolio' + startOffset: 2466 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2466 + endOffset: 2682 +- name: Tool selection guidance and vendor caution for modern data stacks + startOffset: 2682 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2682 + endOffset: 2756 +- name: 'Transition paths: senior backend engineers moving into data engineering' + startOffset: 2756 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2756 + endOffset: 2884 +- name: 'Job market outlook: senior vs junior data 
engineering opportunities' + startOffset: 2884 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2884 + endOffset: 2982 +- name: 'Table format comparisons: Delta, Hudi, and Iceberg differences' + startOffset: 2982 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2982 + endOffset: 3079 +- name: 'Streaming architectures and tools: micro-batching, Kafka, SQS, Flink' + startOffset: 3079 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3079 + endOffset: 3375 +- name: AI-driven commoditization and code generation in data engineering + startOffset: 3375 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3375 + endOffset: 3582 +- name: 'DLT roadmap: DLT Plus and a marketplace for reusable data products' + startOffset: 3582 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3582 + endOffset: 3679 +- name: Episode wrap-up and key takeaways + startOffset: 3679 + url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3679 + endOffset: 3736 transcript: - header: Episode opening & guest introduction - line: This week, we’ll talk about trends in data engineering. Our special guest @@ -517,139 +637,19 @@ transcript: sec: 3737 time: '1:02:17' who: Alexey -description: Discover Apache Iceberg, DuckDB & AI-powered pipelines - learn cost-efficient - table formats, orchestration tactics and a career roadmap for data engineers. -intro: How do you future‑proof data engineering against vendor lock‑in, rising AI - demand, and exploding metadata complexity? In this episode, Adrian Brudaru — a former - business analyst turned freelancer and co‑founder of DLT — walks through practical - choices for building resilient, cost‑efficient pipelines. Adrian explains DLT as - a Python‑based ingestion standard, the DLT Plus vision and marketplace for reusable - data products, and why the industry is shifting toward specialization in governance, - data quality, and streaming.

Key topics include adopting Apache Iceberg - as a table format (Parquet storage and reduced vendor lock‑in), the role of data - catalogs and metadata tooling (AWS Glue and peers), and how DuckDB enables embeddable - local OLAP and portable query execution. We cover cost‑efficient patterns—DuckDB - with GitHub Actions and headless table formats—dbt’s influence and alternatives - like SQLMesh, orchestration choices (Airflow, Prefect, Dagster, GitHub Actions), - and the 2025 trend of AI integration in pipelines and AI agents. Listeners will - get actionable guidance on tool selection, beginner and transition roadmaps, and - when Iceberg, DuckDB, or AI‑powered approaches make sense for their data engineering - stack. -dateadded: '2025-03-14' -duration: PT01H02M16S -quotableClips: -- name: Episode opening & guest introduction - startOffset: 1 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1 - endOffset: 143 -- name: Perspective on evolving data engineering challenges - startOffset: 143 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=143 - endOffset: 190 -- name: 'Career journey: startups, freelancing, founding DLT' - startOffset: 190 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=190 - endOffset: 243 -- name: DLT as a Python-based ingestion standard and market impact - startOffset: 243 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=243 - endOffset: 465 -- name: DLT Plus vision and partnership outreach for freelancers - startOffset: 465 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=465 - endOffset: 663 -- name: 'Industry shift toward specialization: governance, data quality, streaming' - startOffset: 663 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=663 - endOffset: 757 -- name: 'Early-career opportunities: AI projects and startup hiring' - startOffset: 757 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=757 - endOffset: 872 -- name: Modern data stack critique and open-source "postmodern" alternatives - startOffset: 872 - url: 
https://www.youtube.com/watch?v=AlCFKbFIEM8&t=872 - endOffset: 1000 -- name: '2025 trends: AI integration in data engineering and Apache Iceberg adoption' - startOffset: 1000 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1000 - endOffset: 1097 -- name: 'Apache Iceberg explained: table format, Parquet storage, vendor lock-in reduction' - startOffset: 1097 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1097 - endOffset: 1287 -- name: 'Database layers and catalog role: storage, compute, access, metadata & lineage' - startOffset: 1287 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1287 - endOffset: 1421 -- name: Metadata and catalog tooling overview (AWS Glue and peers) - startOffset: 1421 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1421 - endOffset: 1558 -- name: 'DuckDB impact: embeddable local OLAP and portable query engine' - startOffset: 1558 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1558 - endOffset: 1660 -- name: 'Cost-efficient pipelines: DuckDB with GitHub Actions and headless table formats' - startOffset: 1660 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1660 - endOffset: 1831 -- name: Headless table formats and DLT support for Delta Lake and Iceberg - startOffset: 1831 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1831 - endOffset: 1889 -- name: dbt's influence on engineering workflows and alternatives like SQLMesh - startOffset: 1889 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=1889 - endOffset: 2137 -- name: 'Workflow orchestration options in 2025: Airflow, Prefect, Dagster, GitHub - Actions' - startOffset: 2137 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2137 - endOffset: 2282 -- name: 'AI engineering convergence: data engineers building AI agents' - startOffset: 2282 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2282 - endOffset: 2466 -- name: 'Beginner roadmap: SQL, Python, capturing business requirements, building - a portfolio' - startOffset: 2466 - url: 
https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2466 - endOffset: 2682 -- name: Tool selection guidance and vendor caution for modern data stacks - startOffset: 2682 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2682 - endOffset: 2756 -- name: 'Transition paths: senior backend engineers moving into data engineering' - startOffset: 2756 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2756 - endOffset: 2884 -- name: 'Job market outlook: senior vs junior data engineering opportunities' - startOffset: 2884 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2884 - endOffset: 2982 -- name: 'Table format comparisons: Delta, Hudi, and Iceberg differences' - startOffset: 2982 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=2982 - endOffset: 3079 -- name: 'Streaming architectures and tools: micro-batching, Kafka, SQS, Flink' - startOffset: 3079 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3079 - endOffset: 3375 -- name: AI-driven commoditization and code generation in data engineering - startOffset: 3375 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3375 - endOffset: 3582 -- name: 'DLT roadmap: DLT Plus and a marketplace for reusable data products' - startOffset: 3582 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3582 - endOffset: 3679 -- name: Episode wrap-up and key takeaways - startOffset: 3679 - url: https://www.youtube.com/watch?v=AlCFKbFIEM8&t=3679 - endOffset: 3736 ---- +context: 'Context — This episode traces the practical and technological shifts shaping + data engineering today: the rise of open-source, standards-driven building blocks + (table formats like Iceberg/Delta, catalogs, DuckDB), new orchestration and workflow + patterns, AI-driven tooling, specialization in governance/quality/streaming, and + emerging marketplaces and platforms (like DLT Plus) that package reusable data products. 
+ Core theme — Data engineering is transitioning from monolithic, vendor-locked stacks + to a composable, metadata-first ecosystem: teams win by adopting open, portable + standards (headless table formats, catalogs, embeddable query engines), designing + interoperable, cost-efficient pipelines, and packaging reusable data products—while + practitioners pivot toward specialization and metadata-aware, SQL/Python-first skills + to build and govern interoperable, AI-enabled data workflows.' +--- Links: * [Linkedin](https://www.linkedin.com/in/data-team/){:target="_blank"} diff --git a/_podcast/s19e01-using-data-to-create-liveable-cities.md b/_podcast/urban-data-science.md similarity index 94% rename from _podcast/s19e01-using-data-to-create-liveable-cities.md rename to _podcast/urban-data-science.md index b1f0ddef..98b31095 100644 --- a/_podcast/s19e01-using-data-to-create-liveable-cities.md +++ b/_podcast/urban-data-science.md @@ -1,20 +1,149 @@ --- +title: "Urban Data Science: Transport Analytics, Sensors and Liveable Cities" +short: "Using Data to Create Liveable Cities" +season: 19 episode: 1 guests: - rachellim +image: images/podcast/urban-data-science.jpg ids: - anchor: atatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup + anchor: datatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup youtube: VXQIGHUWeL0 -image: images/podcast/s19e01-using-data-to-create-liveable-cities.jpg links: anchor: https://podcasters.spotify.com/pod/show/datatalksclub/episodes/Using-Data-to-Create-Liveable-Cities---Rachel-Lim-e2qecup apple: https://podcasts.apple.com/us/podcast/using-data-to-create-liveable-cities-rachel-lim/id1541710331?i=1000675373908 spotify: https://open.spotify.com/episode/1z7jdogto8i4Zk6Zh1vDxE?si=KCg2Iq1US0SKwFCKasGqUg youtube: https://www.youtube.com/watch?v=VXQIGHUWeL0 -season: 19 -short: Using Data to Create Liveable Cities -title: How Urban Transport Data & AI Enable Real-Time Monitoring, Demand Forecasting 
- & Data Pipelines +description: "Discover urban data science, transport analytics & sensors for liveable cities - real-time monitoring, fare-card insights, data pipelines, AI tools." +topics: +- data engineering +- LLMs +- computer vision +- data science +intro: "How can cities use transport analytics, sensors and AI to become more liveable? In this episode Rachel Lim, an urban data scientist with a geography background and a master's in urban data science, walks through practical ways data informs transport planning and placemaking. We cover core data sources—GPS, sensors, fare card systems, ride-hailing logs and computer vision for passenger flow—plus travel demand forecasting, real-time monitoring (including event analytics like F1), and operational responses such as traffic marshals and recovery services.

Rachel explains data engineering realities—Kafka, Apache Spark, real-time APIs, data pipelines and warehousing—alongside journey logic, fare computation and data quality management. She also explores emerging tools: generative AI for natural-language access, text-to-SQL architectures, synthetic data, and privacy practices for publishing masked datasets. The conversation highlights Singapore's planning context, open data portals (data.gov.sg, DataMall), and project ideas for learners using parking and taxi datasets.

Listen to learn which transport analytics and sensor strategies produce actionable insights, how to set up robust data pipelines, and where to start hands-on projects to build liveable cities." +dateadded: 2024-11-06 +duration: PT00H51M32S +quotableClips: +- name: Episode Introduction + startOffset: 0 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=0 + endOffset: 116 +- name: 'Guest Introduction: Rachel Lim, urban data scientist' + startOffset: 116 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=116 + endOffset: 172 +- name: 'Career Path: Geography to urban informatics and data engineering' + startOffset: 172 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=172 + endOffset: 287 +- name: 'Transport Scientist Role: public sector and consultancy applications' + startOffset: 287 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=287 + endOffset: 334 +- name: 'Planning Horizons: short-term operations vs long-term infrastructure' + startOffset: 334 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=334 + endOffset: 407 +- name: 'Data Sources for Transport: GPS, sensors, fare cards, ride-hailing' + startOffset: 407 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=407 + endOffset: 460 +- name: 'Fare Card Systems: tap-in/tap-out travel data mechanics' + startOffset: 460 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=460 + endOffset: 500 +- name: Computer Vision for Passenger Flow where fare data is absent + startOffset: 500 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=500 + endOffset: 535 +- name: 'Professional Motivation: internships, World City Summit, master’s study' + startOffset: 535 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=535 + endOffset: 686 +- name: 'Urban Design Principles: walkability, public spaces, human-scale streets' + startOffset: 686 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=686 + endOffset: 829 +- name: 'Livability Criteria: transport, housing, green space, digital access' + startOffset: 829 + url: 
https://www.youtube.com/watch?v=VXQIGHUWeL0&t=829 + endOffset: 948 +- name: 'Singapore Planning Practices: Master Plan and placemaking initiatives' + startOffset: 948 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=948 + endOffset: 1104 +- name: 'Open Data & Collaboration: public datasets enabling research and apps' + startOffset: 1104 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1104 + endOffset: 1269 +- name: 'Travel Demand Forecasting: predicting movements for infrastructure planning' + startOffset: 1269 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1269 + endOffset: 1381 +- name: 'Data Pipelines & Warehousing: aggregation of real-time and historical data' + startOffset: 1381 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1381 + endOffset: 1449 +- name: 'Real-Time Monitoring: traffic management and event analytics (F1 example)' + startOffset: 1449 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1449 + endOffset: 1510 +- name: 'Operational Response: cameras, recovery services, traffic marshals' + startOffset: 1510 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1510 + endOffset: 1679 +- name: 'Generative AI in Data Engineering: natural-language access to data' + startOffset: 1679 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1679 + endOffset: 1999 +- name: 'Text-to-SQL Architecture: metadata, vector DB, RAG and LLMs' + startOffset: 1999 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1999 + endOffset: 2118 +- name: 'Prompt Engineering & Query Safety: reliability and SQL restrictions' + startOffset: 2118 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2118 + endOffset: 2192 +- name: 'Dataset Scale: millions of fare card records and demand analytics' + startOffset: 2192 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2192 + endOffset: 2314 +- name: 'Infrastructure Stack: Kafka, Apache Spark, sensors, real-time APIs' + startOffset: 2314 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2314 + endOffset: 
2367 +- name: 'Journey Logic & Aggregation: trip definition and fare computation' + startOffset: 2367 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2367 + endOffset: 2468 +- name: 'Data Quality Management: anomaly detection and sensor reliability' + startOffset: 2468 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2468 + endOffset: 2537 +- name: 'Generative AI Use Cases: synthetic data and conversational search' + startOffset: 2537 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2537 + endOffset: 2740 +- name: 'Privacy & Publishing: masking sensitive data before release' + startOffset: 2740 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2740 + endOffset: 2760 +- name: 'Singapore Open Data Portals: data.gov.sg and DataMall resources' + startOffset: 2760 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2760 + endOffset: 2825 +- name: 'Project Ideas for Learning: car parking and real-time taxi datasets' + startOffset: 2825 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2825 + endOffset: 2956 +- name: 'Recommended Resources: DataTalks.Club, Jane Jacobs, Happy City' + startOffset: 2956 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2956 + endOffset: 3162 +- name: Episode Wrap-Up and Closing Remarks + startOffset: 3162 + url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=3162 + endOffset: 3092 transcript: - header: Episode Introduction - header: 'Guest Introduction: Rachel Lim, urban data scientist' @@ -786,150 +915,18 @@ transcript: sec: 3208 time: '53:28' who: Alexey -description: Discover urban transport data, real-time monitoring and demand forecasting - techniques—learn data pipelines, text-to-SQL and AI tools to improve planning & - ops. -intro: How can urban transport data and AI be combined to enable real-time monitoring, - accurate demand forecasting, and reliable data pipelines for city operations and - planning? 
In this episode, Rachel Lim, an urban data scientist with a geography background - and a master’s in urban data science, walks through practical approaches that bridge - urban design and data engineering. We cover the core data sources—GPS, sensors, - fare card systems, ride‑hailing feeds—and where computer vision complements missing - passenger-flow data. Rachel explains planning horizons from short‑term operational - response (traffic marshals, cameras, event analytics like F1) to long‑term infrastructure - planning driven by travel demand forecasting. She breaks down data pipelines and - warehousing, real‑time stacks (Kafka, Apache Spark, sensors, APIs), journey logic, - anomaly detection, and privacy-preserving publishing to open portals like data.gov.sg - and DataMall. The episode also explores generative AI and Text‑to‑SQL workflows, - prompt safety, synthetic data, and conversational search for natural‑language access - to datasets. Listeners will gain actionable insight into building and governing - transport data systems that support real‑time monitoring, demand forecasting, and - scalable analytics. 
-dateadded: '2024-11-06' -duration: PT00H51M32S -quotableClips: -- name: Episode Introduction - startOffset: 0 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=0 - endOffset: 116 -- name: 'Guest Introduction: Rachel Lim, urban data scientist' - startOffset: 116 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=116 - endOffset: 172 -- name: 'Career Path: Geography to urban informatics and data engineering' - startOffset: 172 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=172 - endOffset: 287 -- name: 'Transport Scientist Role: public sector and consultancy applications' - startOffset: 287 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=287 - endOffset: 334 -- name: 'Planning Horizons: short-term operations vs long-term infrastructure' - startOffset: 334 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=334 - endOffset: 407 -- name: 'Data Sources for Transport: GPS, sensors, fare cards, ride-hailing' - startOffset: 407 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=407 - endOffset: 460 -- name: 'Fare Card Systems: tap-in/tap-out travel data mechanics' - startOffset: 460 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=460 - endOffset: 500 -- name: Computer Vision for Passenger Flow where fare data is absent - startOffset: 500 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=500 - endOffset: 535 -- name: 'Professional Motivation: internships, World City Summit, master’s study' - startOffset: 535 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=535 - endOffset: 686 -- name: 'Urban Design Principles: walkability, public spaces, human-scale streets' - startOffset: 686 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=686 - endOffset: 829 -- name: 'Livability Criteria: transport, housing, green space, digital access' - startOffset: 829 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=829 - endOffset: 948 -- name: 'Singapore Planning Practices: Master Plan and placemaking initiatives' - startOffset: 948 - url: 
https://www.youtube.com/watch?v=VXQIGHUWeL0&t=948 - endOffset: 1104 -- name: 'Open Data & Collaboration: public datasets enabling research and apps' - startOffset: 1104 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1104 - endOffset: 1269 -- name: 'Travel Demand Forecasting: predicting movements for infrastructure planning' - startOffset: 1269 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1269 - endOffset: 1381 -- name: 'Data Pipelines & Warehousing: aggregation of real-time and historical data' - startOffset: 1381 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1381 - endOffset: 1449 -- name: 'Real-Time Monitoring: traffic management and event analytics (F1 example)' - startOffset: 1449 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1449 - endOffset: 1510 -- name: 'Operational Response: cameras, recovery services, traffic marshals' - startOffset: 1510 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1510 - endOffset: 1679 -- name: 'Generative AI in Data Engineering: natural-language access to data' - startOffset: 1679 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1679 - endOffset: 1999 -- name: 'Text-to-SQL Architecture: metadata, vector DB, RAG and LLMs' - startOffset: 1999 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=1999 - endOffset: 2118 -- name: 'Prompt Engineering & Query Safety: reliability and SQL restrictions' - startOffset: 2118 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2118 - endOffset: 2192 -- name: 'Dataset Scale: millions of fare card records and demand analytics' - startOffset: 2192 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2192 - endOffset: 2314 -- name: 'Infrastructure Stack: Kafka, Apache Spark, sensors, real-time APIs' - startOffset: 2314 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2314 - endOffset: 2367 -- name: 'Journey Logic & Aggregation: trip definition and fare computation' - startOffset: 2367 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2367 - endOffset: 
2468 -- name: 'Data Quality Management: anomaly detection and sensor reliability' - startOffset: 2468 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2468 - endOffset: 2537 -- name: 'Generative AI Use Cases: synthetic data and conversational search' - startOffset: 2537 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2537 - endOffset: 2740 -- name: 'Privacy & Publishing: masking sensitive data before release' - startOffset: 2740 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2740 - endOffset: 2760 -- name: 'Singapore Open Data Portals: data.gov.sg and DataMall resources' - startOffset: 2760 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2760 - endOffset: 2825 -- name: 'Project Ideas for Learning: car parking and real-time taxi datasets' - startOffset: 2825 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2825 - endOffset: 2956 -- name: 'Recommended Resources: DataTalks.Club, Jane Jacobs, Happy City' - startOffset: 2956 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=2956 - endOffset: 3162 -- name: Episode Wrap-Up and Closing Remarks - startOffset: 3162 - url: https://www.youtube.com/watch?v=VXQIGHUWeL0&t=3162 - endOffset: 3092 ---- +context: 'Context: Cities are complex systems where transport, land use, public space + and technology intersect; practitioners juggle short-term operations and long-term + planning using streams of sensor, fare, and observational data while balancing livability, + equity and privacy. + Core narrative: A human-centered, data-driven approach to urban mobility—building + robust, privacy-aware data pipelines that integrate real-time sensors, fare systems + and observational sources, and leveraging predictive models and generative-AI interfaces—enables + actionable insights for both operational responsiveness and strategic planning, + ultimately shaping walkable, equitable, and livable cities through transparent open + data, rigorous data quality, and interdisciplinary collaboration.' 
+--- Links: * [Dynamic Datasets](https://datamall.lta.gov.sg/content/datamall/en/dynamic-data.html){:target="_blank"} \ No newline at end of file diff --git a/_podcast/s08e01-visualising-machine-learning.md b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md similarity index 95% rename from _podcast/s08e01-visualising-machine-learning.md rename to _podcast/visualizing-machine-learning-concepts-to-explain-ml.md index 5136fb79..84f10470 100644 --- a/_podcast/s08e01-visualising-machine-learning.md +++ b/_podcast/visualizing-machine-learning-concepts-to-explain-ml.md @@ -1,40 +1,124 @@ --- +title: "Using Visualizations to Explain Machine Learning: Build Intuition with kDimensions, Figma & Templates" +short: "Using Visualizations to Explain Machine Learning" +season: 8 episode: 1 guests: - meoramer -intro: 'How do you teach machine learning so people build intuition before diving - into math? In this episode, Meor Amer—educator, author, and Developer Relations at - Cohere—walks through a visual-first approach to machine learning that makes concepts - accessible and actionable. Drawing on his journey from bioengineering and telecom - analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, - Meor explains why visual machine learning and dimensionality reduction matter and - how templates can scale understanding.

We cover practical workflows: generating - ideas (visualize the verb, use metaphors like the catapult and airplane), design - constraints that spark creativity, and a sketchbook → Figma pipeline for engineers - that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn - visuals, how to map ML problems (classification, regression, clustering, anomaly, - RL) to templates, and hands‑on learning techniques—consume with intent, break and - modify code. He also discusses monetizing visual design services and turning articles - into key visuals using 4–5 keywords.

Listen to learn concrete techniques - for ML visualization, Figma for engineers, and creating reusable templates that - build intuition and make machine learning teachable.' -topics: -- machine learning -- education +image: images/podcast/visualizing-machine-learning-concepts-to-explain-ml.jpg ids: anchor: Visualising-Machine-Learning---Meor-Amer-e1g7iri youtube: OuCuk-7RHjM -image: images/podcast/s08e01-visualising-machine-learning.jpg links: anchor: https://anchor.fm/datatalksclub/episodes/Visualising-Machine-Learning---Meor-Amer-e1g7iri apple: https://podcasts.apple.com/us/podcast/visualising-machine-learning-meor-amer/id1541710331?i=1000555246590 spotify: https://open.spotify.com/episode/032NhEphm5QDdDFDUIypOL youtube: https://www.youtube.com/watch?v=OuCuk-7RHjM -season: 8 -short: Visualising Machine Learning -title: 'Visualize Machine Learning: Build Intuition with kDimensions, Figma & Templates' -description: Discover kDimensions and Figma templates to visualize machine learning, - build intuition before the math, map ML problems, and create shareable visuals. + +description: "Discover kDimensions and Figma templates to visualize machine learning, build intuition before the math, map ML problems, and create shareable visuals" +intro: "How do you teach machine learning so people build intuition before diving into math? In this episode, Meor Amer—educator, author, and Developer Relations at Cohere—walks through a visual-first approach to machine learning that makes concepts accessible and actionable. Drawing on his journey from bioengineering and telecom analytics to founding kDimensions and writing A Visual Introduction to Deep Learning, Meor explains why visual machine learning and dimensionality reduction matter and how templates can scale understanding.

We cover practical workflows: generating ideas (visualize the verb, use metaphors like the catapult and airplane), design constraints that spark creativity, and a sketchbook → Figma pipeline for engineers that emphasizes message over aesthetics. Meor shares posting cadence for LinkedIn visuals, how to map ML problems (classification, regression, clustering, anomaly, RL) to templates, and hands-on learning techniques—consume with intent, break and modify code. He also discusses monetizing visual design services and turning articles into key visuals using 4–5 keywords.

Listen to learn concrete techniques for ML visualization, Figma for engineers, and creating reusable templates that build intuition and make machine learning teachable." +topics: +- machine learning +- education +dateadded: 2022-03-26 + +duration: PT00H59M45S + +quotableClips: +- name: Episode Introduction & Visual ML Overview + startOffset: 0 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=0 + endOffset: 116 +- name: Posting Cadence & Visuals on LinkedIn + startOffset: 116 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=116 + endOffset: 177 +- name: 'Career Journey: Bioengineering → Telecom Analytics → Self-employment' + startOffset: 177 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=177 + endOffset: 375 +- name: 'kDimensions: Name & Visual Dimensionality Reduction' + startOffset: 375 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=375 + endOffset: 532 +- name: Jack Butcher Influence & Visual Engineering Principles + startOffset: 532 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=532 + endOffset: 700 +- name: 'Purpose of Visuals: Build Intuition Before Math' + startOffset: 700 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=700 + endOffset: 852 +- name: 'Design Constraints: Creativity Through Color & Shape Limits' + startOffset: 852 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=852 + endOffset: 1053 +- name: 'Idea Generation: Visualize the Verb & Use Metaphors' + startOffset: 1053 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1053 + endOffset: 1286 +- name: Drift Visualized (Catapult Metaphor) & Data-centric AI Airplane Analogy + startOffset: 1286 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1286 + endOffset: 1447 +- name: 'Creative Process: Longlist → Shortlist → Brainstorming' + startOffset: 1447 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1447 + endOffset: 1826 +- name: 'Capturing Ideas: Sketchbook, Notes & Quick Logging' + startOffset: 1826 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1826 
+ endOffset: 1874 +- name: 'Tools: Figma for Engineers & Geometric Shape Workflow' + startOffset: 1874 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1874 + endOffset: 2011 +- name: 'From Sketch to Figma: Drafting, Asset Reuse & Iteration' + startOffset: 2011 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2011 + endOffset: 2132 +- name: 'Design Advice: Prioritize Message Over Aesthetics; Start Posting' + startOffset: 2132 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2132 + endOffset: 2450 +- name: 'Learning Technique: Consume with Intent to Teach — "What If?" Questions' + startOffset: 2450 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2450 + endOffset: 2617 +- name: 'Hands-on Learning: Break and Modify Code to Understand ML' + startOffset: 2617 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2617 + endOffset: 2687 +- name: 'Monetization: Visual Design Services for Startups & Content Creators' + startOffset: 2687 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2687 + endOffset: 2940 +- name: 'Content Design: Turn Articles into Key Visuals (Extract 4–5 Keywords)' + startOffset: 2940 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2940 + endOffset: 3056 +- name: 'Visualization Techniques: Contrast, Balance & Slider Metaphors' + startOffset: 3056 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3056 + endOffset: 3246 +- name: 'Mapping ML Problems to Visual Templates: Classification, Regression, Anomaly, + Clustering, RL' + startOffset: 3246 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3246 + endOffset: 3361 +- name: 'Book Overview: Visual Introduction to Deep Learning (Neuron-by-Neuron)' + startOffset: 3361 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3361 + endOffset: 3536 +- name: 'Book Workflow: Visual-first Layout with Concise Text' + startOffset: 3536 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3536 + endOffset: 3612 +- name: 'Closing: kDimensions, Book Links & Contact Information' + 
startOffset: 3612 + url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3612 + endOffset: 3585 + transcript: - header: Episode Introduction & Visual ML Overview - header: Posting Cadence & Visuals on LinkedIn @@ -61,7 +145,7 @@ transcript: sec: 173 time: '2:53' who: Meor -- header: 'Career Journey: Bioengineering → Telecom Analytics → Self‑employment' +- header: 'Career Journey: Bioengineering → Telecom Analytics → Self-employment' - line: Before we go into our main topic of visualising machine learning, let's start with your background. Can you tell us about your career journey so far? sec: 177 @@ -311,7 +395,7 @@ transcript: sec: 1135 time: '18:55' who: Meor -- header: Drift Visualized (Catapult Metaphor) & Data‑centric AI Airplane Analogy +- header: Drift Visualized (Catapult Metaphor) & Data-centric AI Airplane Analogy - line: Okay, so first, you think, "I want to create something on drift” Then you give yourself a bit of time, use a timer, set it to 5-10 minutes. And then you start brainstorming, like “How can I show the action? What is the action there?” @@ -603,7 +687,7 @@ transcript: sec: 2544 time: '42:24' who: Meor -- header: 'Hands‑on Learning: Break and Modify Code to Understand ML' +- header: 'Hands-on Learning: Break and Modify Code to Understand ML' - line: How do you come up with this “What if?” and “What can go wrong?”? If you have practical experience, then you can use it. But if you're just learning this thing, how can you know about these things? @@ -783,7 +867,7 @@ transcript: sec: 3246 time: '54:06' who: Meor -- header: 'Book Overview: Visual Introduction to Deep Learning (Neuron‑by‑Neuron)' +- header: 'Book Overview: Visual Introduction to Deep Learning (Neuron-by-Neuron)' - line: I also know that you wrote a book. You recently came to DataTalks.Club’s slack to answer questions about your book. Can you tell us about it? And how did you come up with the idea behind the book? 
@@ -816,7 +900,7 @@ transcript: sec: 3430 time: '57:10' who: Meor -- header: 'Book Workflow: Visual‑first Layout with Concise Text' +- header: 'Book Workflow: Visual-first Layout with Concise Text' - line: Did you first come up with text and then created illustrations? Or did you first come up with illustrations and then wrote text for them? sec: 3536 @@ -870,102 +954,6 @@ transcript: sec: 3701 time: '1:01:41' who: Meor -dateadded: '2022-03-26' -duration: PT00H59M45S -quotableClips: -- name: Episode Introduction & Visual ML Overview - startOffset: 0 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=0 - endOffset: 116 -- name: Posting Cadence & Visuals on LinkedIn - startOffset: 116 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=116 - endOffset: 177 -- name: 'Career Journey: Bioengineering → Telecom Analytics → Self‑employment' - startOffset: 177 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=177 - endOffset: 375 -- name: 'kDimensions: Name & Visual Dimensionality Reduction' - startOffset: 375 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=375 - endOffset: 532 -- name: Jack Butcher Influence & Visual Engineering Principles - startOffset: 532 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=532 - endOffset: 700 -- name: 'Purpose of Visuals: Build Intuition Before Math' - startOffset: 700 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=700 - endOffset: 852 -- name: 'Design Constraints: Creativity Through Color & Shape Limits' - startOffset: 852 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=852 - endOffset: 1053 -- name: 'Idea Generation: Visualize the Verb & Use Metaphors' - startOffset: 1053 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1053 - endOffset: 1286 -- name: Drift Visualized (Catapult Metaphor) & Data‑centric AI Airplane Analogy - startOffset: 1286 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1286 - endOffset: 1447 -- name: 'Creative Process: Longlist → Shortlist → Brainstorming' - startOffset: 1447 - url: 
https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1447 - endOffset: 1826 -- name: 'Capturing Ideas: Sketchbook, Notes & Quick Logging' - startOffset: 1826 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1826 - endOffset: 1874 -- name: 'Tools: Figma for Engineers & Geometric Shape Workflow' - startOffset: 1874 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=1874 - endOffset: 2011 -- name: 'From Sketch to Figma: Drafting, Asset Reuse & Iteration' - startOffset: 2011 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2011 - endOffset: 2132 -- name: 'Design Advice: Prioritize Message Over Aesthetics; Start Posting' - startOffset: 2132 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2132 - endOffset: 2450 -- name: 'Learning Technique: Consume with Intent to Teach — "What If?" Questions' - startOffset: 2450 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2450 - endOffset: 2617 -- name: 'Hands‑on Learning: Break and Modify Code to Understand ML' - startOffset: 2617 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2617 - endOffset: 2687 -- name: 'Monetization: Visual Design Services for Startups & Content Creators' - startOffset: 2687 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2687 - endOffset: 2940 -- name: 'Content Design: Turn Articles into Key Visuals (Extract 4–5 Keywords)' - startOffset: 2940 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=2940 - endOffset: 3056 -- name: 'Visualization Techniques: Contrast, Balance & Slider Metaphors' - startOffset: 3056 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3056 - endOffset: 3246 -- name: 'Mapping ML Problems to Visual Templates: Classification, Regression, Anomaly, - Clustering, RL' - startOffset: 3246 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3246 - endOffset: 3361 -- name: 'Book Overview: Visual Introduction to Deep Learning (Neuron‑by‑Neuron)' - startOffset: 3361 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3361 - endOffset: 3536 -- name: 'Book Workflow: 
Visual‑first Layout with Concise Text' - startOffset: 3536 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3536 - endOffset: 3612 -- name: 'Closing: kDimensions, Book Links & Contact Information' - startOffset: 3612 - url: https://www.youtube.com/watch?v=OuCuk-7RHjM&t=3612 - endOffset: 3585 --- Links: diff --git a/_posts/2025-08-16-ultimate-list-of-20-free-online-courses-on-machine-learning.md b/_posts/2025-08-16-free-machine-learning-courses.md similarity index 100% rename from _posts/2025-08-16-ultimate-list-of-20-free-online-courses-on-machine-learning.md rename to _posts/2025-08-16-free-machine-learning-courses.md diff --git a/images/podcast/s07e06-ab-testing.jpg b/images/podcast/ab-testing-and-product-experimentation.jpg similarity index 100% rename from images/podcast/s07e06-ab-testing.jpg rename to images/podcast/ab-testing-and-product-experimentation.jpg diff --git a/images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg b/images/podcast/ai-in-healthcare-and-digital-therapeutics.jpg similarity index 100% rename from images/podcast/s08e04-machine-learning-and-personalization-in-healthcare.jpg rename to images/podcast/ai-in-healthcare-and-digital-therapeutics.jpg diff --git a/images/podcast/s20e01-trends-in-ai-infrastructure.jpg b/images/podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.jpg similarity index 100% rename from images/podcast/s20e01-trends-in-ai-infrastructure.jpg rename to images/podcast/ai-infrastructure-hybrid-cloud-on-prem-distributed-training.jpg diff --git a/images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg b/images/podcast/ai-ml-product-design-and-experimentation.jpg similarity index 100% rename from images/podcast/s08e03-innovation-and-design-for-machine-learning.jpg rename to images/podcast/ai-ml-product-design-and-experimentation.jpg diff --git a/images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg 
b/images/podcast/algorithmic-trading-with-python-and-machine-learning.jpg similarity index 100% rename from images/podcast/s17e03-stock-market-analysis-with-python-and-machine-learning.jpg rename to images/podcast/algorithmic-trading-with-python-and-machine-learning.jpg diff --git a/images/podcast/s05e01-mastering-algorithms-and-data-structures.jpg b/images/podcast/algorithms-data-structures-for-engineers.jpg similarity index 100% rename from images/podcast/s05e01-mastering-algorithms-and-data-structures.jpg rename to images/podcast/algorithms-data-structures-for-engineers.jpg diff --git a/images/podcast/s03e11-analytics-engineer.jpg b/images/podcast/analytics-engineer-skills-tools.jpg similarity index 100% rename from images/podcast/s03e11-analytics-engineer.jpg rename to images/podcast/analytics-engineer-skills-tools.jpg diff --git a/images/podcast/s03e02-from-analytics-to-data-science.jpg b/images/podcast/analytics-to-data-science-with-kaggle-portfolio.jpg similarity index 100% rename from images/podcast/s03e02-from-analytics-to-data-science.jpg rename to images/podcast/analytics-to-data-science-with-kaggle-portfolio.jpg diff --git a/images/podcast/s20e07-build-strong-career-in-data.jpg b/images/podcast/applied-llm-research-and-career-growth-in-practice.jpg similarity index 100% rename from images/podcast/s20e07-build-strong-career-in-data.jpg rename to images/podcast/applied-llm-research-and-career-growth-in-practice.jpg diff --git a/images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg b/images/podcast/bayesian-modeling-workflows-and-tools.jpg similarity index 100% rename from images/podcast/s17e04-bayesian-modeling-and-probabilistic-programming.jpg rename to images/podcast/bayesian-modeling-workflows-and-tools.jpg diff --git a/images/podcast/s16e09-become-data-freelancer.jpg b/images/podcast/becoming-data-freelancer.jpg similarity index 100% rename from images/podcast/s16e09-become-data-freelancer.jpg rename to 
images/podcast/becoming-data-freelancer.jpg diff --git a/images/podcast/s06e05-post-doctoral-research.jpg b/images/podcast/big-data-analytics-and-postdoc-research.jpg similarity index 100% rename from images/podcast/s06e05-post-doctoral-research.jpg rename to images/podcast/big-data-analytics-and-postdoc-research.jpg diff --git a/images/podcast/s04e03-big-data-engineer-vs-data-scientist.jpg b/images/podcast/big-data-engineer-vs-data-scientist.jpg similarity index 100% rename from images/podcast/s04e03-big-data-engineer-vs-data-scientist.jpg rename to images/podcast/big-data-engineer-vs-data-scientist.jpg diff --git a/images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg b/images/podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.jpg similarity index 100% rename from images/podcast/s13e03-biohacking-for-data-scientists-and-ml-engineers.jpg rename to images/podcast/biohacking-productivity-for-data-scientists-and-ml-engineers.jpg diff --git a/images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg b/images/podcast/bioinformatics-worflows-tools-and-data-science.jpg similarity index 100% rename from images/podcast/s22e03-from-biotechnology-to-bioinformatics-software.jpg rename to images/podcast/bioinformatics-worflows-tools-and-data-science.jpg diff --git a/images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg b/images/podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.jpg similarity index 100% rename from images/podcast/s22e01-building-reliable-ai-products-in-era-of-gen-ai-and-agents.jpg rename to images/podcast/building-agentic-ai-engineering-tooling-retrieval-evaluation.jpg diff --git a/images/podcast/s16e08-ai-for-digital-health.jpg b/images/podcast/building-ai-digital-health-startups.jpg similarity index 100% rename from images/podcast/s16e08-ai-for-digital-health.jpg rename to images/podcast/building-ai-digital-health-startups.jpg diff --git 
a/images/podcast/s07e03-product-management-essentials.jpg b/images/podcast/building-and-scaling-ai-data-products-with-mlops.jpg similarity index 100% rename from images/podcast/s07e03-product-management-essentials.jpg rename to images/podcast/building-and-scaling-ai-data-products-with-mlops.jpg diff --git a/images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg b/images/podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.jpg similarity index 100% rename from images/podcast/s15e09-data-engineering-for-fraud-prevention.jpg rename to images/podcast/building-and-scaling-data-engineering-systems-for-fraud-detection.jpg diff --git a/images/podcast/s11e05-building-data-science-practice.jpg b/images/podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.jpg similarity index 100% rename from images/podcast/s11e05-building-data-science-practice.jpg rename to images/podcast/building-and-scaling-data-science-practice-industrial-ai-mlops.jpg diff --git a/images/podcast/s05e06-building-and-leading-data-teams.jpg b/images/podcast/building-and-scaling-data-team.jpg similarity index 100% rename from images/podcast/s05e06-building-and-leading-data-teams.jpg rename to images/podcast/building-and-scaling-data-team.jpg diff --git a/images/podcast/s11e06-product-owners-in-data-science.jpg b/images/podcast/building-data-products-product-owner-vs-product-manager.jpg similarity index 100% rename from images/podcast/s11e06-product-owners-in-data-science.jpg rename to images/podcast/building-data-products-product-owner-vs-product-manager.jpg diff --git a/images/podcast/s10e08-leading-data-research.jpg b/images/podcast/building-data-science-programs-and-democratizing-high-performance-computing.jpg similarity index 100% rename from images/podcast/s10e08-leading-data-research.jpg rename to images/podcast/building-data-science-programs-and-democratizing-high-performance-computing.jpg diff --git a/images/podcast/s01e03-building-ds-team.jpg 
b/images/podcast/building-data-team.jpg similarity index 100% rename from images/podcast/s01e03-building-ds-team.jpg rename to images/podcast/building-data-team.jpg diff --git a/images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg b/images/podcast/building-domestic-risk-assessment-tool.jpg similarity index 100% rename from images/podcast/s18e07-building-domestic-risk-assessment-tool.jpg rename to images/podcast/building-domestic-risk-assessment-tool.jpg diff --git a/images/podcast/s14e09-interpretable-ai-and-ml.jpg b/images/podcast/building-explainable-and-actionable-ai-ml-systems.jpg similarity index 100% rename from images/podcast/s14e09-interpretable-ai-and-ml.jpg rename to images/podcast/building-explainable-and-actionable-ai-ml-systems.jpg diff --git a/images/podcast/s16e02-bridging-data-science-and-healthcare.jpg b/images/podcast/building-healthcare-machine-learning-systems.jpg similarity index 100% rename from images/podcast/s16e02-bridging-data-science-and-healthcare.jpg rename to images/podcast/building-healthcare-machine-learning-systems.jpg diff --git a/images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg b/images/podcast/building-ml-communities-diversity-and-career-growth.jpg similarity index 100% rename from images/podcast/s13e01-accelerating-adoption-of-ai-through-diversity.jpg rename to images/podcast/building-ml-communities-diversity-and-career-growth.jpg diff --git a/images/podcast/s04e04-ml-startup.jpg b/images/podcast/building-mlops-startup.jpg similarity index 100% rename from images/podcast/s04e04-ml-startup.jpg rename to images/podcast/building-mlops-startup.jpg diff --git a/images/podcast/s11e04-large-scale-entity-resolution.jpg b/images/podcast/building-open-source-data-product-for-identity-resolution.jpg similarity index 100% rename from images/podcast/s11e04-large-scale-entity-resolution.jpg rename to images/podcast/building-open-source-data-product-for-identity-resolution.jpg diff --git 
a/images/podcast/s13e09-building-open-source-nlp-tool.jpg b/images/podcast/building-open-source-nlp-tool.jpg similarity index 100% rename from images/podcast/s13e09-building-open-source-nlp-tool.jpg rename to images/podcast/building-open-source-nlp-tool.jpg diff --git a/images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg b/images/podcast/building-production-ml-platform-and-mlops-team.jpg similarity index 100% rename from images/podcast/s14e08-from-scratch-to-success-building-mlops-team-and-ml-platform.jpg rename to images/podcast/building-production-ml-platform-and-mlops-team.jpg diff --git a/images/podcast/s17e09-building-production-search-systems.jpg b/images/podcast/building-production-search-systems.jpg similarity index 100% rename from images/podcast/s17e09-building-production-search-systems.jpg rename to images/podcast/building-production-search-systems.jpg diff --git a/images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg b/images/podcast/building-scalable-and-reliable-machine-learning-systems.jpg similarity index 100% rename from images/podcast/s14e01-building-scalable-and-reliable-machine-learning-systems.jpg rename to images/podcast/building-scalable-and-reliable-machine-learning-systems.jpg diff --git a/images/podcast/s15e06-democratizing-causality.jpg b/images/podcast/causal-inference-for-machine-learning.jpg similarity index 100% rename from images/podcast/s15e06-democratizing-causality.jpg rename to images/podcast/causal-inference-for-machine-learning.jpg diff --git a/images/podcast/s04e09-chief-data-officer.jpg b/images/podcast/chief-data-officer-data-strategy-and-org-design.jpg similarity index 100% rename from images/podcast/s04e09-chief-data-officer.jpg rename to images/podcast/chief-data-officer-data-strategy-and-org-design.jpg diff --git a/images/podcast/s03e10-data-governance.jpg b/images/podcast/cloud-data-governance.jpg similarity index 100% rename from 
images/podcast/s03e10-data-governance.jpg rename to images/podcast/cloud-data-governance.jpg diff --git a/images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg b/images/podcast/community-building-and-teaching-in-ai-tech.jpg similarity index 100% rename from images/podcast/s18e05-community-building-and-teaching-in-ai-tech.jpg rename to images/podcast/community-building-and-teaching-in-ai-tech.jpg diff --git a/images/podcast/s01e02-processes.jpg b/images/podcast/crisp-dm.jpg similarity index 100% rename from images/podcast/s01e02-processes.jpg rename to images/podcast/crisp-dm.jpg diff --git a/images/podcast/s12e03-data-centric-ai.jpg b/images/podcast/data-centric.jpg similarity index 100% rename from images/podcast/s12e03-data-centric-ai.jpg rename to images/podcast/data-centric.jpg diff --git a/images/podcast/s13e04-starting-consultancy-in-data-space.jpg b/images/podcast/data-consulting-business-pricing-and-client-acquisition.jpg similarity index 100% rename from images/podcast/s13e04-starting-consultancy-in-data-space.jpg rename to images/podcast/data-consulting-business-pricing-and-client-acquisition.jpg diff --git a/images/podcast/s08e08-teaching-data-engineers.jpg b/images/podcast/data-engineering-career-path-and-skills.jpg similarity index 100% rename from images/podcast/s08e08-teaching-data-engineers.jpg rename to images/podcast/data-engineering-career-path-and-skills.jpg diff --git a/images/podcast/s07e07-becoming-a-data-engineering-manager.jpg b/images/podcast/data-engineering-leadership-and-modern-data-platforms.jpg similarity index 100% rename from images/podcast/s07e07-becoming-a-data-engineering-manager.jpg rename to images/podcast/data-engineering-leadership-and-modern-data-platforms.jpg diff --git a/images/podcast/s05e02-data-engineering-acronyms.jpg b/images/podcast/data-engineering-tools-modern-data-stack.jpg similarity index 100% rename from images/podcast/s05e02-data-engineering-acronyms.jpg rename to 
images/podcast/data-engineering-tools-modern-data-stack.jpg diff --git a/images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg b/images/podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.jpg similarity index 100% rename from images/podcast/s20e09-taking-your-freelance-career-to-next-level.jpg rename to images/podcast/data-freelancing-career-strategy-market-demand-and-client-acquisition.jpg diff --git a/images/podcast/s14e04-data-access-management.jpg b/images/podcast/data-governance-data-access-management.jpg similarity index 100% rename from images/podcast/s14e04-data-access-management.jpg rename to images/podcast/data-governance-data-access-management.jpg diff --git a/images/podcast/s06e02-non-technical-interviews.jpg b/images/podcast/data-interview-behavioral-and-portfolio-prep-guide.jpg similarity index 100% rename from images/podcast/s06e02-non-technical-interviews.jpg rename to images/podcast/data-interview-behavioral-and-portfolio-prep-guide.jpg diff --git a/images/podcast/s11e08-technical-writing-and-data-journalism.jpg b/images/podcast/data-journalism-python-visualization-storytelling.jpg similarity index 100% rename from images/podcast/s11e08-technical-writing-and-data-journalism.jpg rename to images/podcast/data-journalism-python-visualization-storytelling.jpg diff --git a/images/podcast/s18e01-inclusive-data-leadership-coaching.jpg b/images/podcast/data-leadership-coaching.jpg similarity index 100% rename from images/podcast/s18e01-inclusive-data-leadership-coaching.jpg rename to images/podcast/data-leadership-coaching.jpg diff --git a/images/podcast/s03e08-data-led-professional.jpg b/images/podcast/data-led-growth-event-tracking-and-reverse-etl.jpg similarity index 100% rename from images/podcast/s03e08-data-led-professional.jpg rename to images/podcast/data-led-growth-event-tracking-and-reverse-etl.jpg diff --git a/images/podcast/s10e06-data-mesh-101.jpg 
b/images/podcast/data-mesh-architecture-decentralized-data-products.jpg similarity index 100% rename from images/podcast/s10e06-data-mesh-101.jpg rename to images/podcast/data-mesh-architecture-decentralized-data-products.jpg diff --git a/images/podcast/s14e02-practical-data-privacy.jpg b/images/podcast/data-privacy-engineering-gdpr-machine-learning.jpg similarity index 100% rename from images/podcast/s14e02-practical-data-privacy.jpg rename to images/podcast/data-privacy-engineering-gdpr-machine-learning.jpg diff --git a/images/podcast/s12e02-business-skills-for-data-professionals.jpg b/images/podcast/data-professionals-business-skills-in-saas.jpg similarity index 100% rename from images/podcast/s12e02-business-skills-for-data-professionals.jpg rename to images/podcast/data-professionals-business-skills-in-saas.jpg diff --git a/images/podcast/s03e03-data-observability.jpg b/images/podcast/data-quality-data-observability-data-reliability.jpg similarity index 100% rename from images/podcast/s03e03-data-observability.jpg rename to images/podcast/data-quality-data-observability-data-reliability.jpg diff --git a/images/podcast/s13e02-analytics-for-better-world.jpg b/images/podcast/data-science-and-analytics-for-nonprofits-tech-for-good.jpg similarity index 100% rename from images/podcast/s13e02-analytics-for-better-world.jpg rename to images/podcast/data-science-and-analytics-for-nonprofits-tech-for-good.jpg diff --git a/images/podcast/s02e07-abc-data-science.jpg b/images/podcast/data-science-career-abc-framework.jpg similarity index 100% rename from images/podcast/s02e07-abc-data-science.jpg rename to images/podcast/data-science-career-abc-framework.jpg diff --git a/images/podcast/s03e09-what-data-scientists-dont-mention.jpg b/images/podcast/data-science-failures-and-mlops-lessons.jpg similarity index 100% rename from images/podcast/s03e09-what-data-scientists-dont-mention.jpg rename to images/podcast/data-science-failures-and-mlops-lessons.jpg diff --git 
a/images/podcast/s10e01-data-science-for-social-impact.jpg b/images/podcast/data-science-for-public-policy-ethical-ai-social-impact.jpg similarity index 100% rename from images/podcast/s10e01-data-science-for-social-impact.jpg rename to images/podcast/data-science-for-public-policy-ethical-ai-social-impact.jpg diff --git a/images/podcast/s03e04-interviewing-300-data-scientists.jpg b/images/podcast/data-science-interview-and-cv-guide.jpg similarity index 100% rename from images/podcast/s03e04-interviewing-300-data-scientists.jpg rename to images/podcast/data-science-interview-and-cv-guide.jpg diff --git a/images/podcast/s10e02-decoding-data-science-job-descriptions.jpg b/images/podcast/data-science-job-red-flags-and-mismatched-roles.jpg similarity index 100% rename from images/podcast/s10e02-decoding-data-science-job-descriptions.jpg rename to images/podcast/data-science-job-red-flags-and-mismatched-roles.jpg diff --git a/images/podcast/s06e09-data-science-manager.jpg b/images/podcast/data-science-leadership-hiring-mlops.jpg similarity index 100% rename from images/podcast/s06e09-data-science-manager.jpg rename to images/podcast/data-science-leadership-hiring-mlops.jpg diff --git a/images/podcast/s13e06-secret-sauce-of-data-science-management.jpg b/images/podcast/data-science-management-and-agile-machine-learning.jpg similarity index 100% rename from images/podcast/s13e06-secret-sauce-of-data-science-management.jpg rename to images/podcast/data-science-management-and-agile-machine-learning.jpg diff --git a/images/podcast/s06e03-manager-vs-expert.jpg b/images/podcast/data-science-manager-vs-expert-hiring-guide.jpg similarity index 100% rename from images/podcast/s06e03-manager-vs-expert.jpg rename to images/podcast/data-science-manager-vs-expert-hiring-guide.jpg diff --git a/images/podcast/s09e07-designing-data-science-organization.jpg b/images/podcast/data-science-team-structure-and-org-design.jpg similarity index 100% rename from 
images/podcast/s09e07-designing-data-science-organization.jpg rename to images/podcast/data-science-team-structure-and-org-design.jpg diff --git a/images/podcast/s12e05-indie-hacking.jpg b/images/podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.jpg similarity index 100% rename from images/podcast/s12e05-indie-hacking.jpg rename to images/podcast/data-scientist-and-indie-hacker-bootstrapping-side-projects.jpg diff --git a/images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg b/images/podcast/data-strategy-and-dataops-for-ai-powered-products.jpg similarity index 100% rename from images/podcast/s14e03-data-strategy-key-principles-and-best-practices.jpg rename to images/podcast/data-strategy-and-dataops-for-ai-powered-products.jpg diff --git a/images/podcast/s01e01-roles.jpg b/images/podcast/data-team-roles.jpg similarity index 100% rename from images/podcast/s01e01-roles.jpg rename to images/podcast/data-team-roles.jpg diff --git a/images/podcast/s03e04-effective-communication-with-business.jpg b/images/podcast/data-translator-role-and-data-strategy.jpg similarity index 100% rename from images/podcast/s03e04-effective-communication-with-business.jpg rename to images/podcast/data-translator-role-and-data-strategy.jpg diff --git a/images/podcast/s11e03-from-data-science-to-dataops.jpg b/images/podcast/dataops-and-gitops-best-practices-for-data-teams.jpg similarity index 100% rename from images/podcast/s11e03-from-data-science-to-dataops.jpg rename to images/podcast/dataops-and-gitops-best-practices-for-data-teams.jpg diff --git a/images/podcast/s08e05-storytime-for-dataops.jpg b/images/podcast/dataops-automation-and-reliable-data-pipelines.jpg similarity index 100% rename from images/podcast/s08e05-storytime-for-dataops.jpg rename to images/podcast/dataops-automation-and-reliable-data-pipelines.jpg diff --git a/images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg 
b/images/podcast/dataops-for-data-engineering.jpg similarity index 100% rename from images/podcast/s18e09-dataops-observability-and-cure-for-data-team-blues.jpg rename to images/podcast/dataops-for-data-engineering.jpg diff --git a/images/podcast/s02e11-dataops.jpg b/images/podcast/dataops-principles-and-scalable-data-platforms.jpg similarity index 100% rename from images/podcast/s02e11-dataops.jpg rename to images/podcast/dataops-principles-and-scalable-data-platforms.jpg diff --git a/images/podcast/s07e01-datatalksclub-behind-the-scenes.jpg b/images/podcast/datatalksclub-building-scaling-data-community.jpg similarity index 100% rename from images/podcast/s07e01-datatalksclub-behind-the-scenes.jpg rename to images/podcast/datatalksclub-building-scaling-data-community.jpg diff --git a/images/podcast/s16e01-datatalks-club-anniversary-interview.jpg b/images/podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.jpg similarity index 100% rename from images/podcast/s16e01-datatalks-club-anniversary-interview.jpg rename to images/podcast/datatalksclub-building-sustainable-data-community-3-years-anniversary.jpg diff --git a/images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg b/images/podcast/datatalksclub-scaling-and-free-courses.jpg similarity index 100% rename from images/podcast/s19e03-datatalks-club-anniversary-podcast.jpg rename to images/podcast/datatalksclub-scaling-and-free-courses.jpg diff --git a/images/podcast/s15e03-llms-for-everyone.jpg b/images/podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.jpg similarity index 100% rename from images/podcast/s15e03-llms-for-everyone.jpg rename to images/podcast/deploying-llms-in-production-fine-tuning-retrieval-open-source-api.jpg diff --git a/images/podcast/s03e07-market-yourself.jpg b/images/podcast/developer-personal-brand-learn-in-public.jpg similarity index 100% rename from images/podcast/s03e07-market-yourself.jpg rename to 
images/podcast/developer-personal-brand-learn-in-public.jpg diff --git a/images/podcast/s02e02-developer-advocacy.jpg b/images/podcast/devrel-data-science-open-source-tools.jpg similarity index 100% rename from images/podcast/s02e02-developer-advocacy.jpg rename to images/podcast/devrel-data-science-open-source-tools.jpg diff --git a/images/podcast/s14e06-data-developer-relations.jpg b/images/podcast/devrel-open-source-machine-learning.jpg similarity index 100% rename from images/podcast/s14e06-data-developer-relations.jpg rename to images/podcast/devrel-open-source-machine-learning.jpg diff --git a/images/podcast/s19e09-linguistics-and-fairness.jpg b/images/podcast/fairness-in-ai-ml-engineering.jpg similarity index 100% rename from images/podcast/s19e09-linguistics-and-fairness.jpg rename to images/podcast/fairness-in-ai-ml-engineering.jpg diff --git a/images/podcast/s05e09-business-acumen.jpg b/images/podcast/feature-engineering-model-monitoring-and-data-governance.jpg similarity index 100% rename from images/podcast/s05e09-business-acumen.jpg rename to images/podcast/feature-engineering-model-monitoring-and-data-governance.jpg diff --git a/images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg b/images/podcast/finops-for-data-engineers.jpg similarity index 100% rename from images/podcast/s20e06-from-supply-chain-management-to-digital-warehousing-and-finops.jpg rename to images/podcast/finops-for-data-engineers.jpg diff --git a/images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg b/images/podcast/freelance-data-engineering-pricing-and-clients.jpg similarity index 100% rename from images/podcast/s09e04-freelancing-and-consulting-with-data-engineering.jpg rename to images/podcast/freelance-data-engineering-pricing-and-clients.jpg diff --git a/images/podcast/s04e08-freelancing.jpg b/images/podcast/freelancing-in-machine-learning.jpg similarity index 100% rename from images/podcast/s04e08-freelancing.jpg rename 
to images/podcast/freelancing-in-machine-learning.jpg diff --git a/images/podcast/s12e09-staff-ai-engineer.jpg b/images/podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.jpg similarity index 100% rename from images/podcast/s12e09-staff-ai-engineer.jpg rename to images/podcast/from-academia-to-staff-ai-engineer-interviews-and-career-growth.jpg diff --git a/images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg b/images/podcast/from-academic-research-to-data-engineering-freelancing.jpg similarity index 100% rename from images/podcast/s21e01-from-simulation-algorithms-to-production-grade-data-systems.jpg rename to images/podcast/from-academic-research-to-data-engineering-freelancing.jpg diff --git a/images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg b/images/podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.jpg similarity index 100% rename from images/podcast/s19e07-career-advice-learning-and-featuring-women-in-ml-and-ai.jpg rename to images/podcast/from-biology-to-machine-learning-data-science-portfolio-open-source-computer-vision-transformers.jpg diff --git a/images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg b/images/podcast/from-computer-vision-research-to-autonomous-driving-ai.jpg similarity index 100% rename from images/podcast/s22e02-lessons-from-applied-ai-tesla-waymo-and-beyond.jpg rename to images/podcast/from-computer-vision-research-to-autonomous-driving-ai.jpg diff --git a/images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg b/images/podcast/from-data-freelancer-to-startup-open-source-products.jpg similarity index 100% rename from images/podcast/s17e01-entrepreneurship-journey-from-freelancing-to-starting-company.jpg rename to images/podcast/from-data-freelancer-to-startup-open-source-products.jpg diff --git 
a/images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg b/images/podcast/from-devops-to-data-engineering-automation-open-source-volunteering.jpg similarity index 100% rename from images/podcast/s19e08-career-choices-transitions-and-promotions-in-and-out-of-tech.jpg rename to images/podcast/from-devops-to-data-engineering-automation-open-source-volunteering.jpg diff --git a/images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg b/images/podcast/from-game-ai-to-modern-ai-agents.jpg similarity index 100% rename from images/podcast/s21e07-lessons-from-two-decades-of-ai.jpg rename to images/podcast/from-game-ai-to-modern-ai-agents.jpg diff --git a/images/podcast/s15e08-from-data-manager-to-data-architect.jpg b/images/podcast/from-iot-data-engineering-to-leading-data-architect.jpg similarity index 100% rename from images/podcast/s15e08-from-data-manager-to-data-architect.jpg rename to images/podcast/from-iot-data-engineering-to-leading-data-architect.jpg diff --git a/images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg b/images/podcast/from-large-hadron-collider-to-data-science-research-software-engineering.jpg similarity index 100% rename from images/podcast/s19e05-large-hadron-collider-and-mentorship.jpg rename to images/podcast/from-large-hadron-collider-to-data-science-research-software-engineering.jpg diff --git a/images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg b/images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg similarity index 100% rename from images/podcast/s11e07-from-digital-marketing-to-analytics-engineering.jpg rename to images/podcast/from-marketing-to-analytics-engineering-sql-dbt-career-switch.jpg diff --git a/images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg b/images/podcast/from-math-graduate-to-data-analytics.jpg similarity index 100% rename from images/podcast/s07e09-from-math-teacher-to-analytics-engineer.jpg rename to 
images/podcast/from-math-graduate-to-data-analytics.jpg diff --git a/images/podcast/s03e06-from-physics-to-machine-learning.jpg b/images/podcast/from-physics-to-computer-vision-career-transition.jpg similarity index 100% rename from images/podcast/s03e06-from-physics-to-machine-learning.jpg rename to images/podcast/from-physics-to-computer-vision-career-transition.jpg diff --git a/images/podcast/s21e05-from-astronomy-to-applied-ml.jpg b/images/podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.jpg similarity index 100% rename from images/podcast/s21e05-from-astronomy-to-applied-ml.jpg rename to images/podcast/from-radio-astronomy-to-machine-learning-and-data-engineering.jpg diff --git a/images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg b/images/podcast/from-semiconductor-data-to-applied-machine-learning.jpg similarity index 100% rename from images/podcast/s21e08-from-semiconductors-to-machine-learning-career-in-data-and-teaching.jpg rename to images/podcast/from-semiconductor-data-to-applied-machine-learning.jpg diff --git a/images/podcast/s04e01-from-swe-to-ml.jpg b/images/podcast/from-software-engineer-to-machine-learning.jpg similarity index 100% rename from images/podcast/s04e01-from-swe-to-ml.jpg rename to images/podcast/from-software-engineer-to-machine-learning.jpg diff --git a/images/podcast/s07e08-from-data-science-to-data-engineering.jpg b/images/podcast/from-software-engineering-data-science-to-data-engineering-leadership.jpg similarity index 100% rename from images/podcast/s07e08-from-data-science-to-data-engineering.jpg rename to images/podcast/from-software-engineering-data-science-to-data-engineering-leadership.jpg diff --git a/images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg b/images/podcast/from-software-engineering-to-leading-data-science-teams.jpg similarity index 100% rename from images/podcast/s12e01-from-software-engineer-to-data-science-manager.jpg rename 
to images/podcast/from-software-engineering-to-leading-data-science-teams.jpg diff --git a/images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg b/images/podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.jpg similarity index 100% rename from images/podcast/s16e06-unwritten-rules-for-success-in-machine-learning.jpg rename to images/podcast/from-software-engineering-to-vp-of-machine-learning-applied-ml-leadership.jpg diff --git a/images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg b/images/podcast/from-startup-engineering-to-freelance-data-science.jpg similarity index 100% rename from images/podcast/s14e05-lessons-learned-from-freelancing-and-working-in-start-up.jpg rename to images/podcast/from-startup-engineering-to-freelance-data-science.jpg diff --git a/images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg b/images/podcast/generative-ai-chatbots-in-production-security.jpg similarity index 100% rename from images/podcast/s19e06-ai-in-industry-trust-return-on-investment-and-future.jpg rename to images/podcast/generative-ai-chatbots-in-production-security.jpg diff --git a/images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg b/images/podcast/get-data-analytics-and-data-engineering-job.jpg similarity index 100% rename from images/podcast/s08e09-from-academia-to-data-analytics-and-engineering.jpg rename to images/podcast/get-data-analytics-and-data-engineering-job.jpg diff --git a/images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg b/images/podcast/get-data-engineering-job-prep-and-interview.jpg similarity index 100% rename from images/podcast/s09e03-getting-data-engineering-job-(summary-and-q&a).jpg rename to images/podcast/get-data-engineering-job-prep-and-interview.jpg diff --git a/images/podcast/s01e04-standing-out-as-a-data-scientist.jpg b/images/podcast/get-data-scientist-job.jpg similarity index 100% rename from 
images/podcast/s01e04-standing-out-as-a-data-scientist.jpg rename to images/podcast/get-data-scientist-job.jpg diff --git a/images/podcast/s07e04-career-coaching.jpg b/images/podcast/get-junior-data-job-and-transferable-skills.jpg similarity index 100% rename from images/podcast/s07e04-career-coaching.jpg rename to images/podcast/get-junior-data-job-and-transferable-skills.jpg diff --git a/images/podcast/s11e02-data-science-career-development.jpg b/images/podcast/hiring-and-managing-data-science-teams-in-b2b-saas.jpg similarity index 100% rename from images/podcast/s11e02-data-science-career-development.jpg rename to images/podcast/hiring-and-managing-data-science-teams-in-b2b-saas.jpg diff --git a/images/podcast/s07e02-recruiting-data-professionals.jpg b/images/podcast/hiring-data-scientists-and-analysts.jpg similarity index 100% rename from images/podcast/s07e02-recruiting-data-professionals.jpg rename to images/podcast/hiring-data-scientists-and-analysts.jpg diff --git a/images/podcast/s08e06-recruiting-data-engineers.jpg b/images/podcast/hiring-for-data-engineering-jobs-in-europe.jpg similarity index 100% rename from images/podcast/s08e06-recruiting-data-engineers.jpg rename to images/podcast/hiring-for-data-engineering-jobs-in-europe.jpg diff --git a/images/podcast/s09e09-hiring-data-science-talent.jpg b/images/podcast/hiring-for-data-science-jobs-interview-questions-skills.md.jpg similarity index 100% rename from images/podcast/s09e09-hiring-data-science-talent.jpg rename to images/podcast/hiring-for-data-science-jobs-interview-questions-skills.md.jpg diff --git a/images/podcast/s09e05-data-scientists-at-work.jpg b/images/podcast/how-to-break-into-data-science.jpg similarity index 100% rename from images/podcast/s09e05-data-scientists-at-work.jpg rename to images/podcast/how-to-break-into-data-science.jpg diff --git a/images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg b/images/podcast/how-to-grow-your-ml-engineering-career.jpg similarity 
index 100% rename from images/podcast/s12e07-navigating-career-changes-in-machine-learning.jpg rename to images/podcast/how-to-grow-your-ml-engineering-career.jpg diff --git a/images/podcast/s08e02-hacking-your-data-career.jpg b/images/podcast/how-to-stand-out-in-data-science.jpg similarity index 100% rename from images/podcast/s08e02-hacking-your-data-career.jpg rename to images/podcast/how-to-stand-out-in-data-science.jpg diff --git a/images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg b/images/podcast/how-to-switch-to-ml-tech-without-experience.jpg similarity index 100% rename from images/podcast/s08e07-from-roasting-coffee-to-backend-development.jpg rename to images/podcast/how-to-switch-to-ml-tech-without-experience.jpg diff --git a/images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg b/images/podcast/how-to-transition-into-ml-and-data-engineering-from-qa.jpg similarity index 100% rename from images/podcast/s11e01-from-testing-phones-to-managing-nlp-projects.jpg rename to images/podcast/how-to-transition-into-ml-and-data-engineering-from-qa.jpg diff --git a/images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg b/images/podcast/hugging-face-contributions-and-nlp-portfolio.jpg similarity index 100% rename from images/podcast/s09e06-developer-advocacy-engineer-for-open-source.jpg rename to images/podcast/hugging-face-contributions-and-nlp-portfolio.jpg diff --git a/images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg b/images/podcast/human-centered-ai-automatic-speech-recognition.jpg similarity index 100% rename from images/podcast/s19e02-human-centered-ai-for-disordered-speech-recognition.jpg rename to images/podcast/human-centered-ai-automatic-speech-recognition.jpg diff --git a/images/podcast/s04e06-humans-in-the-loop.jpg b/images/podcast/human-centered-mlops-and-model-monitoring.jpg similarity index 100% rename from images/podcast/s04e06-humans-in-the-loop.jpg rename to 
images/podcast/human-centered-mlops-and-model-monitoring.jpg diff --git a/images/podcast/s13e08-navigating-industrial-data-challenges.jpg b/images/podcast/industrial-data-small-data-production-machine-learning.jpg similarity index 100% rename from images/podcast/s13e08-navigating-industrial-data-challenges.jpg rename to images/podcast/industrial-data-small-data-production-machine-learning.jpg diff --git a/images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg b/images/podcast/interpretable-machine-learning.jpg similarity index 100% rename from images/podcast/s16e07-cracking-code-machine-learning-made-understandable.jpg rename to images/podcast/interpretable-machine-learning.jpg diff --git a/images/podcast/s15e02-investing-in-open-source-data-tools.jpg b/images/podcast/investing-in-open-source-developer-tools.jpg similarity index 100% rename from images/podcast/s15e02-investing-in-open-source-data-tools.jpg rename to images/podcast/investing-in-open-source-developer-tools.jpg diff --git a/images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg b/images/podcast/job-search-strategy-in-tech-projects-skills-cv-networking.jpg similarity index 100% rename from images/podcast/s17e06-accelerating-job-hunt-for-perfect-job-in-tech.jpg rename to images/podcast/job-search-strategy-in-tech-projects-skills-cv-networking.jpg diff --git a/images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg b/images/podcast/kaggle-grandmaster-to-production-ml-and-education.jpg similarity index 100% rename from images/podcast/s20e02-competitive-machine-learning-and-teaching.jpg rename to images/podcast/kaggle-grandmaster-to-production-ml-and-education.jpg diff --git a/images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg b/images/podcast/knowledge-graphs-and-llms-for-automotive-rnd.jpg similarity index 100% rename from images/podcast/s18e02-knowledge-graphs-and-llms-across-academia-and-industry.jpg rename to 
images/podcast/knowledge-graphs-and-llms-for-automotive-rnd.jpg diff --git a/images/podcast/s05e08-the-last-mile-in-data.jpg b/images/podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.jpg similarity index 100% rename from images/podcast/s05e08-the-last-mile-in-data.jpg rename to images/podcast/last-mile-data-delivery-and-data-product-adoption-modern-data-stack.jpg diff --git a/images/podcast/s04e07-launching-a-startup.jpg b/images/podcast/launch-and-build-retail-startup.jpg similarity index 100% rename from images/podcast/s04e07-launching-a-startup.jpg rename to images/podcast/launch-and-build-retail-startup.jpg diff --git a/images/podcast/s20e04-mlops-in-corporations-and-startups.jpg b/images/podcast/lean-mlops-for-startups.jpg similarity index 100% rename from images/podcast/s20e04-mlops-in-corporations-and-startups.jpg rename to images/podcast/lean-mlops-for-startups.jpg diff --git a/images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg b/images/podcast/learning-machine-learning-self-taught-bioinformatics.jpg similarity index 100% rename from images/podcast/s13e07-mastering-self-learning-in-machine-learning.jpg rename to images/podcast/learning-machine-learning-self-taught-bioinformatics.jpg diff --git a/images/podcast/s12e06-preparing-for-data-science-interview.jpg b/images/podcast/machine-learning-data-science-interview-prep.jpg similarity index 100% rename from images/podcast/s12e06-preparing-for-data-science-interview.jpg rename to images/podcast/machine-learning-data-science-interview-prep.jpg diff --git a/images/podcast/s02e06-decision-optimization.jpg b/images/podcast/machine-learning-decision-optimization.jpg similarity index 100% rename from images/podcast/s02e06-decision-optimization.jpg rename to images/podcast/machine-learning-decision-optimization.jpg diff --git a/images/podcast/s04e05-running-from-complexity.jpg b/images/podcast/machine-learning-engineering-production-best-practices.jpg similarity index 
100% rename from images/podcast/s04e05-running-from-complexity.jpg rename to images/podcast/machine-learning-engineering-production-best-practices.jpg diff --git a/images/podcast/s09e02-using-data-for-asteroid-mining.jpg b/images/podcast/machine-learning-for-asteroid-mining-and-water-detection.jpg similarity index 100% rename from images/podcast/s09e02-using-data-for-asteroid-mining.jpg rename to images/podcast/machine-learning-for-asteroid-mining-and-water-detection.jpg diff --git a/images/podcast/s09e01-machine-learning-in-marketing.jpg b/images/podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.jpg similarity index 100% rename from images/podcast/s09e01-machine-learning-in-marketing.jpg rename to images/podcast/machine-learning-in-marketing-attribution-marketing-mix-modeling.jpg diff --git a/images/podcast/s07e05-machine-learning-system-design-interview.jpg b/images/podcast/machine-learning-system-design-interview.jpg similarity index 100% rename from images/podcast/s07e05-machine-learning-system-design-interview.jpg rename to images/podcast/machine-learning-system-design-interview.jpg diff --git a/images/podcast/s02e09-roles-skills-monetizing-ml.jpg b/images/podcast/make-money-with-machine-learning-roles-skills.jpg similarity index 100% rename from images/podcast/s02e09-roles-skills-monetizing-ml.jpg rename to images/podcast/make-money-with-machine-learning-roles-skills.jpg diff --git a/images/podcast/s01e05-mentoring.jpg b/images/podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.jpg similarity index 100% rename from images/podcast/s01e05-mentoring.jpg rename to images/podcast/mentoring-in-tech-how-to-find-and-become-a-mentor.md.jpg diff --git a/images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg b/images/podcast/mindful-data-strategy-for-business-impact.jpg similarity index 100% rename from images/podcast/s21e02-mindful-data-strategy-from-pipelines-to-business-impact.jpg rename to 
images/podcast/mindful-data-strategy-for-business-impact.jpg diff --git a/images/podcast/s05e03-metrics-and-kpis.jpg b/images/podcast/ml-engineering-kpis-and-metrics-strategy.jpg similarity index 100% rename from images/podcast/s05e03-metrics-and-kpis.jpg rename to images/podcast/ml-engineering-kpis-and-metrics-strategy.jpg diff --git a/images/podcast/s06e07-product-management-for-machine-learning.jpg b/images/podcast/ml-product-manager-and-mlops-platform-strategy.jpg similarity index 100% rename from images/podcast/s06e07-product-management-for-machine-learning.jpg rename to images/podcast/ml-product-manager-and-mlops-platform-strategy.jpg diff --git a/images/podcast/s15e01-why-machine-learning-design-broken.jpg b/images/podcast/ml-system-design.jpg similarity index 100% rename from images/podcast/s15e01-why-machine-learning-design-broken.jpg rename to images/podcast/ml-system-design.jpg diff --git a/images/podcast/s17e05-machine-learning-engineering-in-finance.jpg b/images/podcast/mlops-and-ml-engineering-in-finance.jpg similarity index 100% rename from images/podcast/s17e05-machine-learning-engineering-in-finance.jpg rename to images/podcast/mlops-and-ml-engineering-in-finance.jpg diff --git a/images/podcast/s19e04-mlops-as-team.jpg b/images/podcast/mlops-at-scale-reproducibility-adoption.jpg similarity index 100% rename from images/podcast/s19e04-mlops-as-team.jpg rename to images/podcast/mlops-at-scale-reproducibility-adoption.jpg diff --git a/images/podcast/s02e12-communities.jpg b/images/podcast/mlops-community-building-and-meetups.jpg similarity index 100% rename from images/podcast/s02e12-communities.jpg rename to images/podcast/mlops-community-building-and-meetups.jpg diff --git a/images/podcast/s02e05-feature-stores.jpg b/images/podcast/mlops-feature-stores-feature-stores-feast-tecton.jpg similarity index 100% rename from images/podcast/s02e05-feature-stores.jpg rename to images/podcast/mlops-feature-stores-feature-stores-feast-tecton.jpg diff --git 
a/images/podcast/s02e04-mlops.jpg b/images/podcast/mlops-kubeflow-model-monitoring.jpg similarity index 100% rename from images/podcast/s02e04-mlops.jpg rename to images/podcast/mlops-kubeflow-model-monitoring.jpg diff --git a/images/podcast/s10e03-mlops-architect.jpg b/images/podcast/mlops-model-monitoring-data-observability.jpg similarity index 100% rename from images/podcast/s10e03-mlops-architect.jpg rename to images/podcast/mlops-model-monitoring-data-observability.jpg diff --git a/images/podcast/s14e07-from-mlops-to-dataops.jpg b/images/podcast/modern-data-pipelines-orchestration-ingestion-modeling.jpg similarity index 100% rename from images/podcast/s14e07-from-mlops-to-dataops.jpg rename to images/podcast/modern-data-pipelines-orchestration-ingestion-modeling.jpg diff --git a/images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg b/images/podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.jpg similarity index 100% rename from images/podcast/s17e02-searching-beyond-surface-navigating-challenges-and-innovations-in-search-technologies.jpg rename to images/podcast/modern-search-systems-vector-databases-llms-semantic-retrieval.jpg diff --git a/images/podcast/s10e07-dataset-creation-and-curation.jpg b/images/podcast/nlp-dataset-creation-annotation-tools-workflows.jpg similarity index 100% rename from images/podcast/s10e07-dataset-creation-and-curation.jpg rename to images/podcast/nlp-dataset-creation-annotation-tools-workflows.jpg diff --git a/images/podcast/s06e08-nlp-teams.jpg b/images/podcast/nlp-team-hiring-and-production-mlops.jpg similarity index 100% rename from images/podcast/s06e08-nlp-teams.jpg rename to images/podcast/nlp-team-hiring-and-production-mlops.jpg diff --git a/images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg b/images/podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.jpg similarity index 100% 
rename from images/podcast/s21e03-from-medicine-to-machine-learning-how-public-learning-turned-into-career.jpg rename to images/podcast/nonlinear-path-to-machine-learning-freelancing-and-public-learning.jpg diff --git a/images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg b/images/podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.jpg similarity index 100% rename from images/podcast/s17e07-make-impact-through-volunteering-open-source-work.jpg rename to images/podcast/open-source-and-volunteering-in-ai-for-data-ml-career-growth.jpg diff --git a/images/podcast/s02e03-open-source.jpg b/images/podcast/open-source-ml-contributions.jpg similarity index 100% rename from images/podcast/s02e03-open-source.jpg rename to images/podcast/open-source-ml-contributions.jpg diff --git a/images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg b/images/podcast/open-source-ml-tools-strategy-and-business-models.jpg similarity index 100% rename from images/podcast/s18e04-working-in-open-source-probabl-ai-and-sklearn.jpg rename to images/podcast/open-source-ml-tools-strategy-and-business-models.jpg diff --git a/images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg b/images/podcast/open-source-turned-into-career-and-startup-creation.jpg similarity index 100% rename from images/podcast/s09e08-from-open-source-maintainer-to-founder.jpg rename to images/podcast/open-source-turned-into-career-and-startup-creation.jpg diff --git a/images/podcast/s02e08-personal-branding.jpg b/images/podcast/personal-brand-for-data-professionals.jpg similarity index 100% rename from images/podcast/s02e08-personal-branding.jpg rename to images/podcast/personal-brand-for-data-professionals.jpg diff --git a/images/podcast/s06e06-from-academia-to-industry.jpg b/images/podcast/postdoc-to-data-science-lead-career-transition.jpg similarity index 100% rename from images/podcast/s06e06-from-academia-to-industry.jpg rename to 
images/podcast/postdoc-to-data-science-lead-career-transition.jpg diff --git a/images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg b/images/podcast/practical-devrel-demofirst-education-and-open-source.jpg similarity index 100% rename from images/podcast/s20e08-from-hackathons-to-developer-advocacy.jpg rename to images/podcast/practical-devrel-demofirst-education-and-open-source.jpg diff --git a/images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg b/images/podcast/practical-generative-ai-consulting-from-expertise-to-impact.jpg similarity index 100% rename from images/podcast/s16e05-from-research-scientist-at-amazon-to-machine-learning-ai-consultant.jpg rename to images/podcast/practical-generative-ai-consulting-from-expertise-to-impact.jpg diff --git a/images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg b/images/podcast/practical-llm-engineering-and-rag.jpg similarity index 100% rename from images/podcast/s22e04-how-to-build-and-evaluate-ai-systems-in-age-of-llms.jpg rename to images/podcast/practical-llm-engineering-and-rag.jpg diff --git a/images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg b/images/podcast/practical-llm-use-cases-and-product-patterns.jpg similarity index 100% rename from images/podcast/s15e04-good-bad-and-ugly-of-gpt.jpg rename to images/podcast/practical-llm-use-cases-and-product-patterns.jpg diff --git a/images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg b/images/podcast/pragmatic-and-standardized-mlops.jpg similarity index 100% rename from images/podcast/s15e07-pragmatic-and-standardized-mlops.jpg rename to images/podcast/pragmatic-and-standardized-mlops.jpg diff --git a/images/podcast/s06e04-becoming-a-data-product-manager.jpg b/images/podcast/product-designer-to-data-product-manager.jpg similarity index 100% rename from images/podcast/s06e04-becoming-a-data-product-manager.jpg rename to images/podcast/product-designer-to-data-product-manager.jpg diff --git 
a/images/podcast/s05e07-ml-vs-analytics.jpg b/images/podcast/production-ml-mlops-and-data-team-building.jpg similarity index 100% rename from images/podcast/s05e07-ml-vs-analytics.jpg rename to images/podcast/production-ml-mlops-and-data-team-building.jpg diff --git a/images/podcast/s04e02-build-your-own-data-pipeline.jpg b/images/podcast/production-ml-pipelines-with-aws-and-kafka.jpg similarity index 100% rename from images/podcast/s04e02-build-your-own-data-pipeline.jpg rename to images/podcast/production-ml-pipelines-with-aws-and-kafka.jpg diff --git a/images/podcast/s17e08-building-machine-learning-products.jpg b/images/podcast/production-ml-search-vector-search-embeddings-hybrid search.jpg similarity index 100% rename from images/podcast/s17e08-building-machine-learning-products.jpg rename to images/podcast/production-ml-search-vector-search-embeddings-hybrid search.jpg diff --git a/images/podcast/s20e05-data-intensive-ai.jpg b/images/podcast/production-ready-ai-engineering.jpg similarity index 100% rename from images/podcast/s20e05-data-intensive-ai.jpg rename to images/podcast/production-ready-ai-engineering.jpg diff --git a/images/podcast/s03e01-from-pm-to-ds.jpg b/images/podcast/project-manager-to-data-scientist.jpg similarity index 100% rename from images/podcast/s03e01-from-pm-to-ds.jpg rename to images/podcast/project-manager-to-data-scientist.jpg diff --git a/images/podcast/s02e10-public-speaking.jpg b/images/podcast/public-speaking-for-data-scientists.jpg similarity index 100% rename from images/podcast/s02e10-public-speaking.jpg rename to images/podcast/public-speaking-for-data-scientists.jpg diff --git a/images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg b/images/podcast/remote-data-engineering-work-and-building-iot-platforms.jpg similarity index 100% rename from images/podcast/s15e05-mastering-data-engineering-as-remote-worker.jpg rename to images/podcast/remote-data-engineering-work-and-building-iot-platforms.jpg diff --git 
a/images/podcast/s05e05-researchers-vs-engineers.jpg b/images/podcast/research-to-production-ml-systems-roadmap.jpg similarity index 100% rename from images/podcast/s05e05-researchers-vs-engineers.jpg rename to images/podcast/research-to-production-ml-systems-roadmap.jpg diff --git a/images/podcast/s10e09-responsible-and-explainable-ai.jpg b/images/podcast/responsible-explainable-ai-bias-detection.jpg similarity index 100% rename from images/podcast/s10e09-responsible-and-explainable-ai.jpg rename to images/podcast/responsible-explainable-ai-bias-detection.jpg diff --git a/images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg b/images/podcast/scaling-data-engineering-teams-self-service-platforms.jpg similarity index 100% rename from images/podcast/s10e05-growing-data-engineering-team-in-scale-up.jpg rename to images/podcast/scaling-data-engineering-teams-self-service-platforms.jpg diff --git a/images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg b/images/podcast/scaling-enterprise-ai-mlops-data-first-strategy.jpg similarity index 100% rename from images/podcast/s10e04-lessons-learned-about-data-&-ai-at-enterprises.jpg rename to images/podcast/scaling-enterprise-ai-mlops-data-first-strategy.jpg diff --git a/images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg b/images/podcast/software-engineering-for-machine-learning.jpg similarity index 100% rename from images/podcast/s13e05-se4ml-software-engineering-for-machine-learning.jpg rename to images/podcast/software-engineering-for-machine-learning.jpg diff --git a/images/podcast/s05e04-introducing-data-science-in-startups.jpg b/images/podcast/solopreneur-data-scientist.jpg similarity index 100% rename from images/podcast/s05e04-introducing-data-science-in-startups.jpg rename to images/podcast/solopreneur-data-scientist.jpg diff --git a/images/podcast/s06e01-solopreneur.jpg b/images/podcast/solopreneur-developer-and-data-professional.jpg similarity index 100% rename 
from images/podcast/s06e01-solopreneur.jpg rename to images/podcast/solopreneur-developer-and-data-professional.jpg diff --git a/images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg b/images/podcast/teaching-mentoring-data-analytics-fintech.jpg similarity index 100% rename from images/podcast/s11e09-teaching-and-mentoring-in-data-analytics.jpg rename to images/podcast/teaching-mentoring-data-analytics-fintech.jpg diff --git a/images/podcast/s12e04-doing-software-engineering-in-academia.jpg b/images/podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.jpg similarity index 100% rename from images/podcast/s12e04-doing-software-engineering-in-academia.jpg rename to images/podcast/teaching-reproducible-research-and-open-science-coding-practices-for-academia.jpg diff --git a/images/podcast/s02e01-writing.jpg b/images/podcast/technical-writing-for-data-scientists.jpg similarity index 100% rename from images/podcast/s02e01-writing.jpg rename to images/podcast/technical-writing-for-data-scientists.jpg diff --git a/images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg b/images/podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.jpg similarity index 100% rename from images/podcast/s21e09-from-theme-parks-to-tesla-building-data-products-that-work.jpg rename to images/podcast/theme-park-crowd-modeling-to-tesla-full-stack-data-engineering.jpg diff --git a/images/podcast/s20e03-trends-in-data-engineering.jpg b/images/podcast/trends-in-modern-data-engineering.jpg similarity index 100% rename from images/podcast/s20e03-trends-in-data-engineering.jpg rename to images/podcast/trends-in-modern-data-engineering.jpg diff --git a/images/podcast/s19e01-using-data-to-create-liveable-cities.jpg b/images/podcast/urban-data-science.jpg similarity index 100% rename from images/podcast/s19e01-using-data-to-create-liveable-cities.jpg rename to images/podcast/urban-data-science.jpg diff --git 
a/images/podcast/s08e01-visualising-machine-learning.jpg b/images/podcast/visualizing-machine-learning-concepts-to-explain-ml.jpg similarity index 100% rename from images/podcast/s08e01-visualising-machine-learning.jpg rename to images/podcast/visualizing-machine-learning-concepts-to-explain-ml.jpg diff --git a/podcast.md b/podcast.md index 9f7ed25b..06a71ca9 100644 --- a/podcast.md +++ b/podcast.md @@ -35,7 +35,7 @@ layout: page -{% assign seasons = site.podcast | reverse | group_by: 'season' %} +{% assign all_seasons = site.podcast | map: 'season' | uniq | sort | reverse %}
@@ -46,11 +46,12 @@ layout: page

All Podcast Episodes

- {% for season in seasons %} -
-

Season #{{ season.name }}

+ {% for season_num in all_seasons %} + {% assign season_episodes = site.podcast | where: 'season', season_num | sort: 'episode' | reverse %} +
+

Season #{{ season_num }}