diff --git a/research_quick_wins.py b/research_quick_wins.py index 8d9a0b4..6962cce 100644 --- a/research_quick_wins.py +++ b/research_quick_wins.py @@ -15,7 +15,7 @@ load_dotenv() # Add data_sources to path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'data_sources')) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "data_sources")) from modules.google_search_console import GoogleSearchConsole from modules.dataforseo import DataForSEO @@ -23,6 +23,25 @@ from modules.opportunity_scorer import OpportunityScorer, OpportunityType from modules.search_intent_analyzer import SearchIntentAnalyzer + +def get_first_ranking(rankings): + if isinstance(rankings, list) and rankings: + first = rankings[0] + if isinstance(first, dict): + return first + return None + + +def get_serp_features(dfs, keyword): + try: + serp_data = dfs.get_serp_data(keyword, limit=10) + if serp_data and "features" in serp_data: + return serp_data.get("features", []) + except Exception: + return [] + return [] + + def main(): print("=" * 80) print("QUICK WIN OPPORTUNITIES RESEARCH") @@ -62,10 +81,7 @@ def main(): # Get quick wins from GSC print("\n2. Fetching keywords ranking positions 11-20...") quick_wins = gsc.get_quick_wins( - days=30, - position_min=11, - position_max=20, - min_impressions=50 + days=30, position_min=11, position_max=20, min_impressions=50 ) if not quick_wins: @@ -81,11 +97,11 @@ def main(): detailed_opportunities = [] for i, kw in enumerate(quick_wins[:10], 1): - keyword = kw['keyword'] - position = kw['position'] - impressions = kw['impressions'] - clicks = kw['clicks'] - ctr = kw['ctr'] * 100 + keyword = kw["keyword"] + position = kw["position"] + impressions = kw["impressions"] + clicks = kw["clicks"] + ctr = kw["ctr"] * 100 print(f"\n#{i} KEYWORD: {keyword}") print("-" * 80) @@ -93,7 +109,9 @@ def main(): print(f"Impressions (30d): {impressions:,}") print(f"Clicks (30d): {clicks}") print(f"CTR: {ctr:.2f}%") - print(f"Commercial Intent: {kw['commercial_intent_category']} ({kw['commercial_intent']}/3.0)") + print( + f"Commercial Intent: {kw['commercial_intent_category']} ({kw['commercial_intent']}/3.0)" + ) print(f"Opportunity Score: {kw['opportunity_score']:.2f}") print(f"Priority: {kw['priority'].upper()}") @@ -104,15 +122,19 @@ def main(): try: print(f"\nVerifying with DataForSEO...") rankings = dfs.get_rankings( - domain=os.getenv('GSC_SITE_URL', 'yoursite.com').replace('https://', '').replace('http://', '').rstrip('/'), - keywords=[keyword] + domain=os.getenv("GSC_SITE_URL", "yoursite.com") + .replace("https://", "") + .replace("http://", "") + .rstrip("/"), + keywords=[keyword], ) - if rankings and rankings[0]['position']: - dfs_position = rankings[0]['position'] - dfs_url = rankings[0]['url'] - search_volume = rankings[0]['search_volume'] - difficulty = rankings[0].get('difficulty') + ranking = get_first_ranking(rankings) + if ranking and ranking.get("position"): + dfs_position = ranking["position"] + dfs_url = ranking.get("url") + search_volume = ranking.get("search_volume") + difficulty = ranking.get("difficulty") print(f" DataForSEO Position: {dfs_position}") print(f" Ranking URL: {dfs_url}") @@ -121,43 +143,40 @@ def main(): if difficulty: print(f" SEO Difficulty: {difficulty}/100") - kw['dfs_position'] = dfs_position - kw['ranking_url'] = dfs_url - kw['search_volume'] = search_volume - kw['difficulty'] = difficulty + kw["dfs_position"] = dfs_position + kw["ranking_url"] = dfs_url + kw["search_volume"] = search_volume + kw["difficulty"] = difficulty else: print(f" Not found in top 100 (DataForSEO)") - # Get SERP features for intent analysis - try: - serp_data = dfs.get_serp_data(keyword, limit=10) - if serp_data and 'features' in serp_data: - serp_features = serp_data.get('features', []) - kw['serp_features'] = serp_features - if serp_features: - print(f" SERP Features: {', '.join(serp_features[:3])}") - except: - pass + serp_features = get_serp_features(dfs, keyword) + kw["serp_features"] = serp_features + if serp_features: + print(f" SERP Features: {', '.join(serp_features[:3])}") except Exception as e: print(f" DataForSEO error: {e}") # Get page performance from GA4 (if we have the URL) - if ga and kw.get('ranking_url'): + if ga and kw.get("ranking_url"): try: # Extract path from URL from urllib.parse import urlparse - path = urlparse(kw['ranking_url']).path + + path = urlparse(kw["ranking_url"]).path print(f"\nChecking GA4 performance for {path}...") page_data = ga.get_page_performance(path, days=30) - if page_data and 'pageviews' in page_data: + if page_data and "pageviews" in page_data: print(f" Pageviews: {page_data['pageviews']:,}") - print(f" Avg. Engagement: {page_data.get('avg_engagement_time', 0):.0f}s") + print( + f" Avg. Engagement: {page_data.get('avg_engagement_time', 0):.0f}s" + ) print(f" Bounce Rate: {page_data.get('bounce_rate', 0):.1%}") - kw['pageviews'] = page_data['pageviews'] - kw['engagement'] = page_data.get('avg_engagement_time', 0) + kw["pageviews"] = page_data["pageviews"] + kw["engagement"] = page_data.get("avg_engagement_time", 0) except Exception as e: print(f" GA4 error: {e}") @@ -166,48 +185,51 @@ def main(): enhanced_score_result = scorer.calculate_score( keyword_data=kw, opportunity_type=OpportunityType.QUICK_WIN, - search_volume=kw.get('search_volume'), - difficulty=kw.get('difficulty'), + search_volume=kw.get("search_volume"), + difficulty=kw.get("difficulty"), serp_features=serp_features, cluster_value=50, # Default - will be enhanced with topic clustering later trend_direction=None, # Will be added with trend detection - trend_percent=None + trend_percent=None, ) - kw['enhanced_score'] = enhanced_score_result['final_score'] - kw['enhanced_priority'] = enhanced_score_result['priority'] - kw['score_breakdown'] = enhanced_score_result['score_breakdown'] - kw['primary_factor'] = enhanced_score_result['primary_factor'] + kw["enhanced_score"] = enhanced_score_result["final_score"] + kw["enhanced_priority"] = enhanced_score_result["priority"] + kw["score_breakdown"] = enhanced_score_result["score_breakdown"] + kw["primary_factor"] = enhanced_score_result["primary_factor"] print(f" Enhanced Score: {enhanced_score_result['final_score']}/100") print(f" Priority: {enhanced_score_result['priority']}") print(f" Key Factor: {enhanced_score_result['primary_factor']}") # Calculate traffic potential - if kw.get('position') and kw.get('impressions'): + if kw.get("position") and kw.get("impressions"): traffic_potential = scorer.calculate_potential_traffic( - current_position=kw['position'], + current_position=kw["position"], target_position=7, # Target middle of page 1 - impressions=kw['impressions'], - current_clicks=kw['clicks'] + impressions=kw["impressions"], + current_clicks=kw["clicks"], + ) + kw["traffic_potential"] = traffic_potential + print( + f" Potential: +{traffic_potential['additional_clicks']} clicks/month (+{traffic_potential['percent_increase']:.0f}%)" ) - kw['traffic_potential'] = traffic_potential - print(f" Potential: +{traffic_potential['additional_clicks']} clicks/month (+{traffic_potential['percent_increase']:.0f}%)") # Analyze search intent if serp_features: try: intent_result = intent_analyzer.analyze( - keyword=keyword, - serp_features=serp_features + keyword=keyword, serp_features=serp_features ) # Handle SearchIntent enum or string - primary_intent = intent_result.get('primary_intent', 'unknown') - if hasattr(primary_intent, 'value'): + primary_intent = intent_result.get("primary_intent", "unknown") + if hasattr(primary_intent, "value"): primary_intent = primary_intent.value - kw['search_intent'] = str(primary_intent) - kw['intent_confidence'] = float(intent_result.get('confidence', 0)) - print(f" Search Intent: {kw['search_intent']} (confidence: {kw['intent_confidence']:.0f}%)") + kw["search_intent"] = str(primary_intent) + kw["intent_confidence"] = float(intent_result.get("confidence", 0)) + print( + f" Search Intent: {kw['search_intent']} (confidence: {kw['intent_confidence']:.0f}%)" + ) except Exception as e: print(f" Intent analysis error: {e}") @@ -215,13 +237,13 @@ def main(): recommendation = generate_recommendation(kw) print(f" {recommendation}") - kw['recommendation'] = recommendation + kw["recommendation"] = recommendation detailed_opportunities.append(kw) # Re-sort by enhanced score detailed_opportunities.sort( - key=lambda x: x.get('enhanced_score', x.get('opportunity_score', 0)), - reverse=True + key=lambda x: x.get("enhanced_score", x.get("opportunity_score", 0)), + reverse=True, ) # Generate summary report @@ -229,9 +251,11 @@ def main(): print("SUMMARY REPORT") print("=" * 80) - total_impressions = sum(k['impressions'] for k in detailed_opportunities) - total_clicks = sum(k['clicks'] for k in detailed_opportunities) - avg_position = sum(k['position'] for k in detailed_opportunities) / len(detailed_opportunities) + total_impressions = sum(k["impressions"] for k in detailed_opportunities) + total_clicks = sum(k["clicks"] for k in detailed_opportunities) + avg_position = sum(k["position"] for k in detailed_opportunities) / len( + detailed_opportunities + ) print(f"\nQuick Wins Identified: {len(detailed_opportunities)}") print(f"Total Impressions: {total_impressions:,}") @@ -256,19 +280,25 @@ def main(): for i, kw in enumerate(detailed_opportunities[:3], 1): print(f"\n{i}. {kw['keyword']}") print(f" Position {kw['position']} → Target: 5-7") - print(f" Current: {kw['clicks']} clicks → Potential: {int(kw['impressions'] * 0.055)} clicks") - if kw.get('ranking_url'): + print( + f" Current: {kw['clicks']} clicks → Potential: {int(kw['impressions'] * 0.055)} clicks" + ) + if kw.get("ranking_url"): print(f" URL: {kw['ranking_url']}") # Write to markdown file - print(f"\n\n4. Writing report to research/quick-wins-{datetime.now().strftime('%Y-%m-%d')}.md...") + print( + f"\n\n4. Writing report to research/quick-wins-{datetime.now().strftime('%Y-%m-%d')}.md..." + ) write_markdown_report(detailed_opportunities) print("\n" + "=" * 80) print("✅ RESEARCH COMPLETE") print("=" * 80) print(f"\nNext steps:") - print(f"1. Review detailed report: research/quick-wins-{datetime.now().strftime('%Y-%m-%d')}.md") + print( + f"1. Review detailed report: research/quick-wins-{datetime.now().strftime('%Y-%m-%d')}.md" + ) print(f"2. Prioritize top 3-5 keywords to target first") print(f"3. Update content-priorities.md with findings") print(f"4. Start content optimization with top priority") @@ -276,9 +306,9 @@ def main(): def generate_recommendation(kw): """Generate action recommendation based on keyword data""" - position = kw['position'] - impressions = kw['impressions'] - clicks = kw['clicks'] + position = kw["position"] + impressions = kw["impressions"] + clicks = kw["clicks"] ctr = (clicks / impressions * 100) if impressions > 0 else 0 recommendations = [] @@ -305,20 +335,24 @@ def generate_recommendation(kw): if position > 15: recommendations.append("Consider: Add 500+ words, update examples, add visuals") else: - recommendations.append("Consider: Refresh intro, update stats, improve formatting") + recommendations.append( + "Consider: Refresh intro, update stats, improve formatting" + ) return " | ".join(recommendations) def write_markdown_report(opportunities): """Write detailed markdown report""" - date_str = datetime.now().strftime('%Y-%m-%d') + date_str = datetime.now().strftime("%Y-%m-%d") filename = f"research/quick-wins-{date_str}.md" - with open(filename, 'w') as f: + with open(filename, "w") as f: f.write(f"# Quick Win Opportunities\n\n") f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n") - f.write(f"**Strategy:** Target keywords ranking positions 11-20 (page 2) to push to page 1\n\n") + f.write( + f"**Strategy:** Target keywords ranking positions 11-20 (page 2) to push to page 1\n\n" + ) f.write(f"**Opportunities Found:** {len(opportunities)}\n\n") f.write("---\n\n") @@ -330,40 +364,54 @@ def write_markdown_report(opportunities): f.write(f"- **Impressions (30d):** {kw['impressions']:,}\n") f.write(f"- **Clicks (30d):** {kw['clicks']}\n") f.write(f"- **CTR:** {kw['ctr'] * 100:.2f}%\n") - f.write(f"- **Commercial Intent:** {kw['commercial_intent_category']} ({kw['commercial_intent']}/3.0)\n") - if kw.get('search_intent'): - f.write(f"- **Search Intent:** {kw['search_intent']} ({kw.get('intent_confidence', 0):.0f}% confidence)\n") + f.write( + f"- **Commercial Intent:** {kw['commercial_intent_category']} ({kw['commercial_intent']}/3.0)\n" + ) + if kw.get("search_intent"): + f.write( + f"- **Search Intent:** {kw['search_intent']} ({kw.get('intent_confidence', 0):.0f}% confidence)\n" + ) f.write(f"\n### Enhanced Opportunity Analysis\n\n") - f.write(f"- **Enhanced Score:** {kw.get('enhanced_score', kw['opportunity_score']):.2f}/100\n") - f.write(f"- **Priority:** {kw.get('enhanced_priority', kw['priority']).upper()}\n") + f.write( + f"- **Enhanced Score:** {kw.get('enhanced_score', kw['opportunity_score']):.2f}/100\n" + ) + f.write( + f"- **Priority:** {kw.get('enhanced_priority', kw['priority']).upper()}\n" + ) f.write(f"- **Key Factor:** {kw.get('primary_factor', 'volume')}\n\n") - if kw.get('score_breakdown'): + if kw.get("score_breakdown"): f.write(f"**Score Breakdown:**\n") - breakdown = kw['score_breakdown'] + breakdown = kw["score_breakdown"] f.write(f"- Volume: {breakdown.get('volume_score', 0):.0f}/100\n") f.write(f"- Position: {breakdown.get('position_score', 0):.0f}/100\n") f.write(f"- Intent: {breakdown.get('intent_score', 0):.0f}/100\n") - f.write(f"- Competition: {breakdown.get('competition_score', 0):.0f}/100\n") + f.write( + f"- Competition: {breakdown.get('competition_score', 0):.0f}/100\n" + ) f.write(f"- CTR Opportunity: {breakdown.get('ctr_score', 0):.0f}/100\n") f.write(f"\n") - if kw.get('traffic_potential'): - tp = kw['traffic_potential'] + if kw.get("traffic_potential"): + tp = kw["traffic_potential"] f.write(f"### Traffic Potential\n\n") - f.write(f"- **Current:** {tp['current_clicks']} clicks/month at position {tp['current_position']:.1f}\n") + f.write( + f"- **Current:** {tp['current_clicks']} clicks/month at position {tp['current_position']:.1f}\n" + ) f.write(f"- **Target:** Position {tp['target_position']} (page 1)\n") f.write(f"- **Potential:** {tp['potential_clicks']} clicks/month\n") - f.write(f"- **Gain:** +{tp['additional_clicks']} clicks (+{tp['percent_increase']:.0f}%)\n\n") + f.write( + f"- **Gain:** +{tp['additional_clicks']} clicks (+{tp['percent_increase']:.0f}%)\n\n" + ) - if kw.get('ranking_url'): + if kw.get("ranking_url"): f.write(f"### Ranking Page\n\n") f.write(f"- **URL:** {kw['ranking_url']}\n") - if kw.get('dfs_position'): + if kw.get("dfs_position"): f.write(f"- **DataForSEO Position:** {kw['dfs_position']}\n") - if kw.get('search_volume'): + if kw.get("search_volume"): f.write(f"- **Search Volume:** {kw['search_volume']:,}/month\n") - if kw.get('pageviews'): + if kw.get("pageviews"): f.write(f"- **Pageviews (30d):** {kw['pageviews']:,}\n") f.write(f"- **Avg. Engagement:** {kw['engagement']:.0f}s\n") f.write("\n") @@ -384,8 +432,8 @@ def write_markdown_report(opportunities): f.write("---\n\n") # Summary section - total_impressions = sum(k['impressions'] for k in opportunities) - total_clicks = sum(k['clicks'] for k in opportunities) + total_impressions = sum(k["impressions"] for k in opportunities) + total_clicks = sum(k["clicks"] for k in opportunities) estimated_potential = int(total_impressions * 0.055) f.write(f"## Summary\n\n") @@ -393,16 +441,22 @@ def write_markdown_report(opportunities): f.write(f"**Combined Metrics:**\n") f.write(f"- Total Impressions: {total_impressions:,}\n") f.write(f"- Total Current Clicks: {total_clicks}\n") - f.write(f"- Current CTR: {(total_clicks/total_impressions*100):.2f}%\n\n") + f.write(f"- Current CTR: {(total_clicks / total_impressions * 100):.2f}%\n\n") f.write(f"**Potential Impact:**\n") f.write(f"- Target CTR (page 1): 5.5%\n") f.write(f"- Potential Total Clicks: {estimated_potential}\n") f.write(f"- Additional Clicks: +{estimated_potential - total_clicks}\n\n") f.write(f"## Next Steps\n\n") - f.write(f"1. **Prioritize:** Start with top 3-5 keywords (highest opportunity score)\n") - f.write(f"2. **Research:** Analyze top-ranking competitor content for each keyword\n") - f.write(f"3. **Optimize:** Update existing content or create comprehensive new content\n") + f.write( + f"1. **Prioritize:** Start with top 3-5 keywords (highest opportunity score)\n" + ) + f.write( + f"2. **Research:** Analyze top-ranking competitor content for each keyword\n" + ) + f.write( + f"3. **Optimize:** Update existing content or create comprehensive new content\n" + ) f.write(f"4. **Monitor:** Track position changes weekly\n") f.write(f"5. **Iterate:** Continue with next set of opportunities\n\n") diff --git a/tests/test_research_quick_wins_helpers.py b/tests/test_research_quick_wins_helpers.py new file mode 100644 index 0000000..424011c --- /dev/null +++ b/tests/test_research_quick_wins_helpers.py @@ -0,0 +1,90 @@ +import importlib.util +import sys +import types +import unittest +from pathlib import Path + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "research_quick_wins.py" + + +def load_research_quick_wins_module(): + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + + modules_pkg = types.ModuleType("modules") + + google_search_console = types.ModuleType("modules.google_search_console") + google_search_console.GoogleSearchConsole = object + dataforseo = types.ModuleType("modules.dataforseo") + dataforseo.DataForSEO = object + google_analytics = types.ModuleType("modules.google_analytics") + google_analytics.GoogleAnalytics = object + opportunity_scorer = types.ModuleType("modules.opportunity_scorer") + opportunity_scorer.OpportunityScorer = object + opportunity_scorer.OpportunityType = object + search_intent = types.ModuleType("modules.search_intent_analyzer") + search_intent.SearchIntentAnalyzer = object + + injected = { + "dotenv": fake_dotenv, + "modules": modules_pkg, + "modules.google_search_console": google_search_console, + "modules.dataforseo": dataforseo, + "modules.google_analytics": google_analytics, + "modules.opportunity_scorer": opportunity_scorer, + "modules.search_intent_analyzer": search_intent, + } + previous = {name: sys.modules.get(name) for name in injected} + sys.modules.update(injected) + try: + spec = importlib.util.spec_from_file_location( + "research_quick_wins_under_test", MODULE_PATH + ) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load {MODULE_PATH}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + finally: + for name, value in previous.items(): + if value is None: + sys.modules.pop(name, None) + else: + sys.modules[name] = value + + +class QuickWinsHelperTests(unittest.TestCase): + def test_get_first_ranking_returns_first_dict_only(self): + module = load_research_quick_wins_module() + + self.assertEqual(module.get_first_ranking([{"position": 11}]), {"position": 11}) + self.assertIsNone(module.get_first_ranking([])) + self.assertIsNone(module.get_first_ranking([None])) + + def test_get_serp_features_returns_empty_list_on_provider_error(self): + module = load_research_quick_wins_module() + + class FakeDFS: + def get_serp_data(self, keyword, limit=10): + raise RuntimeError("api down") + + self.assertEqual( + module.get_serp_features(FakeDFS(), "podcast monetization"), [] + ) + + def test_get_serp_features_extracts_feature_list(self): + module = load_research_quick_wins_module() + + class FakeDFS: + def get_serp_data(self, keyword, limit=10): + return {"features": ["people_also_ask", "featured_snippet"]} + + self.assertEqual( + module.get_serp_features(FakeDFS(), "podcast monetization"), + ["people_also_ask", "featured_snippet"], + ) + + +if __name__ == "__main__": + unittest.main()