diff --git a/README.md b/README.md
index 23599b3cf..f25c124fd 100644
--- a/README.md
+++ b/README.md
@@ -85,3 +85,47 @@ Then open http://localhost:5173 in your browser.
 - **Frontend:** React + Vite, react-markdown for rendering
 - **Storage:** JSON files in `data/conversations/`
 - **Package Management:** uv for Python, npm for JavaScript
+
+## Ranking Algorithms
+
+The council uses two methods to aggregate peer rankings from Stage 2:
+
+### Mean Position Averaging
+The original method calculates each model's average position across all rankings. It is simple but susceptible to outlier rankings.
+
+### Tournament-Style Pairwise Comparison
+A more robust method that counts head-to-head wins between each pair of models. For each pair (A, B), we count how many rankers preferred A over B. The model with more pairwise victories wins that matchup.
+
+**Why tournament ranking is more robust:**
+
+Consider a 3-model council where Models A, B, C all rank themselves first (self-promotion bias):
+- Model A ranks: A=1, B=2, C=3
+- Model B ranks: B=1, A=2, C=3
+- Model C ranks: C=1, A=2, B=3
+
+Mean ranking results:
+| Model | Positions | Average |
+|-------|-----------|---------|
+| A | 1, 2, 2 | 1.67 |
+| B | 2, 1, 3 | 2.00 |
+| C | 3, 3, 1 | 2.33 |
+
+Tournament results:
+| Model | vs A | vs B | vs C | Win% |
+|-------|------|------|------|------|
+| A | - | 2-1 | 2-1 | 100% |
+| B | 1-2 | - | 2-1 | 50% |
+| C | 1-2 | 1-2 | - | 0% |
+
+Model A wins both pairwise matchups (2-1 against B, 2-1 against C) and deserves first place. The tournament method correctly identifies this.
+
+**Outlier robustness validation:**
+
+In the outlier scenario, every other ranker places Model A first (a mean position of 1.0); a single outlier vote that places A last degrades A's mean to 1.5. Tournament ranking keeps A at a 100% win rate because A still wins the majority of its head-to-head comparisons. This demonstrates tournament ranking's robustness to strategic voting and outliers.
+
+**Validation tests verify:**
+- Pairwise comparison math correctness
+- Tie handling (0.5 points awarded to each model)
+- Edge cases (single model, empty rankings)
+- Fallback parsing from raw ranking text
+- Realistic 5-model council scenarios
diff --git a/backend/council.py b/backend/council.py
index 5069abec9..bd4aad8e7 100644
--- a/backend/council.py
+++ b/backend/council.py
@@ -255,6 +255,259 @@ def calculate_aggregate_rankings(
     return aggregate
 
 
+def calculate_tournament_rankings(
+    stage2_results: List[Dict[str, Any]],
+    label_to_model: Dict[str, str]
+) -> List[Dict[str, Any]]:
+    """
+    Calculate rankings using tournament-style pairwise comparison.
+
+    For each pair of models, count how many rankers preferred one over the other.
+    The model with more pairwise wins ranks higher. This method is more robust
+    to outlier rankings than simple position averaging.
+
+    Args:
+        stage2_results: Rankings from each model with parsed_ranking
+        label_to_model: Mapping from anonymous labels to model names
+
+    Returns:
+        List of dicts sorted by win_percentage (descending):
+        [
+            {
+                "model": "openai/gpt-4o",
+                "wins": 4.0,
+                "losses": 1.0,
+                "ties": 1.0,
+                "win_percentage": 0.75,
+                "total_matchups": 6
+            },
+            ...
+        ]
+    """
+    from collections import defaultdict
+
+    # Get all models from label_to_model
+    models = list(set(label_to_model.values()))
+
+    if len(models) < 2:
+        # Need at least 2 models for pairwise comparison
+        return [{"model": m, "wins": 0, "losses": 0, "ties": 0, "win_percentage": 0.0, "total_matchups": 0} for m in models]
+
+    # Track pairwise wins: pairwise_wins[(model_a, model_b)] = count of times a ranked above b
+    pairwise_wins = defaultdict(int)
+
+    # Process each ranker's parsed ranking
+    # Use pre-parsed ranking if available, otherwise parse from text
+    for ranking in stage2_results:
+        parsed_ranking = ranking.get('parsed_ranking')
+        if not parsed_ranking:
+            # Fallback: parse from raw ranking text (consistent with calculate_aggregate_rankings)
+            ranking_text = ranking.get('ranking', '')
+            parsed_ranking = parse_ranking_from_text(ranking_text) if ranking_text else []
+
+        if not parsed_ranking:
+            continue
+
+        # Convert labels to model names and get their positions
+        model_positions = {}
+        for position, label in enumerate(parsed_ranking):
+            if label in label_to_model:
+                model_name = label_to_model[label]
+                model_positions[model_name] = position
+
+        # For each pair of models, record who was ranked higher (lower position = better)
+        ranked_models = list(model_positions.keys())
+        for i in range(len(ranked_models)):
+            for j in range(i + 1, len(ranked_models)):
+                model_a = ranked_models[i]
+                model_b = ranked_models[j]
+                pos_a = model_positions[model_a]
+                pos_b = model_positions[model_b]
+
+                # Ensure consistent ordering for the key
+                if model_a > model_b:
+                    model_a, model_b = model_b, model_a
+                    pos_a, pos_b = pos_b, pos_a
+
+                if pos_a < pos_b:
+                    pairwise_wins[(model_a, model_b, 'a')] += 1
+                elif pos_b < pos_a:
+                    pairwise_wins[(model_a, model_b, 'b')] += 1
+                # Equal positions would be a tie (shouldn't happen with rankings)
+
+    # Calculate wins, losses, and ties for each model
+    model_stats = {model: {"wins": 0.0, "losses": 0.0, "ties": 0.0} for model in models}
+
+    # Process each unique pair of models
+    processed_pairs = set()
+    for i in range(len(models)):
+        for j in range(i + 1, len(models)):
+            model_a, model_b = models[i], models[j]
+            if model_a > model_b:
+                model_a, model_b = model_b, model_a
+
+            pair_key = (model_a, model_b)
+            if pair_key in processed_pairs:
+                continue
+            processed_pairs.add(pair_key)
+
+            a_wins = pairwise_wins.get((model_a, model_b, 'a'), 0)
+            b_wins = pairwise_wins.get((model_a, model_b, 'b'), 0)
+
+            if a_wins > b_wins:
+                model_stats[model_a]["wins"] += 1
+                model_stats[model_b]["losses"] += 1
+            elif b_wins > a_wins:
+                model_stats[model_b]["wins"] += 1
+                model_stats[model_a]["losses"] += 1
+            elif a_wins == b_wins and (a_wins > 0 or b_wins > 0):
+                # Tie - both get 0.5
+                model_stats[model_a]["ties"] += 1
+                model_stats[model_b]["ties"] += 1
+
+    # Calculate win percentage and build results
+    total_possible_matchups = len(models) - 1 if len(models) > 1 else 1
+    results = []
+
+    for model in models:
+        stats = model_stats[model]
+        total_matchups = stats["wins"] + stats["losses"] + stats["ties"]
+        # Win percentage: (wins + 0.5 * ties) / total possible matchups (len(models) - 1)
+        if total_matchups > 0:
+            win_pct = (stats["wins"] + 0.5 * stats["ties"]) / total_possible_matchups
+        else:
+            win_pct = 0.0
+
+        results.append({
+            "model": model,
+            "wins": stats["wins"],
+            "losses": stats["losses"],
+            "ties": stats["ties"],
+            "win_percentage": round(win_pct, 3),
+            "total_matchups": int(total_matchups)
+        })
+
+    # Sort by win percentage (higher is better); ties are broken by fewer losses
+    results.sort(key=lambda x: (-x['win_percentage'],
+                                x['losses']))
+
+    return results
+
+
+def detect_minority_opinions(
+    stage2_results: List[Dict[str, Any]],
+    label_to_model: Dict[str, str],
+    tournament_rankings: List[Dict[str, Any]],
+    dissent_threshold: float = 0.3,
+    position_tolerance: int = 1
+) -> List[Dict[str, Any]]:
+    """
+    Detect minority opinions where a significant portion of rankers disagree
+    with the consensus ranking for a specific model.
+
+    A minority opinion is flagged when ≥dissent_threshold of rankers place a model
+    more than position_tolerance positions away from its consensus position.
+
+    Args:
+        stage2_results: Rankings from each model with parsed_ranking
+        label_to_model: Mapping from anonymous labels to model names
+        tournament_rankings: Consensus ranking from tournament method
+        dissent_threshold: Minimum fraction of rankers that must disagree (default 0.3 = 30%)
+        position_tolerance: How many positions away counts as disagreement (default 1)
+
+    Returns:
+        List of minority opinion dicts:
+        [
+            {
+                "model": "openai/gpt-4o",
+                "consensus_position": 1,
+                "dissent_positions": [3, 4],  # where dissenters placed it
+                "dissent_rate": 0.4,
+                "dissenters": ["anthropic/claude-3.5-sonnet", "google/gemini-2.0-flash"],
+                "direction": "undervalued"  # dissenters rank it better than consensus ("overvalued" = worse)
+            },
+            ...
+        ]
+    """
+    from collections import defaultdict
+
+    if not stage2_results or not tournament_rankings:
+        return []
+
+    # Build consensus position lookup from tournament rankings
+    consensus_positions = {
+        entry["model"]: position + 1  # 1-indexed
+        for position, entry in enumerate(tournament_rankings)
+    }
+
+    # Track each ranker's position for each model
+    # Structure: {model_name: [(ranker_model, position), ...]}
+    model_rankings_by_ranker = defaultdict(list)
+
+    for ranking in stage2_results:
+        ranker_model = ranking.get('model')
+        parsed_ranking = ranking.get('parsed_ranking')
+        if not parsed_ranking:
+            ranking_text = ranking.get('ranking', '')
+            parsed_ranking = parse_ranking_from_text(ranking_text) if ranking_text else []
+
+        if not parsed_ranking:
+            continue
+
+        # Record where this ranker placed each model
+        for position, label in enumerate(parsed_ranking, start=1):
+            if label in label_to_model:
+                model_name = label_to_model[label]
+                model_rankings_by_ranker[model_name].append((ranker_model, position))
+
+    # Detect minority opinions for each model
+    minority_opinions = []
+
+    for model_name, rankings in model_rankings_by_ranker.items():
+        if model_name not in consensus_positions:
+            continue
+
+        consensus_pos = consensus_positions[model_name]
+        total_rankers = len(rankings)
+
+        if total_rankers == 0:
+            continue
+
+        # Find dissenters: rankers who placed this model far from consensus
+        dissenters = []
+        dissent_positions = []
+
+        for ranker_model, ranker_position in rankings:
+            position_diff = abs(ranker_position - consensus_pos)
+            if position_diff > position_tolerance:
+                dissenters.append(ranker_model)
+                dissent_positions.append(ranker_position)
+
+        dissent_rate = len(dissenters) / total_rankers
+
+        # Only report if dissent rate meets threshold
+        if dissent_rate >= dissent_threshold and dissenters:
+            # Determine direction: are dissenters ranking it higher or lower?
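+            # A larger position number is a worse placement, so a worse average
+            # dissent position means the dissenters consider the model overvalued
+            # by the consensus; a better average means they consider it undervalued.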
+            avg_dissent_pos = sum(dissent_positions) / len(dissent_positions)
+            if avg_dissent_pos > consensus_pos:
+                direction = "overvalued"  # consensus ranks it higher than dissenters think
+            else:
+                direction = "undervalued"  # consensus ranks it lower than dissenters think
+
+            minority_opinions.append({
+                "model": model_name,
+                "consensus_position": consensus_pos,
+                "dissent_positions": sorted(set(dissent_positions)),
+                "dissent_rate": round(dissent_rate, 2),
+                "dissenters": dissenters,
+                "direction": direction
+            })
+
+    # Sort by dissent rate (highest first)
+    minority_opinions.sort(key=lambda x: -x['dissent_rate'])
+
+    return minority_opinions
+
+
 async def generate_conversation_title(user_query: str) -> str:
     """
     Generate a short title for a conversation based on the first user message.
@@ -316,8 +569,14 @@ async def run_full_council(user_query: str) -> Tuple[List, List, Dict, Dict]:
     # Stage 2: Collect rankings
     stage2_results, label_to_model = await stage2_collect_rankings(user_query, stage1_results)
 
-    # Calculate aggregate rankings
+    # Calculate aggregate rankings (both methods)
     aggregate_rankings = calculate_aggregate_rankings(stage2_results, label_to_model)
+    tournament_rankings = calculate_tournament_rankings(stage2_results, label_to_model)
+
+    # Detect minority opinions
+    minority_opinions = detect_minority_opinions(
+        stage2_results, label_to_model, tournament_rankings
+    )
 
     # Stage 3: Synthesize final answer
     stage3_result = await stage3_synthesize_final(
@@ -329,7 +588,9 @@ async def run_full_council(user_query: str) -> Tuple[List, List, Dict, Dict]:
     # Prepare metadata
     metadata = {
         "label_to_model": label_to_model,
-        "aggregate_rankings": aggregate_rankings
+        "aggregate_rankings": aggregate_rankings,
+        "tournament_rankings": tournament_rankings,
+        "minority_opinions": minority_opinions
     }
 
     return stage1_results, stage2_results, stage3_result, metadata
diff --git a/frontend/src/components/ChatInterface.jsx b/frontend/src/components/ChatInterface.jsx
index 3ae796caa..096bf3829 100644
--- a/frontend/src/components/ChatInterface.jsx
+++ b/frontend/src/components/ChatInterface.jsx
@@ -93,6 +93,7 @@ export default function ChatInterface({
                   rankings={msg.stage2}
                   labelToModel={msg.metadata?.label_to_model}
                   aggregateRankings={msg.metadata?.aggregate_rankings}
+                  minorityOpinions={msg.metadata?.minority_opinions}
                 />
               )}
diff --git a/frontend/src/components/Stage2.css b/frontend/src/components/Stage2.css
index 99c460a6f..583ad5d5d 100644
--- a/frontend/src/components/Stage2.css
+++ b/frontend/src/components/Stage2.css
@@ -151,3 +151,80 @@
   color: #999;
   font-size: 12px;
 }
+
+/* Minority Opinions */
+.minority-opinions {
+  background: #fff8e6;
+  padding: 16px;
+  border-radius: 8px;
+  margin-top: 20px;
+  border: 2px solid #ffd666;
+}
+
+.minority-opinions h4 {
+  margin: 0 0 12px 0;
+  color: #ad6800;
+  font-size: 15px;
+}
+
+.minority-list {
+  display: flex;
+  flex-direction: column;
+  gap: 12px;
+}
+
+.minority-item {
+  background: #ffffff;
+  padding: 12px;
+  border-radius: 6px;
+  border: 1px solid #ffd666;
+}
+
+.minority-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: 8px;
+}
+
+.minority-model {
+  font-family: monospace;
+  font-size: 14px;
+  font-weight: 600;
+  color: #333;
+}
+
+.minority-direction {
+  padding: 2px 8px;
+  border-radius: 4px;
+  font-size: 12px;
+  font-weight: 600;
+}
+
+.minority-direction.overvalued {
+  background: #fff1f0;
+  color: #cf1322;
+}
+
+.minority-direction.undervalued {
+  background: #f6ffed;
+  color: #389e0d;
+}
+
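+/* Details row inside each minority item */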
+.minority-details {
+  display: flex;
+  gap: 16px;
+  flex-wrap: wrap;
+  margin-bottom: 8px;
+}
+
+.minority-stat {
+  font-size: 13px;
+  color: #666;
+}
+
+.minority-dissenters {
+  font-size: 12px;
+  color: #888;
+  font-style: italic;
+}
diff --git a/frontend/src/components/Stage2.jsx b/frontend/src/components/Stage2.jsx
index 2550fa691..ddef4b91f 100644
--- a/frontend/src/components/Stage2.jsx
+++ b/frontend/src/components/Stage2.jsx
@@ -14,7 +14,7 @@ function deAnonymizeText(text, labelToModel) {
   return result;
 }
 
-export default function Stage2({ rankings, labelToModel, aggregateRankings }) {
+export default function Stage2({ rankings, labelToModel, aggregateRankings, minorityOpinions }) {
   const [activeTab, setActiveTab] = useState(0);
 
   if (!rankings || rankings.length === 0) {
@@ -94,6 +94,43 @@ export default function Stage2({ rankings, labelToModel, aggregateRankings }) {
        )}
+
+      {minorityOpinions && minorityOpinions.length > 0 && (
+
+          Significant disagreement detected (at least 30% of rankers dissent from the consensus ranking):
+
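
For reviewers who want to sanity-check the numbers in the README portion of this patch, here is a minimal standalone sketch of the two aggregation rules. It is illustrative only: it re-implements the logic in simplified form on hard-coded ballots rather than importing `calculate_tournament_rankings` or consuming real `stage2_results`, and the variable names (`ballots`, `wins`, `win_pct`) are invented for the example.

```python
from collections import defaultdict
from itertools import combinations

# Each ranker's ordering, best first (the self-promotion example from the README).
ballots = {
    "Model A": ["A", "B", "C"],
    "Model B": ["B", "A", "C"],
    "Model C": ["C", "A", "B"],
}
models = sorted({m for order in ballots.values() for m in order})

# Mean position averaging: average each model's 1-indexed position.
positions = defaultdict(list)
for order in ballots.values():
    for pos, model in enumerate(order, start=1):
        positions[model].append(pos)
mean_rank = {m: round(sum(p) / len(p), 2) for m, p in positions.items()}
print(dict(sorted(mean_rank.items(), key=lambda kv: kv[1])))
# {'A': 1.67, 'B': 2.0, 'C': 2.33}

# Tournament-style pairwise comparison: for each pair, the model preferred by
# the majority of rankers takes the matchup; an exact split counts as half a win.
wins = defaultdict(float)
for a, b in combinations(models, 2):
    a_votes = sum(order.index(a) < order.index(b) for order in ballots.values())
    b_votes = len(ballots) - a_votes
    if a_votes > b_votes:
        wins[a] += 1.0
    elif b_votes > a_votes:
        wins[b] += 1.0
    else:
        wins[a] += 0.5
        wins[b] += 0.5
win_pct = {m: wins[m] / (len(models) - 1) for m in models}
print(dict(sorted(win_pct.items(), key=lambda kv: -kv[1])))
# {'A': 1.0, 'B': 0.5, 'C': 0.0}
```

Editing a ballot and re-running the sketch is an easy way to compare how each method reacts to an outlier vote.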