@@ -563,6 +563,168 @@ logging:
563
563
# 6. Exponential backoff for failing endpoints
564
564
```
565
565
566
+ ## Filtering Examples
567
+
568
+ Examples showing profile and model filtering capabilities. See [Filter Concepts](filters.md) for detailed pattern syntax.
569
+
570
+ ### Specialized Embedding Service
571
+
572
+ Configure endpoints to serve only embedding models:
573
+
574
+ ```yaml
575
+ server :
576
+ port : 40114
577
+
578
+ proxy :
579
+ engine : " sherpa"
580
+ load_balancer : " priority"
581
+ # Only load profiles that support embeddings
582
+ profile_filter :
583
+ include :
584
+ - " ollama"
585
+ - " openai*"
586
+ exclude :
587
+ - " lm-studio" # Doesn't have good embedding support
588
+
589
+ discovery :
590
+ static :
591
+ endpoints :
592
+ - url : " http://localhost:11434"
593
+ name : " embedding-server"
594
+ type : " ollama"
595
+ priority : 100
596
+ model_filter :
597
+ include :
598
+ - " *embed*" # Embedding models
599
+ - " bge-*" # BGE models
600
+ - " e5-*" # E5 models
601
+ - " nomic-*" # Nomic models
602
+ exclude :
603
+ - " *test*" # No test models
604
+ ```
605
+
606
+ ### Production Chat Service
607
+
608
+ Filter out experimental and inappropriate models:
609
+
610
+ ```yaml
611
+ proxy :
612
+ engine : " olla"
613
+ load_balancer : " least-connections"
614
+ # Exclude test/debug profiles
615
+ profile_filter :
616
+ exclude :
617
+ - " *test*"
618
+ - " *debug*"
619
+
620
+ discovery :
621
+ static :
622
+ endpoints :
623
+ - url : " http://prod-gpu-1:11434"
624
+ name : " prod-chat-1"
625
+ type : " ollama"
626
+ priority : 100
627
+ model_filter :
628
+ include :
629
+ - " llama*" # Llama family
630
+ - " mistral*" # Mistral family
631
+ - " qwen*" # Qwen family
632
+ exclude :
633
+ - " *uncensored*" # No uncensored models
634
+ - " *test*" # No test models
635
+ - " *debug*" # No debug models
636
+ - " *embed*" # No embedding models
637
+
638
+ - url : " http://prod-gpu-2:11434"
639
+ name : " prod-chat-2"
640
+ type : " ollama"
641
+ priority : 100
642
+ model_filter :
643
+ # Same filters for consistency
644
+ include : ["llama*", "mistral*", "qwen*"]
645
+ exclude : ["*uncensored*", "*test*", "*debug*", "*embed*"]
646
+ ```
647
+
648
+ ### Mixed Workload with Different Endpoints
649
+
650
+ Different model types on different endpoints:
651
+
652
+ ```yaml
653
+ discovery :
654
+ static :
655
+ endpoints :
656
+ # Code generation endpoint
657
+ - url : " http://code-server:11434"
658
+ name : " code-gen"
659
+ type : " ollama"
660
+ priority : 100
661
+ model_filter :
662
+ include :
663
+ - " *code*" # Code models
664
+ - " deepseek-coder*"
665
+ - " codellama*"
666
+ - " starcoder*"
667
+
668
+ # General chat endpoint
669
+ - url : " http://chat-server:11434"
670
+ name : " chat"
671
+ type : " ollama"
672
+ priority : 90
673
+ model_filter :
674
+ include :
675
+ - " *chat*" # Chat models
676
+ - " *instruct*" # Instruction models
677
+ exclude :
678
+ - " *code*" # No code models
679
+ - " *embed*" # No embeddings
680
+
681
+ # Vision endpoint
682
+ - url : " http://vision-server:11434"
683
+ name : " vision"
684
+ type : " ollama"
685
+ priority : 80
686
+ model_filter :
687
+ include :
688
+ - " *vision*" # Vision models
689
+ - " llava*" # LLaVA models
690
+ - " *clip*" # CLIP models
691
+ ```
692
+
693
+ ### Resource-Constrained Environment
694
+
695
+ Filter by model size, matching the parameter-count tag (for example `7b`) in each model name:
696
+
697
+ ```yaml
698
+ discovery :
699
+ static :
700
+ endpoints :
701
+ # Small GPU - only small models
702
+ - url : " http://small-gpu:11434"
703
+ name : " small-models"
704
+ type : " ollama"
705
+ priority : 100
706
+ model_filter :
707
+ include :
708
+ - " *-3b*" # 3B models
709
+ - " *-7b*" # 7B models
710
+ - " *-8b*" # 8B models
711
+ exclude :
712
+ - " *-13b*" # Nothing larger
713
+ - " *-34b*"
714
+ - " *-70b*"
715
+
716
+ # Large GPU - only large models
717
+ - url : " http://large-gpu:11434"
718
+ name : " large-models"
719
+ type : " ollama"
720
+ priority : 50
721
+ model_filter :
722
+ include :
723
+ - " *-34b*" # 34B+ models
724
+ - " *-70b*"
725
+ - " *-72b*"
726
+ ```
727
+
566
728
## Environment Variables Override
567
729
568
730
Example showing environment variable overrides:
0 commit comments