diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py
index 20f36697a7..98b2b56669 100644
--- a/comps/cores/proto/api_protocol.py
+++ b/comps/cores/proto/api_protocol.py
@@ -1016,5 +1016,42 @@ class FineTuningJobCheckpoint(BaseModel):
     """The step number that the checkpoint was created at."""


+# Args allowed in openai-like chat completions API calls in OpeaTextGenService
+ALLOWED_CHATCOMPLETION_ARGS = (
+    "model",
+    "messages",
+    "frequency_penalty",
+    "max_tokens",
+    "n",
+    "presence_penalty",
+    "response_format",
+    "seed",
+    "stop",
+    "stream",
+    "stream_options",
+    "temperature",
+    "top_p",
+    "user",
+)
+
+# Args allowed in openai-like regular completion API calls in OpeaTextGenService
+ALLOWED_COMPLETION_ARGS = (
+    "model",
+    "prompt",
+    "echo",
+    "frequency_penalty",
+    "max_tokens",
+    "n",
+    "presence_penalty",
+    "seed",
+    "stop",
+    "stream",
+    "suffix",
+    "temperature",
+    "top_p",
+    "user",
+)
+
+
 class RouteEndpointDoc(BaseModel):
     url: str = Field(..., description="URL of the chosen inference endpoint")
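The two tuples above act as whitelists for the keyword arguments that `OpeaTextGenService` forwards to an OpenAI-compatible backend (they are consumed by the `_filter_api_params` helper added later in this patch). As a quick orientation, the snippet below is illustrative only and computes the difference between the two whitelists directly from the tuples:

```python
from comps.cores.proto.api_protocol import ALLOWED_CHATCOMPLETION_ARGS, ALLOWED_COMPLETION_ARGS

# Fields accepted only on the chat completions path: messages, response_format, stream_options
print(set(ALLOWED_CHATCOMPLETION_ARGS) - set(ALLOWED_COMPLETION_ARGS))

# Fields accepted only on the legacy completions path: prompt, echo, suffix
print(set(ALLOWED_COMPLETION_ARGS) - set(ALLOWED_CHATCOMPLETION_ARGS))
```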
"google/gemma-3-1b-it:free" +export LLM_ENDPOINT="" # e.g., "http://localhost:8000" (for local vLLM) or "https://openrouter.ai/api" (please make sure to omit /v1 suffix) +export OPENAI_API_KEY="" +``` + +## 3 Run the Textgen Service + +``` +export service_name="textgen-service-endpoint-openai" +docker compose -f comps/llms/deployment/docker_compose/compose_text-generation.yaml up ${service_name} -d +``` + +To observe logs: + +``` +docker logs textgen-service-endpoint-openai +``` + +## 4 Test the service + +You can first test the remote/local endpoint with `curl`. If you're using a service like OpenRouter, you can test it directly first: + +``` +curl https://openrouter.ai/api/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "'${LLM_MODEL_ID}'", + "messages": [ + { + "role": "user", + "content": "Tell me a joke?" + } + ] +}' +``` + +Then you can test the OPEA text generation service that wrapped the endpoint, with the following: + +``` +curl http://localhost:9000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model":"'${LLM_MODEL_ID}'","messages":[{"role":"user","content":"Tell me a joke?"}]}' +``` diff --git a/comps/llms/src/text-generation/integrations/service.py b/comps/llms/src/text-generation/integrations/service.py index 485571dc07..4979e107af 100644 --- a/comps/llms/src/text-generation/integrations/service.py +++ b/comps/llms/src/text-generation/integrations/service.py @@ -2,7 +2,9 @@ # SPDX-License-Identified: Apache-2.0 import asyncio +import logging import os +from pprint import pformat from typing import Union from fastapi.responses import StreamingResponse @@ -11,12 +13,18 @@ from comps import CustomLogger, LLMParamsDoc, OpeaComponent, OpeaComponentRegistry, SearchedDoc, ServiceType from comps.cores.mega.utils import ConfigError, get_access_token, load_model_configs -from comps.cores.proto.api_protocol import ChatCompletionRequest +from comps.cores.proto.api_protocol import ALLOWED_CHATCOMPLETION_ARGS, ALLOWED_COMPLETION_ARGS, ChatCompletionRequest from .template import ChatTemplate logger = CustomLogger("opea_llm") -logflag = os.getenv("LOGFLAG", False) + +# Configure advanced logging based on LOGFLAG environment variable +logflag = os.getenv("LOGFLAG", "False").lower() in ("true", "1", "yes") +if logflag: + logger.logger.setLevel(logging.DEBUG) +else: + logger.logger.setLevel(logging.INFO) # Environment variables MODEL_NAME = os.getenv("LLM_MODEL_ID") @@ -96,27 +104,30 @@ async def send_simple_request(): def align_input( self, input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc], prompt_template, input_variables ): + """Aligns different input types to a standardized chat completion format. 
diff --git a/comps/llms/src/text-generation/README_endpoint_openai.md b/comps/llms/src/text-generation/README_endpoint_openai.md
new file mode 100644
index 0000000000..1773bd6ff1
--- /dev/null
+++ b/comps/llms/src/text-generation/README_endpoint_openai.md
@@ -0,0 +1,70 @@
+# Introduction
+
+This OPEA text generation service can connect to any OpenAI-compatible API endpoint, including local deployments (like vLLM or TGI) and remote services (like OpenRouter.ai).
+
+## 1 Prepare the TextGen Docker Image
+
+```bash
+# Build the microservice docker image
+
+git clone https://github.com/opea-project/GenAIComps
+cd GenAIComps
+
+docker build \
+  --no-cache \
+  --build-arg https_proxy=$https_proxy \
+  --build-arg http_proxy=$http_proxy \
+  -t opea/llm-textgen:latest \
+  -f comps/llms/src/text-generation/Dockerfile .
+```
+
+## 2 Set Up Environment Variables
+
+The key environment variable is `LLM_ENDPOINT`, which specifies the URL of the OpenAI-compatible API. This can be a local address (e.g., for vLLM or TGI) or a remote address.
+
+```bash
+export host_ip=$(hostname -I | awk '{print $1}')
+export LLM_MODEL_ID="" # e.g., "google/gemma-3-1b-it:free"
+export LLM_ENDPOINT="" # e.g., "http://localhost:8000" (for local vLLM) or "https://openrouter.ai/api" (make sure to omit the /v1 suffix)
+export OPENAI_API_KEY=""
+```
+
+## 3 Run the TextGen Service
+
+```bash
+export service_name="textgen-service-endpoint-openai"
+docker compose -f comps/llms/deployment/docker_compose/compose_text-generation.yaml up ${service_name} -d
+```
+
+To observe the logs:
+
+```bash
+docker logs textgen-service-endpoint-openai
+```
+
+## 4 Test the Service
+
+First, verify the remote or local endpoint itself with `curl`. For example, if you are using a service like OpenRouter:
+
+```bash
+curl https://openrouter.ai/api/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -d '{
+  "model": "'${LLM_MODEL_ID}'",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Tell me a joke?"
+    }
+  ]
+}'
+```
+
+Then test the OPEA text generation service that wraps the endpoint:
+
+```bash
+curl http://localhost:9000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model":"'${LLM_MODEL_ID}'","messages":[{"role":"user","content":"Tell me a joke?"}]}'
+```
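The same smoke test can be driven from Python with the standard `openai` client pointed at the OPEA wrapper. This is a minimal sketch, assuming the service is reachable on port 9000 as in the README above; the `api_key` value here is a placeholder, since the upstream key is configured on the service side via `OPENAI_API_KEY`:

```python
# pip install openai
import os

from openai import OpenAI

# Point the stock OpenAI client at the OPEA textgen wrapper instead of api.openai.com.
client = OpenAI(base_url="http://localhost:9000/v1", api_key="not-used")

resp = client.chat.completions.create(
    model=os.environ.get("LLM_MODEL_ID", "google/gemma-3-1b-it:free"),
    messages=[{"role": "user", "content": "Tell me a joke?"}],
)
print(resp.choices[0].message.content)
```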
diff --git a/comps/llms/src/text-generation/integrations/service.py b/comps/llms/src/text-generation/integrations/service.py
index 485571dc07..4979e107af 100644
--- a/comps/llms/src/text-generation/integrations/service.py
+++ b/comps/llms/src/text-generation/integrations/service.py
@@ -2,7 +2,9 @@
 # SPDX-License-Identified: Apache-2.0

 import asyncio
+import logging
 import os
+from pprint import pformat
 from typing import Union

 from fastapi.responses import StreamingResponse
@@ -11,12 +13,18 @@

 from comps import CustomLogger, LLMParamsDoc, OpeaComponent, OpeaComponentRegistry, SearchedDoc, ServiceType
 from comps.cores.mega.utils import ConfigError, get_access_token, load_model_configs
-from comps.cores.proto.api_protocol import ChatCompletionRequest
+from comps.cores.proto.api_protocol import ALLOWED_CHATCOMPLETION_ARGS, ALLOWED_COMPLETION_ARGS, ChatCompletionRequest

 from .template import ChatTemplate

 logger = CustomLogger("opea_llm")
-logflag = os.getenv("LOGFLAG", False)
+
+# Configure advanced logging based on LOGFLAG environment variable
+logflag = os.getenv("LOGFLAG", "False").lower() in ("true", "1", "yes")
+if logflag:
+    logger.logger.setLevel(logging.DEBUG)
+else:
+    logger.logger.setLevel(logging.INFO)

 # Environment variables
 MODEL_NAME = os.getenv("LLM_MODEL_ID")
@@ -96,27 +104,30 @@ async def send_simple_request():
     def align_input(
         self, input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc], prompt_template, input_variables
     ):
+        """Aligns different input types to a standardized chat completion format.
+
+        Args:
+            input: SearchedDoc, LLMParamsDoc, or ChatCompletionRequest
+            prompt_template: Optional template for formatting prompts
+            input_variables: Variables expected by the prompt template
+        """
         if isinstance(input, SearchedDoc):
-            if logflag:
-                logger.info("[ SearchedDoc ] input from retriever microservice")
+            logger.debug(f"Processing SearchedDoc input from retriever microservice:\n{pformat(vars(input), indent=2)}")
             prompt = input.initial_query
             if input.retrieved_docs:
                 docs = [doc.text for doc in input.retrieved_docs]
-                if logflag:
-                    logger.info(f"[ SearchedDoc ] combined retrieved docs: {docs}")
+                logger.debug(f"Retrieved documents:\n{pformat(docs, indent=2)}")
                 prompt = ChatTemplate.generate_rag_prompt(input.initial_query, docs, MODEL_NAME)
+                logger.debug(f"Generated RAG prompt:\n{prompt}")

-            ## use default ChatCompletionRequest parameters
-            new_input = ChatCompletionRequest(messages=prompt)
+            # Convert to ChatCompletionRequest with default parameters
+            new_input = ChatCompletionRequest(messages=prompt)
+            logger.debug(f"Final converted input:\n{pformat(vars(new_input), indent=2)}")

-            if logflag:
-                logger.info(f"[ SearchedDoc ] final input: {new_input}")
-
-            return prompt, new_input
+            return prompt, new_input

         elif isinstance(input, LLMParamsDoc):
-            if logflag:
-                logger.info("[ LLMParamsDoc ] input from rerank microservice")
+            logger.debug(f"Processing LLMParamsDoc input from rerank microservice:\n{pformat(vars(input), indent=2)}")
             prompt = input.query
             if prompt_template:
                 if sorted(input_variables) == ["context", "question"]:
@@ -124,8 +135,8 @@ def align_input(
                 elif input_variables == ["question"]:
                     prompt = prompt_template.format(question=input.query)
                 else:
-                    logger.info(
-                        f"[ LLMParamsDoc ] {prompt_template} not used, we only support 2 input variables ['question', 'context']"
+                    logger.warning(
+                        f"Prompt template not used - unsupported variables.\nTemplate: {prompt_template}\nOnly ['question', 'context'] or ['question'] are supported"
                     )
             else:
                 if input.documents:
@@ -145,8 +156,7 @@ def align_input(
             return prompt, new_input

         else:
-            if logflag:
-                logger.info("[ ChatCompletionRequest ] input in opea format")
+            logger.debug(f"Processing ChatCompletionRequest input:\n{pformat(vars(input), indent=2)}")
             prompt = input.messages

             if prompt_template:
@@ -179,8 +189,7 @@ async def invoke(self, input: Union[LLMParamsDoc, ChatCompletionRequest, Searche
             input_variables = prompt_template.input_variables

         if isinstance(input, ChatCompletionRequest) and not isinstance(input.messages, str):
-            if logflag:
-                logger.info("[ ChatCompletionRequest ] input in opea format")
+            logger.debug("[ ChatCompletionRequest ] input in opea format")

             if input.messages[0]["role"] == "system":
                 if "{context}" in input.messages[0]["content"]:
@@ -200,22 +209,11 @@ async def invoke(self, input: Union[LLMParamsDoc, ChatCompletionRequest, Searche

                 input.messages.insert(0, {"role": "system", "content": system_prompt})

-            chat_completion = await self.client.chat.completions.create(
-                model=MODEL_NAME,
-                messages=input.messages,
-                frequency_penalty=input.frequency_penalty,
-                max_tokens=input.max_tokens,
-                n=input.n,
-                presence_penalty=input.presence_penalty,
-                response_format=input.response_format,
-                seed=input.seed,
-                stop=input.stop,
-                stream=input.stream,
-                stream_options=input.stream_options,
-                temperature=input.temperature,
-                top_p=input.top_p,
-                user=input.user,
-            )
+            # Create input params directly from input object attributes
+            input_params = {**vars(input), "model": MODEL_NAME}
+            filtered_params = self._filter_api_params(input_params, ALLOWED_CHATCOMPLETION_ARGS)
+            logger.debug(f"Filtered chat completion parameters:\n{pformat(filtered_params, indent=2)}")
+            chat_completion = await self.client.chat.completions.create(**filtered_params)
             """TODO need validate following parameters for vllm
                 logit_bias=input.logit_bias,
                 logprobs=input.logprobs,
@@ -226,22 +224,10 @@ async def invoke(self, input: Union[LLMParamsDoc, ChatCompletionRequest, Searche
                 parallel_tool_calls=input.parallel_tool_calls,"""
         else:
             prompt, input = self.align_input(input, prompt_template, input_variables)
-            chat_completion = await self.client.completions.create(
-                model=MODEL_NAME,
-                prompt=prompt,
-                echo=input.echo,
-                frequency_penalty=input.frequency_penalty,
-                max_tokens=input.max_tokens,
-                n=input.n,
-                presence_penalty=input.presence_penalty,
-                seed=input.seed,
-                stop=input.stop,
-                stream=input.stream,
-                suffix=input.suffix,
-                temperature=input.temperature,
-                top_p=input.top_p,
-                user=input.user,
-            )
+            input_params = {**vars(input), "model": MODEL_NAME, "prompt": prompt}
+            filtered_params = self._filter_api_params(input_params, ALLOWED_COMPLETION_ARGS)
+            logger.debug(f"Filtered completion parameters:\n{pformat(filtered_params, indent=2)}")
+            chat_completion = await self.client.completions.create(**filtered_params)
             """TODO need validate following parameters for vllm
                 best_of=input.best_of,
                 logit_bias=input.logit_bias,
@@ -251,8 +237,7 @@ async def invoke(self, input: Union[LLMParamsDoc, ChatCompletionRequest, Searche

             async def stream_generator():
                 async for c in chat_completion:
-                    if logflag:
-                        logger.info(c)
+                    logger.debug(c)
                     chunk = c.model_dump_json()
                     if chunk not in ["<|im_end|>", "<|endoftext|>"]:
                         yield f"data: {chunk}\n\n"
@@ -260,6 +245,21 @@ async def stream_generator():

             return StreamingResponse(stream_generator(), media_type="text/event-stream")
         else:
-            if logflag:
-                logger.info(chat_completion)
+            logger.debug(chat_completion)
             return chat_completion
+
+    def _filter_api_params(self, input_params: dict, allowed_args: tuple) -> dict:
+        """Filters input parameters to only include allowed non-None arguments.
+
+        Only allowed args are kept, and None-valued defaults are dropped, because
+        some OpenAI-like APIs (e.g., OpenRouter.ai) reject None parameters.
+        Works for both chat completion and regular completion API calls.
+
+        Args:
+            input_params: Dictionary of input parameters
+            allowed_args: Tuple of allowed argument names
+
+        Returns:
+            Filtered dictionary containing only allowed non-None arguments
+        """
+        return {arg: input_params[arg] for arg in allowed_args if arg in input_params and input_params[arg] is not None}
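For reference, here is a small self-contained sketch of the behaviour `_filter_api_params` implements. The dataclass is a stand-in for `ChatCompletionRequest` (its fields are illustrative), and the model name is only an example:

```python
from dataclasses import dataclass
from typing import Optional

from comps.cores.proto.api_protocol import ALLOWED_CHATCOMPLETION_ARGS

@dataclass
class FakeRequest:  # stand-in with a few illustrative fields
    messages: list
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    stream: bool = False

req = FakeRequest(messages=[{"role": "user", "content": "hi"}], max_tokens=64)
params = {**vars(req), "model": "Qwen/Qwen2.5-0.5B-Instruct"}

# Same comprehension as _filter_api_params: keep whitelisted, non-None entries only.
filtered = {arg: params[arg] for arg in ALLOWED_CHATCOMPLETION_ARGS if arg in params and params[arg] is not None}

assert "temperature" not in filtered  # None default is dropped
assert filtered["max_tokens"] == 64   # explicitly set value is kept
assert filtered["stream"] is False    # False is not None, so it is still forwarded
```

Dropping the `None` entries matters because some OpenAI-compatible backends (OpenRouter.ai is the example cited in the docstring) reject requests that spell out null parameters explicitly.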
diff --git a/tests/llms/test_llms_textgen_endpoint_openai.sh b/tests/llms/test_llms_textgen_endpoint_openai.sh
new file mode 100644
index 0000000000..8416211574
--- /dev/null
+++ b/tests/llms/test_llms_textgen_endpoint_openai.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# tests/llms/test_llms_textgen_endpoint_openai.sh
+# SPDX-License-Identifier: Apache-2.0
+
+# This script tests the textgen connection to an OpenAI-style endpoint, using a local vLLM OpenAI-compatible server.
+# The vLLM server is started in a docker container, and the textgen service is started in another container.
+# The textgen service is configured to connect to the vLLM server using a test key.
+# The test sends a request to the textgen service and validates the response.
+
+set -e # Exit on error
+
+# --- Check for jq ---
+if ! command -v jq &> /dev/null
+then
+    echo "jq could not be found. Please install it (e.g., apt install jq)."
+    exit 1
+fi
+
+# --- Setup ---
+export WORKPATH=$(dirname "$PWD")
+export host_ip=$(hostname -I | awk '{print $1}')
+export http_proxy=""
+export LOG_PATH="$WORKPATH/tests"
+export VLLM_MODEL="Qwen/Qwen2.5-0.5B-Instruct"
+export LLM_ENDPOINT_PORT="8000"
+export OPENAI_API_KEY=testkey
+
+
+function build_vllm_image() {
+    # This image is used to test textgen-service-endpoint-openai
+    rm -rf $WORKPATH/vllm # Remove existing vllm directory if it exists
+    cd $WORKPATH
+
+    # Pull the last tagged version of vLLM.
+    git clone https://github.com/vllm-project/vllm.git && cd vllm
+    VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )"
+    echo "Checked out vLLM tag ${VLLM_VER}"
+    git checkout ${VLLM_VER} &> /dev/null
+
+    docker build --no-cache -f docker/Dockerfile.cpu -t opea/vllm-cpu:test .
+    cd $WORKPATH
+}
+
+function start_vllm() {
+    export HF_TOKEN=${HF_TOKEN} # Remember to set HF_TOKEN before invoking this test!
+    export VLLM_API_KEY=${OPENAI_API_KEY} # This is the vLLM environment variable used to set API keys.
+    export host_ip=$(hostname -I | awk '{print $1}')
+
+    # Block size must be 16 for the CPU backend unless Intel Extension for PyTorch (IPEX) is installed
+    BLOCK_SIZE=16 # If IPEX is installed, 128 can be used.
+
+    # Environment variables for vLLM CPU configuration:
+    # - VLLM_USE_CPU=1: Explicitly force CPU backend usage
+    # - VLLM_CPU_OMP_THREADS_BIND=all: Configure OpenMP to use all available CPU threads
+    # - VLLM_CPU_KVCACHE_SPACE=4: Allocate 4GB of CPU memory for KV cache
+    # - VLLM_MLA_DISABLE=1: Disable MLA (Multi-head Latent Attention) optimizations, which aren't supported on CPU
+    docker run --rm -d \
+        -p ${LLM_ENDPOINT_PORT}:8000 \
+        -e VLLM_API_KEY=${OPENAI_API_KEY} \
+        -e VLLM_USE_CPU=1 \
+        -e VLLM_CPU_OMP_THREADS_BIND=all \
+        -e VLLM_CPU_KVCACHE_SPACE=4 \
+        -e VLLM_MLA_DISABLE=1 \
+        --name vllm-server \
+        --network bridge \
+        opea/vllm-cpu:test \
+        --model ${VLLM_MODEL} \
+        --port 8000 \
+        --host 0.0.0.0 \
+        --block-size ${BLOCK_SIZE}
+
+    echo "Waiting for vLLM server to initialize..."
+    # Wait for server to be ready by checking logs
+    while ! docker logs vllm-server 2>&1 | grep -q "Application startup complete"; do
+        echo "  Still waiting for vLLM server..."
+        sleep 10
+    done
+    echo "vLLM server is ready!"
+
+    # Verify server is responding
+    curl -v http://localhost:${LLM_ENDPOINT_PORT}/health 2>&1 || echo "Warning: vLLM health check failed"
+}
+
+function start_textgen() {
+    # Test whether textgen can connect to a vLLM endpoint with a test key.
+    export OPENAI_API_KEY=testkey
+    # Use the host IP so the textgen container can reach the vLLM server running on the host
+    export LLM_ENDPOINT="http://${host_ip}:8000"
+    export LLM_MODEL_ID=$VLLM_MODEL # Must match vLLM
+    export service_name="textgen-service-endpoint-openai"
+    export LOGFLAG=True
+
+    echo "Starting textgen service connecting to vllm at: ${LLM_ENDPOINT}"
+
+    # textgen-service-endpoint-openai extends the textgen service. This test uses the image: opea/llm-textgen:latest
+    cd $WORKPATH/comps/llms/deployment/docker_compose
+    docker compose -f compose_text-generation.yaml up ${service_name} -d
+
+    echo "Waiting for textgen-service-endpoint-openai to initialize..."
+    while ! docker logs textgen-service-endpoint-openai 2>&1 | grep -q "Application startup complete"; do
+        echo "  Still waiting for textgen-service-endpoint-openai server..."
+        sleep 5
+    done
+    echo "textgen-service-endpoint-openai is ready!"
+
+    curl http://localhost:9000/health 2>&1 || echo "Warning: textgen health check failed"
+}
+
+function validate_chat_completions() {
+    echo "Validating chat completions endpoint"
+    local response=$(curl -s -X POST http://${host_ip}:9000/v1/chat/completions \
+        -H "Content-Type: application/json" \
+        -H "Authorization: Bearer testkey" \
+        -d '{
+        "model": "'${VLLM_MODEL}'",
+        "messages": [
+            {
+                "role": "user",
+                "content": "Hello, world!"
+            }
+        ]
+    }')
+
+    echo "Raw chat response: $response"
+
+    local status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${host_ip}:9000/v1/chat/completions \
+        -H "Content-Type: application/json" \
+        -H "Authorization: Bearer testkey" \
+        -d '{
+        "model": "'${VLLM_MODEL}'",
+        "messages": [
+            {
+                "role": "user",
+                "content": "Hello, world!"
+            }
+        ]
+    }')
+
+    if [[ "$status_code" -ne 200 ]]; then
+        echo "Error: Chat completions HTTP status code is not 200. Received: $status_code"
+        docker logs textgen-service-endpoint-openai || true
+        docker logs vllm-server || true
+        exit 1
+    fi
+
+    local generated_text=$(echo "$response" | jq -r '.choices[0].message.content')
+
+    if [[ -z "$generated_text" ]]; then
+        echo "Error: No generated text found in chat response."
+        docker logs textgen-service-endpoint-openai || true
+        docker logs vllm-server || true
+        exit 1
+    fi
+
+    echo "Chat completions test passed. Generated text: $generated_text"
+}
+
+
+function stop_containers() {
+    docker compose -f compose_text-generation.yaml down
+    docker stop vllm-server || true # the --rm flag will ensure it is removed
+}
+
+
+# Assumes containers from other test runs are already cleared.
+build_vllm_image
+start_vllm
+start_textgen
+validate_chat_completions
+stop_containers
+docker system prune -a -f
+
+echo "All tests completed."
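One path the script above does not exercise is streaming (`"stream": true`), which the service serves as SSE via `stream_generator`. A rough sketch of such a check against the same wrapper and model used in this test (not part of the patch; chunk handling may need adjustment for your client version):

```python
# Rough streaming check against the textgen wrapper started by this script.
import os

from openai import OpenAI

client = OpenAI(base_url="http://localhost:9000/v1", api_key="testkey")
stream = client.chat.completions.create(
    model=os.environ.get("VLLM_MODEL", "Qwen/Qwen2.5-0.5B-Instruct"),
    messages=[{"role": "user", "content": "Hello, world!"}],
    stream=True,
)
# Collect the streamed deltas; chunks without choices (e.g., usage chunks) are skipped.
text = "".join(c.choices[0].delta.content or "" for c in stream if c.choices)
assert text, "expected non-empty streamed content"
print("Streaming chat completions test passed:", text[:80])
```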