Merged
2 changes: 1 addition & 1 deletion .github/workflows/_build-image-to-registry.yml
@@ -39,5 +39,5 @@ jobs:
- name: Build Image and Push Image
run: |
sudo apt install ansible -y
ansible-playbook build-image-to-registry.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}"
ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}"
working-directory: ${{ github.workspace }}/setup-scripts/build-image-to-registry/
7 changes: 0 additions & 7 deletions .github/workflows/_e2e-test.yml
@@ -36,16 +36,9 @@ jobs:
ref: ${{ env.CHECKOUT_REF }}
fetch-depth: 0

- name: Update Manifest
run: |
find . -type f -name 'studio-manifest.yaml' -exec sed -i 's/value: opea/value: ${REGISTRY}/g' {} \;
working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/manifests/

- name: Deploy GenAI Studio
run: |

sudo apt install ansible -y
sed -i 's/value: "${TAG}"/value: latest/' manifests/studio-manifest.yaml
ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local"
sleep 5
kubectl wait --for=condition=ready pod --all --namespace=studio --timeout=300s --field-selector=status.phase!=Succeeded
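
If the readiness wait times out, the failing pods can be inspected directly. A quick triage sketch (the deployment name is illustrative):

```sh
# List pods in the studio namespace that are not yet Ready
kubectl get pods -n studio
# Tail the logs of a suspect workload
kubectl logs -n studio deploy/studio-backend --tail=100
```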
2 changes: 1 addition & 1 deletion README.md
@@ -214,7 +214,7 @@ The downloaded zip file includes the necessary configurations for deploying the
3. Access the application by opening your web browser and going to:

```bash
http://<public_host_ip>:8080
http://<public_host_ip>:8090
```
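
A quick way to confirm the frontend now answers on 8090 (substitute your actual host; assumes the server responds to HEAD requests):

```sh
curl -I http://<public_host_ip>:8090
```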


4 changes: 2 additions & 2 deletions app-backend/megaservice.py
@@ -170,7 +170,7 @@ def align_inputs(self, inputs, *args, **kwargs):
elif self.services[node_id].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = inputs.get("model") or "Intel/neural-chat-7b-v3-3"
next_inputs["model"] = inputs.get("model") or "NA"
if inputs.get("inputs"):
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
elif inputs.get("query") and inputs.get("documents"):
@@ -401,7 +401,7 @@ def start(self):

if __name__ == "__main__":
print('pre initialize appService')
app = AppService(host="0.0.0.0", port=8888)
app = AppService(host="0.0.0.0", port=8899)
print('after initialize appService')
app.add_remote_service()
print('after add_remote_service')
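With the megaservice now bound to port 8899 (matching the updated VITE_CHAT_SERVICE_URL below), a minimal request sketch; the payload fields are illustrative, and the empty model string exercises the new "NA" fallback in align_inputs:

```sh
curl http://localhost:8899/v1/chatqna \
  -H "Content-Type: application/json" \
  -d '{"messages": "What is OPEA?", "model": ""}'
```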
6 changes: 4 additions & 2 deletions app-backend/opea_telemetry.py
@@ -18,6 +18,7 @@

logger = CustomLogger("OpeaComponent")

# studio update
def get_k8s_namespace():
try:
with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as f:
@@ -41,6 +42,7 @@ def detach_ignore_err(self, token: object) -> None:
# bypass the ValueError that ContextVar context was created in a different Context from StreamingResponse
ContextVarsRuntimeContext.detach = detach_ignore_err

# studio update
namespace_name = get_k8s_namespace()
resource = Resource.create({
SERVICE_NAME: "opea",
@@ -67,15 +69,15 @@ def opea_telemetry(func):

@wraps(func)
async def wrapper(*args, **kwargs):
with tracer.start_as_current_span(func.__name__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
with tracer.start_as_current_span(func.__qualname__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
res = await func(*args, **kwargs)
return res

else:

@wraps(func)
def wrapper(*args, **kwargs):
with tracer.start_as_current_span(func.__name__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
with tracer.start_as_current_span(func.__qualname__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
res = func(*args, **kwargs)
return res

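Worth noting: `func.__qualname__` includes the owning class, so same-named methods on different classes no longer collapse into a single span name. A quick standalone check:

```sh
python3 - <<'PY'
# __qualname__ carries the class context; __name__ does not
class AppService:
    async def schedule(self): ...

print(AppService.schedule.__name__)      # schedule
print(AppService.schedule.__qualname__)  # AppService.schedule
PY
```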
23 changes: 14 additions & 9 deletions app-backend/orchestrator.py
@@ -27,6 +27,7 @@
LOGFLAG = os.getenv("LOGFLAG", False)
ENABLE_OPEA_TELEMETRY = bool(os.environ.get("TELEMETRY_ENDPOINT"))


class OrchestratorMetrics:
def __init__(self) -> None:
# locking for latency metric creation / method change
@@ -134,7 +135,7 @@ async def schedule(self, initial_inputs: Dict | BaseModel, llm_parameters: LLMPa
if LOGFLAG:
logger.info(initial_inputs)

timeout = aiohttp.ClientTimeout(total=1000)
timeout = aiohttp.ClientTimeout(total=2000)
async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session:
pending = {
asyncio.create_task(
@@ -241,8 +242,7 @@ async def execute(
**kwargs,
):
# send the cur_node request/reply
endpoint = self.services[cur_node].endpoint_path
access_token = self.services[cur_node].api_key_value

llm_parameters_dict = llm_parameters.dict()

is_llm_vlm = self.services[cur_node].service_type in (ServiceType.LLM, ServiceType.LVM)
@@ -253,7 +253,11 @@
inputs[field] = value
# pre-process
inputs = self.align_inputs(inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs)

access_token = self.services[cur_node].api_key_value
if access_token:
endpoint = self.services[cur_node].endpoint_path(inputs["model"])
else:
endpoint = self.services[cur_node].endpoint_path(None)
if is_llm_vlm and llm_parameters.stream:
# Still leave to sync requests.post for StreamingResponse
if LOGFLAG:
@@ -270,7 +274,7 @@
headers={"Content-type": "application/json", "Authorization": f"Bearer {access_token}"},
proxies={"http": None},
stream=True,
timeout=1000,
timeout=2000,
)
else:
response = requests.post(
@@ -281,15 +285,15 @@
},
proxies={"http": None},
stream=True,
timeout=1000,
timeout=2000,
)

downstream = runtime_graph.downstream(cur_node)
if downstream:
assert len(downstream) == 1, "Not supported multiple stream downstreams yet!"
cur_node = downstream[0]
hitted_ends = [".", "?", "!", "。", ",", "!"]
downstream_endpoint = self.services[downstream[0]].endpoint_path
downstream_endpoint = self.services[downstream[0]].endpoint_path()

def generate():
token_start = req_start
@@ -313,6 +317,7 @@ def generate():
"Authorization": f"Bearer {access_token}",
},
proxies={"http": None},
timeout=2000,
)
else:
res = requests.post(
@@ -322,6 +327,7 @@
"Content-type": "application/json",
},
proxies={"http": None},
timeout=2000,
)
res_json = res.json()
if "text" in res_json:
@@ -367,7 +373,6 @@ def generate():
span.set_attribute("llm.input", str(input_data))
span.set_attribute("llm.output", await response.text())


if response.content_type == "audio/wav":
audio_data = await response.read()
data = self.align_outputs(audio_data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs)
@@ -419,4 +424,4 @@ def token_generator(self, sentence: str, token_start: float, is_first: bool, is_
yield prefix + repr(token.replace("\\n", "\n").encode("utf-8")) + suffix
is_first = False
if is_last:
yield "data: [DONE]\n\n"
yield "data: [DONE]\n\n"
2 changes: 1 addition & 1 deletion app-frontend/react/.env
@@ -1,2 +1,2 @@
VITE_CHAT_SERVICE_URL=http://backend_address:8888/v1/chatqna
VITE_CHAT_SERVICE_URL=http://backend_address:8899/v1/chatqna
VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep
@@ -74,7 +74,7 @@ const Conversation = ({ title, enabledUiFeatures }: ConversationProps) => {
messages,
maxTokens: tokenLimit,
temperature: temperature,
model: "Intel/neural-chat-7b-v3-3",
model: "",
// setIsInThinkMode
});
setPrompt("");
@@ -0,0 +1,45 @@
---
- name: Clone or update GenAIComps repo and build/push images
hosts: localhost
vars_files:
- vars.yml
tasks:
- name: Check if /tmp/GenAIComps exists
stat:
path: /tmp/GenAIComps
register: genaicomp_dir

- name: Clone GenAIComps repo if not present
git:
repo: https://github.com/opea-project/GenAIComps.git
dest: /tmp/GenAIComps
clone: yes
update: no
when: not genaicomp_dir.stat.exists

- name: Pull latest changes in GenAIComps repo
git:
repo: https://github.com/opea-project/GenAIComps.git
dest: /tmp/GenAIComps
update: yes
when: genaicomp_dir.stat.exists

- name: Build and push GenAIComps images
vars:
genaicomp_images:
- { name: 'embedding', dockerfile: 'comps/embeddings/src/Dockerfile' }
- { name: 'reranking', dockerfile: 'comps/rerankings/src/Dockerfile' }
- { name: 'retriever', dockerfile: 'comps/retrievers/src/Dockerfile' }
- { name: 'llm-textgen', dockerfile: 'comps/llms/src/text-generation/Dockerfile' }
- { name: 'dataprep', dockerfile: 'comps/dataprep/src/Dockerfile' }
- { name: 'agent', dockerfile: 'comps/agent/src/Dockerfile' }
block:
- name: Build image
command: docker build -t {{ container_registry }}/{{ item.name }}:{{ container_tag }} -f {{ item.dockerfile }} .
args:
chdir: /tmp/GenAIComps
loop: "{{ genaicomp_images }}"

- name: Push image
command: docker push {{ container_registry }}/{{ item.name }}:{{ container_tag }}
loop: "{{ genaicomp_images }}"
@@ -5,7 +5,7 @@
- vars.yml
tasks:
- name: Build Docker image
command: docker build -t "{{ item.image_name }}:latest" .
command: docker build -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" .
args:
chdir: "{{ item.directory }}"
loop:
@@ -15,14 +15,6 @@
- { directory: '../../app-backend/', image_name: 'app-backend' }
register: build_results

- name: Tag Docker image
command: docker tag "{{ item.image_name }}:latest" "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}"
loop:
- { image_name: 'studio-frontend' }
- { image_name: 'studio-backend' }
- { image_name: 'app-frontend' }
- { image_name: 'app-backend' }

- name: Push Docker image
command: docker push "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}"
loop:
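Since images are now built directly with the registry-qualified name, the separate docker tag step is redundant. Before pushing, the expected tags can be sanity-checked (filter pattern is illustrative):

```sh
docker images --filter "reference=myregistry.example.com/opea/*"
```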
8 changes: 7 additions & 1 deletion setup-scripts/build-image-to-registry/readme.md
@@ -14,5 +14,11 @@ The Ansible scripts used here build, tag, and push images to the specified container registry.
Run the commands below:
```sh
sudo apt install ansible -y
ansible-playbook build-image-to-registry.yml
ansible-playbook buildpush-genaistudio-images.yml
```

If you would also like to build the GenAIComps images, run:
```sh
sudo apt install ansible -y
ansible-playbook buildpush-genaicomps-images.yml
```
35 changes: 30 additions & 5 deletions setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml
@@ -276,15 +276,18 @@ rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["list", "get"]
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get"]
- apiGroups: [""]
resources: ["events"]
verbs: ["list", "watch"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["list", "get"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "create", "list", "watch"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "create", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
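
Once the manifest is applied, the new pods/log permission can be verified with an impersonated access check; the service account name matches the serviceAccountName used by studio-backend below, and the studio namespace is assumed:

```sh
kubectl auth can-i get pods --subresource=log \
  --as=system:serviceaccount:studio:studio-backend-sa
```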
@@ -328,7 +331,7 @@ spec:
- name: REGISTRY
value: ${REGISTRY}
- name: TAG
value: "${TAG}"
value: ${TAG}
- name: SBX_HTTP_PROXY
value: ${HTTP_PROXY}
- name: SBX_NO_PROXY
@@ -353,6 +356,18 @@
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
volumeMounts:
- name: ssh-key-volume
mountPath: /root/.ssh
readOnly: true
volumes:
- name: ssh-key-volume
secret:
secretName: ssh-keys
items:
- key: studio-id_rsa
path: id_rsa
mode: 0400
serviceAccountName: studio-backend-sa
---
apiVersion: v1
@@ -407,7 +422,7 @@ spec:
- name: DATABASE_NAME
value: studio
- name: DATABASE_SSL
value: "true"
value: "false"
ports:
- name: studio-frontend
containerPort: 8080
@@ -416,9 +431,19 @@
volumeMounts:
- mountPath: /tmp
name: tmp
- name: ssh-key-volume
mountPath: /root/.ssh
readOnly: true
volumes:
- name: tmp
emptyDir: {}
- name: ssh-key-volume
secret:
secretName: ssh-keys
items:
- key: studio-id_rsa.pub
path: id_rsa.pub
mode: 0644
---
apiVersion: apps/v1
kind: Deployment
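Both deployments above mount an ssh-keys secret that this manifest does not itself create, so it must exist before the pods schedule. A creation sketch (key file paths are illustrative; the studio namespace is assumed from the rest of this PR):

```sh
kubectl create secret generic ssh-keys -n studio \
  --from-file=studio-id_rsa=./id_rsa \
  --from-file=studio-id_rsa.pub=./id_rsa.pub
```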