2 changes: 1 addition & 1 deletion .github/workflows/_build-image-to-registry.yml
@@ -39,5 +39,5 @@ jobs:
- name: Build Image and Push Image
run: |
sudo apt install ansible -y
ansible-playbook build-image-to-registry.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}"
ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}"
working-directory: ${{ github.workspace }}/setup-scripts/build-image-to-registry/
7 changes: 0 additions & 7 deletions .github/workflows/_e2e-test.yml
@@ -36,16 +36,9 @@ jobs:
ref: ${{ env.CHECKOUT_REF }}
fetch-depth: 0

- name: Update Manifest
run: |
find . -type f -name 'studio-manifest.yaml' -exec sed -i 's/value: opea/value: ${REGISTRY}/g' {} \;
working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/manifests/

- name: Deploy GenAI Studio
run: |

sudo apt install ansible -y
sed -i 's/value: "${TAG}"/value: latest/' manifests/studio-manifest.yaml
ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local"
sleep 5
kubectl wait --for=condition=ready pod --all --namespace=studio --timeout=300s --field-selector=status.phase!=Succeeded
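Note on the removed templating step: registry and tag substitution is now presumably handled by the playbook's extra vars rather than by sed-rewriting the manifest in place. If the manifest ever needs rendering by hand, a GNU envsubst one-liner against its `${REGISTRY}`/`${TAG}` placeholders (values below are illustrative) would do the same job:

```sh
REGISTRY=myregistry.example.com TAG=latest \
  envsubst '${REGISTRY} ${TAG}' < manifests/studio-manifest.yaml | kubectl apply -f -
```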
2 changes: 1 addition & 1 deletion README.md
@@ -214,7 +214,7 @@ The downloaded zip file includes the necessary configurations for deploying the
3. Access the application by opening your web browser and going to:

```bash
http://<public_host_ip>:8080
http://<public_host_ip>:8090
```


4 changes: 2 additions & 2 deletions app-backend/megaservice.py
@@ -170,7 +170,7 @@ def align_inputs(self, inputs, *args, **kwargs):
elif self.services[node_id].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = inputs.get("model") or "Intel/neural-chat-7b-v3-3"
next_inputs["model"] = inputs.get("model") or "NA"
if inputs.get("inputs"):
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
elif inputs.get("query") and inputs.get("documents"):
@@ -401,7 +401,7 @@ def start(self):

if __name__ == "__main__":
print('pre initialize appService')
app = AppService(host="0.0.0.0", port=8888)
app = AppService(host="0.0.0.0", port=8899)
print('after initialize appService')
app.add_remote_service()
print('after add_remote_service')
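With the megaservice moved from 8888 to 8899, a quick smoke test against the new port might look like this (host and payload are assumptions; the `/v1/chatqna` route is taken from the frontend `.env` below):

```sh
curl http://localhost:8899/v1/chatqna \
  -H "Content-Type: application/json" \
  -d '{"messages": "Hello, are you up?"}'
```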
6 changes: 4 additions & 2 deletions app-backend/opea_telemetry.py
@@ -18,6 +18,7 @@

logger = CustomLogger("OpeaComponent")

# studio update
def get_k8s_namespace():
try:
with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as f:
@@ -41,6 +42,7 @@ def detach_ignore_err(self, token: object) -> None:
# bypass the ValueError that ContextVar context was created in a different Context from StreamingResponse
ContextVarsRuntimeContext.detach = detach_ignore_err

# studio update
namespace_name = get_k8s_namespace()
resource = Resource.create({
SERVICE_NAME: "opea",
@@ -67,15 +69,15 @@ def opea_telemetry(func):

@wraps(func)
async def wrapper(*args, **kwargs):
with tracer.start_as_current_span(func.__name__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
with tracer.start_as_current_span(func.__qualname__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
res = await func(*args, **kwargs)
return res

else:

@wraps(func)
def wrapper(*args, **kwargs):
with tracer.start_as_current_span(func.__name__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
with tracer.start_as_current_span(func.__qualname__) if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
res = func(*args, **kwargs)
return res

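For context on the `__name__` → `__qualname__` switch: `__qualname__` keeps the enclosing class in the span name, so two methods that happen to share a name stay distinguishable in traces. A minimal illustration (hypothetical classes, not from this repo):

```python
class AppService:
    async def schedule(self):
        ...

class OtherService:
    async def schedule(self):
        ...

# __name__ drops the class context; __qualname__ preserves it.
print(AppService.schedule.__name__)        # schedule
print(OtherService.schedule.__qualname__)  # OtherService.schedule
```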
23 changes: 14 additions & 9 deletions app-backend/orchestrator.py
@@ -27,6 +27,7 @@
LOGFLAG = os.getenv("LOGFLAG", False)
ENABLE_OPEA_TELEMETRY = bool(os.environ.get("TELEMETRY_ENDPOINT"))


class OrchestratorMetrics:
def __init__(self) -> None:
# locking for latency metric creation / method change
@@ -134,7 +135,7 @@ async def schedule(self, initial_inputs: Dict | BaseModel, llm_parameters: LLMPa
if LOGFLAG:
logger.info(initial_inputs)

timeout = aiohttp.ClientTimeout(total=1000)
timeout = aiohttp.ClientTimeout(total=2000)
async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session:
pending = {
asyncio.create_task(
@@ -241,8 +242,7 @@ async def execute(
**kwargs,
):
# send the cur_node request/reply
endpoint = self.services[cur_node].endpoint_path
access_token = self.services[cur_node].api_key_value

llm_parameters_dict = llm_parameters.dict()

is_llm_vlm = self.services[cur_node].service_type in (ServiceType.LLM, ServiceType.LVM)
@@ -253,7 +253,11 @@
inputs[field] = value
# pre-process
inputs = self.align_inputs(inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs)

access_token = self.services[cur_node].api_key_value
if access_token:
endpoint = self.services[cur_node].endpoint_path(inputs["model"])
else:
endpoint = self.services[cur_node].endpoint_path(None)
if is_llm_vlm and llm_parameters.stream:
# Still leave to sync requests.post for StreamingResponse
if LOGFLAG:
@@ -270,7 +274,7 @@
headers={"Content-type": "application/json", "Authorization": f"Bearer {access_token}"},
proxies={"http": None},
stream=True,
timeout=1000,
timeout=2000,
)
else:
response = requests.post(
@@ -281,15 +285,15 @@
},
proxies={"http": None},
stream=True,
timeout=1000,
timeout=2000,
)

downstream = runtime_graph.downstream(cur_node)
if downstream:
assert len(downstream) == 1, "Not supported multiple stream downstreams yet!"
cur_node = downstream[0]
hitted_ends = [".", "?", "!", "。", ",", "!"]
downstream_endpoint = self.services[downstream[0]].endpoint_path
downstream_endpoint = self.services[downstream[0]].endpoint_path()

def generate():
token_start = req_start
@@ -313,6 +317,7 @@ def generate():
"Authorization": f"Bearer {access_token}",
},
proxies={"http": None},
timeout=2000,
)
else:
res = requests.post(
@@ -322,6 +327,7 @@ def generate():
"Content-type": "application/json",
},
proxies={"http": None},
timeout=2000,
)
res_json = res.json()
if "text" in res_json:
@@ -367,7 +373,6 @@ def generate():
span.set_attribute("llm.input", str(input_data))
span.set_attribute("llm.output", await response.text())


if response.content_type == "audio/wav":
audio_data = await response.read()
data = self.align_outputs(audio_data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs)
@@ -419,4 +424,4 @@ def token_generator(self, sentence: str, token_start: float, is_first: bool, is_
yield prefix + repr(token.replace("\\n", "\n").encode("utf-8")) + suffix
is_first = False
if is_last:
yield "data: [DONE]\n\n"
yield "data: [DONE]\n\n"
2 changes: 1 addition & 1 deletion app-frontend/react/.env
@@ -1,2 +1,2 @@
VITE_CHAT_SERVICE_URL=http://backend_address:8888/v1/chatqna
VITE_CHAT_SERVICE_URL=http://backend_address:8899/v1/chatqna
VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep
@@ -74,7 +74,7 @@ const Conversation = ({ title, enabledUiFeatures }: ConversationProps) => {
messages,
maxTokens: tokenLimit,
temperature: temperature,
model: "Intel/neural-chat-7b-v3-3",
model: "",
// setIsInThinkMode
});
setPrompt("");
@@ -0,0 +1,45 @@
---
- name: Clone or update GenAIComps repo and build/push images
hosts: localhost
vars_files:
- vars.yml
tasks:
- name: Check if /tmp/GenAIComps exists
stat:
path: /tmp/GenAIComps
register: genaicomp_dir

- name: Clone GenAIComps repo if not present
git:
repo: https://github.com/opea-project/GenAIComps.git
dest: /tmp/GenAIComps
clone: yes
update: no
when: not genaicomp_dir.stat.exists

- name: Pull latest changes in GenAIComps repo
git:
repo: https://github.com/opea-project/GenAIComps.git
dest: /tmp/GenAIComps
update: yes
when: genaicomp_dir.stat.exists

- name: Build and push GenAIComps images
vars:
genaicomp_images:
- { name: 'embedding', dockerfile: 'comps/embeddings/src/Dockerfile' }
- { name: 'reranking', dockerfile: 'comps/rerankings/src/Dockerfile' }
- { name: 'retriever', dockerfile: 'comps/retrievers/src/Dockerfile' }
- { name: 'llm-textgen', dockerfile: 'comps/llms/src/text-generation/Dockerfile' }
- { name: 'dataprep', dockerfile: 'comps/dataprep/src/Dockerfile' }
- { name: 'agent', dockerfile: 'comps/agent/src/Dockerfile' }
block:
- name: Build image
command: docker build -t {{ container_registry }}/{{ item.name }}:{{ container_tag }} -f {{ item.dockerfile }} .
args:
chdir: /tmp/GenAIComps
loop: "{{ genaicomp_images }}"

- name: Push image
command: docker push {{ container_registry }}/{{ item.name }}:{{ container_tag }}
loop: "{{ genaicomp_images }}"
@@ -5,7 +5,7 @@
- vars.yml
tasks:
- name: Build Docker image
command: docker build -t "{{ item.image_name }}:latest" .
command: docker build -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" .
args:
chdir: "{{ item.directory }}"
loop:
@@ -15,14 +15,6 @@
- { directory: '../../app-backend/', image_name: 'app-backend' }
register: build_results

- name: Tag Docker image
command: docker tag "{{ item.image_name }}:latest" "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}"
loop:
- { image_name: 'studio-frontend' }
- { image_name: 'studio-backend' }
- { image_name: 'app-frontend' }
- { image_name: 'app-backend' }

- name: Push Docker image
command: docker push "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}"
loop:
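Tagging the image with its fully qualified reference at build time makes the removed tag task redundant; the old two-step flow was effectively this (names and registry are placeholders):

```sh
docker build -t studio-frontend:latest .
docker tag studio-frontend:latest myregistry.example.com/opea/studio-frontend:latest
docker push myregistry.example.com/opea/studio-frontend:latest
```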
8 changes: 7 additions & 1 deletion setup-scripts/build-image-to-registry/readme.md
@@ -14,5 +14,11 @@ The ansible scripts used here are building, tag and push to the specified contai
Run the commands below:
```sh
sudo apt install ansible -y
ansible-playbook build-image-to-registry.yml
ansible-playbook buildpush-genaistudio-images.yml
```

If you would like to build the GenAIComps images as well, use:
```sh
sudo apt install ansible -y
ansible-playbook buildpush-genaicomps-images.yml
```
35 changes: 30 additions & 5 deletions setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml
@@ -276,15 +276,18 @@ rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["list", "get"]
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get"]
- apiGroups: [""]
resources: ["events"]
verbs: ["list", "watch"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["list", "get"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "create", "list", "watch"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "create", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
@@ -328,7 +331,7 @@ spec:
- name: REGISTRY
value: ${REGISTRY}
- name: TAG
value: "${TAG}"
value: ${TAG}
- name: SBX_HTTP_PROXY
value: ${HTTP_PROXY}
- name: SBX_NO_PROXY
@@ -353,6 +356,18 @@
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
volumeMounts:
- name: ssh-key-volume
mountPath: /root/.ssh
readOnly: true
volumes:
- name: ssh-key-volume
secret:
secretName: ssh-keys
items:
- key: studio-id_rsa
path: id_rsa
mode: 0400
serviceAccountName: studio-backend-sa
---
apiVersion: v1
@@ -407,7 +422,7 @@ spec:
- name: DATABASE_NAME
value: studio
- name: DATABASE_SSL
value: "true"
value: "false"
ports:
- name: studio-frontend
containerPort: 8080
@@ -416,9 +431,19 @@
volumeMounts:
- mountPath: /tmp
name: tmp
- name: ssh-key-volume
mountPath: /root/.ssh
readOnly: true
volumes:
- name: tmp
emptyDir: {}
- name: ssh-key-volume
secret:
secretName: ssh-keys
items:
- key: studio-id_rsa.pub
path: id_rsa.pub
mode: 0644
---
apiVersion: apps/v1
kind: Deployment
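The new `ssh-key-volume` mounts assume an `ssh-keys` secret already exists in the studio namespace, carrying `studio-id_rsa` and `studio-id_rsa.pub` entries. Something along these lines would create it (key file paths are placeholders):

```sh
kubectl create secret generic ssh-keys --namespace studio \
  --from-file=studio-id_rsa=./keys/id_rsa \
  --from-file=studio-id_rsa.pub=./keys/id_rsa.pub
```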