Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SCP] Open port support #4490

Open
wants to merge 32 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b38f3d9
[SCP] open port support
hyoxt121 Nov 23, 2024
4ccdbd4
[SCP] open port support (#1)
hyoxt121 Nov 23, 2024
41538a8
[SCP] open port support
hyoxt121 Nov 23, 2024
8c407f4
Merge branch 'skypilot-org:master' into master
hyoxt121 Nov 23, 2024
97d6f36
Merge branch 'skypilot-org:master' into master
hyoxt121 Nov 24, 2024
2c711a0
[SCP] open port support
hyoxt121 Nov 25, 2024
33b7755
[SC
hyoxt121 Nov 25, 2024
9850699
[SCP] open port support
hyoxt121 Nov 25, 2024
7ac4344
[SCP] open port support
hyoxt121 Nov 25, 2024
de20948
[SCP] open port support
hyoxt121 Nov 25, 2024
61ef98a
[SCP] open port support
hyoxt121 Nov 25, 2024
1ad8900
[SCP] open port support
hyoxt121 Nov 25, 2024
218a23e
[SCP] open port support
hyoxt121 Nov 25, 2024
0708b70
[SCP] open port support
hyoxt121 Nov 25, 2024
cea4988
[SCP] open port support
hyoxt121 Nov 25, 2024
d08164d
Merge branch 'skypilot-org:master' into master
hyoxt121 Nov 25, 2024
0f1922c
Merge branch 'skypilot-org:master' into master
hyoxt121 Nov 29, 2024
ecb3491
Merge branch 'skypilot-org:master' into master
hyoxt121 Dec 5, 2024
e3f3494
SCP firewall fix (#2)
hyoxt121 Dec 5, 2024
8c520f9
Merge branch 'skypilot-org:master' into master
hyoxt121 Dec 18, 2024
7adcad5
Merge branch 'skypilot-org:master' into master
hyoxt121 Dec 19, 2024
4f819de
Create VPC (#3)
hyoxt121 Dec 19, 2024
3d4c536
Merge branch 'skypilot-org:master' into master
hyoxt121 Dec 20, 2024
1881970
[SCP] open port support
hyoxt121 Dec 20, 2024
a39d548
[SCP] open port support
hyoxt121 Dec 20, 2024
7e0856d
[SCP] open port support
hyoxt121 Dec 20, 2024
7d3518b
[SCP] open port support
hyoxt121 Dec 20, 2024
91cf2b4
[SCP] open port support
hyoxt121 Dec 20, 2024
46e68fb
[SCP] open port support
hyoxt121 Dec 20, 2024
d1c1678
[SCP] open port support
hyoxt121 Dec 20, 2024
5cb5e11
Merge branch 'master' into master
hyoxt121 Dec 25, 2024
90c6fde
Merge branch 'skypilot-org:master' into master
hyoxt121 Jan 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions sky/clouds/scp.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ class SCP(clouds.Cloud):
(f'Spot instances are not supported in {_REPR}.'),
clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
(f'Custom disk tiers are not supported in {_REPR}.'),
clouds.CloudImplementationFeatures.OPEN_PORTS:
(f'Opening ports is currently not supported on {_REPR}.'),
}

_INDENT_PREFIX = ' '
Expand Down Expand Up @@ -236,7 +234,7 @@ def _get_default_ami(cls, region_name: str, instance_type: str) -> str:
if acc is not None:
assert len(acc) == 1, acc
image_id = service_catalog.get_image_id_from_tag(
'skypilot:gpu-ubuntu-1804', region_name, clouds='scp')
'skypilot:gpu-ubuntu-2204', region_name, clouds='scp')
if image_id is not None:
return image_id
# Raise ResourcesUnavailableError to make sure the failover in
Expand Down
211 changes: 208 additions & 3 deletions sky/clouds/utils/scp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ def del_security_group(self, sg_id):
url = f'{API_ENDPOINT}/security-group/v2/security-groups/{sg_id}'
return self._delete(url)

def del_firwall_rules(self, firewall_id, rule_id_list):
def del_firewall_rules(self, firewall_id, rule_id_list):
    """Delete a subset of rules from a firewall.

    Sends a DELETE to the rules endpoint of ``firewall_id`` with a
    ``PARTIAL`` deletion body that lists ``rule_id_list``.

    Returns:
        Whatever ``self._delete`` returns for the request.
    """
    rules_url = f'{API_ENDPOINT}/firewall/v2/firewalls/{firewall_id}/rules'
    payload = {'ruleDeletionType': 'PARTIAL', 'ruleIds': rule_id_list}
    return self._delete(rules_url, request_body=payload)
Expand All @@ -422,11 +422,11 @@ def get_vm_info(self, vm_id):
url = f'{API_ENDPOINT}/virtual-server/v3/virtual-servers/{vm_id}'
return self._get(url, contents_key=None)

def get_firewal_rule_info(self, firewall_id, rule_id):
def get_firewall_rule_info(self, firewall_id, rule_id):
    """Fetch a single firewall rule.

    Issues a GET for ``rule_id`` under ``firewall_id`` with
    ``contents_key=None`` and returns the result of ``self._get``.
    """
    rule_url = (f'{API_ENDPOINT}/firewall/v2/firewalls/{firewall_id}'
                f'/rules/{rule_id}')
    return self._get(rule_url, contents_key=None)

def list_firwalls(self):
def list_firewalls(self):
    """Return the result of a GET on the firewall collection endpoint."""
    return self._get(f'{API_ENDPOINT}/firewall/v2/firewalls')

Expand All @@ -442,3 +442,208 @@ def start_instance(self, vm_id):
def stop_instance(self, vm_id):
    """Request a stop of the virtual server ``vm_id``.

    POSTs an empty JSON body to the server's ``/stop`` endpoint and returns
    the result of ``self._post``.
    """
    stop_url = f'{API_ENDPOINT}/virtual-server/v2/virtual-servers/{vm_id}/stop'
    return self._post(url=stop_url, request_body={})

def list_security_group_rules(self, sg_id):
    """Return the result of a GET on the rules of security group ``sg_id``."""
    rules_url = (f'{API_ENDPOINT}/security-group/v2/security-groups/'
                 f'{sg_id}/rules')
    return self._get(rules_url)

def _check_existing_security_group_in_rule(self, sg_id, port):
    """Tell whether an inbound rule for ``port`` still needs to be created.

    Note the polarity: despite the name, this returns ``True`` when NO
    existing ``IN`` rule of the security group lists ``port`` in its
    ``tcpServices``, and ``False`` as soon as one does.
    """
    all_rules = self.list_security_group_rules(sg_id)
    # Filter first, then inspect ports, so every rule's direction is read
    # before any 'tcpServices' lookup happens.
    inbound_rules = [r for r in all_rules if r['ruleDirection'] == 'IN']
    return not any(port in r['tcpServices'] for r in inbound_rules)

def _check_existing_security_group_out_rule(self, sg_id, port):
    """Tell whether an outbound rule for ``port`` still needs to be created.

    Note the polarity: despite the name, this returns ``True`` when NO
    existing ``OUT`` rule of the security group lists ``port`` in its
    ``tcpServices``, and ``False`` as soon as one does.
    """
    all_rules = self.list_security_group_rules(sg_id)
    # Filter first, then inspect ports, so every rule's direction is read
    # before any 'tcpServices' lookup happens.
    outbound_rules = [r for r in all_rules if r['ruleDirection'] == 'OUT']
    return not any(port in r['tcpServices'] for r in outbound_rules)

def add_new_security_group_in_rule(self, sg_id, port):
    """Add an inbound TCP rule for ``port`` to a security group if missing.

    The rule allows traffic from ``0.0.0.0/0``. Nothing is sent when an
    existing inbound rule already lists the port.

    Returns:
        The result of ``self._post`` when a rule is created, else ``None``.
    """
    if not self._check_existing_security_group_in_rule(sg_id, port):
        # An inbound rule already covers this port.
        return None

    tcp_service = {'serviceType': 'TCP', 'serviceValue': port}
    payload = {
        'ruleDirection': 'IN',
        'services': [tcp_service],
        'sourceIpAddresses': ['0.0.0.0/0'],
        'ruleDescription': 'skyserve rule',
    }
    rules_url = (f'{API_ENDPOINT}/security-group/v2/security-groups/'
                 f'{sg_id}/rules')
    return self._post(rules_url, payload)

def add_new_security_group_out_rule(self, sg_id, port):
    """Add an outbound TCP rule for ``port`` to a security group if missing.

    The rule allows traffic to ``0.0.0.0/0``. Nothing is sent when an
    existing outbound rule already lists the port.

    Returns:
        The result of ``self._post`` when a rule is created, else ``None``.
    """
    if not self._check_existing_security_group_out_rule(sg_id, port):
        # An outbound rule already covers this port.
        return None

    tcp_service = {'serviceType': 'TCP', 'serviceValue': port}
    payload = {
        'ruleDirection': 'OUT',
        'services': [tcp_service],
        'destinationIpAddresses': ['0.0.0.0/0'],
        'ruleDescription': 'skyserve rule',
    }
    rules_url = (f'{API_ENDPOINT}/security-group/v2/security-groups/'
                 f'{sg_id}/rules')
    return self._post(rules_url, payload)

def list_firewall_rules(self, firewall_id):
    """Return the result of a GET on the rules of firewall ``firewall_id``."""
    return self._get(
        f'{API_ENDPOINT}/firewall/v2/firewalls/{firewall_id}/rules')

def _check_existing_firewall_in_rule(self, firewall_id, internal_ip, port):
    """Tell whether an inbound firewall rule for ``port`` is still needed.

    Only ``IN`` rules whose FIRST destination address equals
    ``internal_ip`` are considered. Note the polarity: returns ``True``
    when none of them lists ``port`` in ``tcpServices``, ``False``
    otherwise.
    """
    all_rules = self.list_firewall_rules(firewall_id)
    candidates = [
        r for r in all_rules
        if r['ruleDirection'] == 'IN' and
        internal_ip == r['destinationIpAddresses'][0]
    ]
    return not any(port in r['tcpServices'] for r in candidates)

def _check_existing_firewall_out_rule(self, firewall_id, internal_ip, port):
    """Tell whether an outbound firewall rule for ``port`` is still needed.

    Only ``OUT`` rules whose FIRST source address equals ``internal_ip``
    are considered. Note the polarity: returns ``True`` when none of them
    lists ``port`` in ``tcpServices``, ``False`` otherwise.
    """
    all_rules = self.list_firewall_rules(firewall_id)
    candidates = [
        r for r in all_rules
        if r['ruleDirection'] == 'OUT' and
        internal_ip == r['sourceIpAddresses'][0]
    ]
    return not any(port in r['tcpServices'] for r in candidates)

def add_new_firewall_inbound_rule(self, firewall_id, internal_ip, port):
    """Create an inbound ALLOW rule for ``port`` on a firewall if missing.

    The rule admits TCP traffic from ``0.0.0.0/0`` to ``internal_ip`` and
    is requested at the ``FIRST`` rule location. Nothing is sent when a
    matching inbound rule already lists the port.

    Returns:
        The result of ``self._post`` when a rule is created, else ``None``.
    """
    needs_rule = self._check_existing_firewall_in_rule(
        firewall_id, internal_ip, port)
    if not needs_rule:
        return None

    rules_url = f'{API_ENDPOINT}/firewall/v2/firewalls/{firewall_id}/rules'
    tcp_service = {'serviceType': 'TCP', 'serviceValue': port}
    payload = {
        'sourceIpAddresses': ['0.0.0.0/0'],
        'destinationIpAddresses': [internal_ip],
        'services': [tcp_service],
        'ruleDirection': 'IN',
        'ruleAction': 'ALLOW',
        'isRuleEnabled': True,
        'ruleLocationType': 'FIRST',
        'ruleDescription': 'skyserve rule',
    }
    return self._post(rules_url, payload)

def add_new_firewall_outbound_rule(self, firewall_id, internal_ip, port):
    """Create an outbound ALLOW rule for ``port`` on a firewall if missing.

    The rule admits TCP traffic from ``internal_ip`` to ``0.0.0.0/0`` and
    is requested at the ``FIRST`` rule location. Nothing is sent when a
    matching outbound rule already lists the port.

    Returns:
        The result of ``self._post`` when a rule is created, else ``None``.
    """
    needs_rule = self._check_existing_firewall_out_rule(
        firewall_id, internal_ip, port)
    if not needs_rule:
        return None

    rules_url = f'{API_ENDPOINT}/firewall/v2/firewalls/{firewall_id}/rules'
    tcp_service = {'serviceType': 'TCP', 'serviceValue': port}
    payload = {
        'sourceIpAddresses': [internal_ip],
        'destinationIpAddresses': ['0.0.0.0/0'],
        'services': [tcp_service],
        'ruleDirection': 'OUT',
        'ruleAction': 'ALLOW',
        'isRuleEnabled': True,
        'ruleLocationType': 'FIRST',
        'ruleDescription': 'skyserve rule',
    }
    return self._post(rules_url, payload)

def wait_firewall_inbound_rule_complete(self,
                                        firewall_id,
                                        rule_id,
                                        timeout=None,
                                        poll_interval=5):
    """Block until the firewall rule reports ``ruleState == 'ACTIVE'``.

    Sleeps ``poll_interval`` seconds, then polls
    ``get_firewall_rule_info`` and repeats until the rule is active.

    Args:
        firewall_id: Firewall that owns the rule.
        rule_id: Rule to poll.
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            waits indefinitely, as before.
        poll_interval: Seconds to sleep before each poll (default 5).

    Raises:
        TimeoutError: If ``timeout`` is set and elapses before the rule
            becomes active.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        time.sleep(poll_interval)
        rule_info = self.get_firewall_rule_info(firewall_id, rule_id)
        if rule_info['ruleState'] == 'ACTIVE':
            return
        # Without a deadline, a rule that never becomes ACTIVE would
        # block the caller forever.
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f'Firewall rule {rule_id} on firewall {firewall_id} did not '
                f'become ACTIVE within {timeout} seconds.')

def wait_firewall_outbound_rule_complete(self,
                                         firewall_id,
                                         rule_id,
                                         timeout=None,
                                         poll_interval=5):
    """Block until the firewall rule reports ``ruleState == 'ACTIVE'``.

    Sleeps ``poll_interval`` seconds, then polls
    ``get_firewall_rule_info`` and repeats until the rule is active.

    Args:
        firewall_id: Firewall that owns the rule.
        rule_id: Rule to poll.
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            waits indefinitely, as before.
        poll_interval: Seconds to sleep before each poll (default 5).

    Raises:
        TimeoutError: If ``timeout`` is set and elapses before the rule
            becomes active.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        time.sleep(poll_interval)
        rule_info = self.get_firewall_rule_info(firewall_id, rule_id)
        if rule_info['ruleState'] == 'ACTIVE':
            return
        # Without a deadline, a rule that never becomes ACTIVE would
        # block the caller forever.
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f'Firewall rule {rule_id} on firewall {firewall_id} did not '
                f'become ACTIVE within {timeout} seconds.')

def get_virtual_server_info(self, vm_id):
    """Fetch the v3 virtual-server record for ``vm_id``.

    Queries the same URL as ``get_vm_info``; returns the result of
    ``self._get`` called with ``contents_key=None``.
    """
    server_url = f'{API_ENDPOINT}/virtual-server/v3/virtual-servers/{vm_id}'
    return self._get(url=server_url, contents_key=None)

def create_vpc(self, zone_id):
    """Request creation of a VPC in service zone ``zone_id``.

    The VPC is named ``'skyvpc'`` followed by characters 5-9 of
    ``zone_id``.

    Returns:
        The result of ``self._post`` for the creation request.
    """
    # NOTE(review): the tag below uses the literal strings 'tagKey' and
    # 'tagValue' - these look like placeholder values; confirm.
    placeholder_tag = {'tagKey': 'tagKey', 'tagValue': 'tagValue'}
    payload = {
        'serviceZoneId': zone_id,
        'tags': [placeholder_tag],
        'vpcName': 'skyvpc' + zone_id[5:10],
        'vpcDescription': 'sky vpc',
    }
    return self._post(f'{API_ENDPOINT}/vpc/v3/vpcs', payload)

def create_subnet(self, vpc_id, zone_id):
    """Request creation of a PUBLIC subnet inside ``vpc_id``.

    The subnet uses the fixed CIDR block ``192.168.0.0/24`` and is named
    ``'skysubnet'`` followed by characters 5-9 of ``zone_id``.

    Returns:
        The result of ``self._post`` for the creation request.
    """
    # NOTE(review): the tag below uses the literal strings 'tagKey' and
    # 'tagValue' - these look like placeholder values; confirm.
    placeholder_tag = {'tagKey': 'tagKey', 'tagValue': 'tagValue'}
    payload = {
        'subnetCidrBlock': '192.168.0.0/24',
        'subnetName': 'skysubnet' + zone_id[5:10],
        'subnetType': 'PUBLIC',
        'tags': [placeholder_tag],
        'vpcId': vpc_id,
        'subnetDescription': 'sky subnet',
    }
    return self._post(f'{API_ENDPOINT}/subnet/v2/subnets', payload)

def create_internet_gateway(self, vpc_id):
    """Request creation of a SHARED internet gateway for ``vpc_id``.

    The request enables the gateway firewall and disables firewall
    logging.

    Returns:
        The result of ``self._post`` for the creation request.
    """
    # NOTE(review): the tag below uses the literal strings 'tagKey' and
    # 'tagValue' - these look like placeholder values; confirm.
    placeholder_tag = {'tagKey': 'tagKey', 'tagValue': 'tagValue'}
    payload = {
        'firewallEnabled': True,
        'firewallLoggable': False,
        'internetGatewayType': 'SHARED',
        'tags': [placeholder_tag],
        'vpcId': vpc_id,
        'internetGatewayDescription': 'sky internet gateway',
    }
    gateways_url = f'{API_ENDPOINT}/internet-gateway/v4/internet-gateways'
    return self._post(gateways_url, payload)

def get_vpc_info(self, vpc_id):
    """Fetch the record of VPC ``vpc_id`` (``contents_key=None``)."""
    return self._get(url=f'{API_ENDPOINT}/vpc/v2/vpcs/{vpc_id}',
                     contents_key=None)

def get_subnet_info(self, subnet_id):
    """Fetch the record of subnet ``subnet_id`` (``contents_key=None``)."""
    return self._get(url=f'{API_ENDPOINT}/subnet/v2/subnets/{subnet_id}',
                     contents_key=None)

def get_internet_gateway_info(self, internet_gateway_id):
    """Fetch the record of an internet gateway (``contents_key=None``)."""
    gateway_url = (f'{API_ENDPOINT}/internet-gateway/v2/internet-gateways/'
                   f'{internet_gateway_id}')
    return self._get(url=gateway_url, contents_key=None)
1 change: 1 addition & 0 deletions sky/provision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from sky.provision import lambda_cloud
from sky.provision import oci
from sky.provision import runpod
from sky.provision import scp
from sky.provision import vsphere
from sky.utils import command_runner
from sky.utils import timeline
Expand Down
4 changes: 4 additions & 0 deletions sky/provision/scp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""SCP provisioner for SkyPilot."""

from sky.provision.scp.instance import cleanup_ports
from sky.provision.scp.instance import open_ports
74 changes: 74 additions & 0 deletions sky/provision/scp/instance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""SCP instance provisioning."""

import time
from typing import Any, Dict, List, Optional

from sky.clouds.utils import scp_utils


_FIREWALL_RULE_MAX_ATTEMPTS = 300
_FIREWALL_RULE_RETRY_INTERVAL_SECONDS = 10


def _add_firewall_rule_with_retry(add_rule, wait_until_active, firewall_id,
                                  internal_ip, port) -> None:
    """Best-effort creation of one firewall rule, retrying on any error.

    Calls ``add_rule(firewall_id, internal_ip, port)``; when it returns rule
    info, waits for that rule via ``wait_until_active``. A ``None`` result
    (no rule was created) ends the attempt immediately. Any exception
    triggers a sleep and another attempt, up to the attempt limit.
    """
    import logging  # pylint: disable=import-outside-toplevel

    last_error = None
    for _ in range(_FIREWALL_RULE_MAX_ATTEMPTS):
        try:
            rule_info = add_rule(firewall_id, internal_ip, port)
            if rule_info is not None:
                wait_until_active(firewall_id, rule_info['resourceId'])
            return
        except Exception as e:  # pylint: disable=broad-except
            last_error = e
            time.sleep(_FIREWALL_RULE_RETRY_INTERVAL_SECONDS)
    # Still best-effort (no exception escapes), but no longer silent.
    logging.getLogger(__name__).warning(
        'Giving up adding firewall rule for port %s on firewall %s after '
        '%d attempts: %r', port, firewall_id, _FIREWALL_RULE_MAX_ATTEMPTS,
        last_error)


def open_ports(  # pylint: disable=unused-argument
    cluster_name_on_cloud: str,
    ports: List[str],
    provider_config: Optional[Dict[str, Any]] = None,
) -> None:
    """See sky/provision/__init__.py

    For every VM returned by ``list_instances()``, adds inbound and outbound
    TCP rules for each entry of ``ports`` to the VM's first security group
    and to every firewall attached to the VM's VPC.

    Every requested port is opened (previously only ``ports[0]`` was), and
    an empty ``ports`` list is a no-op.
    """
    if not ports:
        return

    scp_client = scp_utils.SCPClient()
    # NOTE(review): ``cluster_name_on_cloud`` is not used to filter the VM
    # list, so rules are added for every instance the client lists - confirm
    # this is intended.
    for vm in scp_client.list_instances():
        vm_info = scp_client.get_virtual_server_info(vm['virtualServerId'])

        sg_id = vm_info['securityGroupIds'][0]['securityGroupId']
        for port in ports:
            scp_client.add_new_security_group_in_rule(sg_id, port)
            scp_client.add_new_security_group_out_rule(sg_id, port)

        vpc_id = vm_info['vpcId']
        internal_ip = vm_info['ip']
        firewall_ids = [
            firewall['firewallId']
            for firewall in scp_client.list_firewalls()
            if firewall['vpcId'] == vpc_id
        ]
        for firewall_id in firewall_ids:
            for port in ports:
                _add_firewall_rule_with_retry(
                    scp_client.add_new_firewall_inbound_rule,
                    scp_client.wait_firewall_inbound_rule_complete,
                    firewall_id, internal_ip, port)
                _add_firewall_rule_with_retry(
                    scp_client.add_new_firewall_outbound_rule,
                    scp_client.wait_firewall_outbound_rule_complete,
                    firewall_id, internal_ip, port)


def cleanup_ports(  # pylint: disable=unused-argument
    cluster_name_on_cloud: str,
    ports: List[str],
    provider_config: Optional[Dict[str, Any]] = None,
) -> None:
    """See sky/provision/__init__.py

    Intentionally a no-op. Port cleanup for SCP is implemented in
    sky/skylet/providers/scp/node_provider.py (terminate_node), because it
    cannot be reached for SCP after terminate_node.
    """
Loading
Loading